patch-realtime 818 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211
62221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211
66221662316624166251662616627166281662916630166311663216633166341663516636166371663816639166401664116642166431664416645166461664716648166491665016651166521665316654166551665616657166581665916660166611666216663166641666516666166671666816669166701667116672166731667416675166761667716678166791668016681166821668316684166851668616687166881668916690166911669216693166941669516696166971669816699167001670116702167031670416705167061670716708167091671016711167121671316714167151671616717167181671916720167211672216723167241672516726167271672816729167301673116732167331673416735167361673716738167391674016741167421674316744167451674616747167481674916750167511675216753167541675516756167571675816759167601676116762167631676416765167661676716768167691677016771167721677316774167751677616777167781677916780167811678216783167841678516786167871678816789167901679116792167931679416795167961679716798167991680016801168021680316804168051680616807168081680916810168111681216813168141681516816168171681816819168201682116822168231682416825168261682716828168291683016831168321683316834168351683616837168381683916840168411684216843168441684516846168471684816849168501685116852168531685416855168561685716858168591686016861168621686316864168651686616867168681686916870168711687216873168741687516876168771687816879168801688116882168831688416885168861688716888168891689016891168921689316894168951689616897168981689916900169011690216903169041690516906169071690816909169101691116912169131691416915169161691716918169191692016921169221692316924169251692616927169281692916930169311693216933169341693516936169371693816939169401694116942169431694416945169461694716948169491695016951169521695316954169551695616957169581695916960169611696216963169641696516966169671696816969169701697116972169731697416975169761697716978169791698016981169821698316984169851698616987169881698916990169911699216993169941699516996169971699816999170001700117002170031700417005170061700717008170091701017011170121701317014170151701617017170181701917020170211
70221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211
74221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211
78221782317824178251782617827178281782917830178311783217833178341783517836178371783817839178401784117842178431784417845178461784717848178491785017851178521785317854178551785617857178581785917860178611786217863178641786517866178671786817869178701787117872178731787417875178761787717878178791788017881178821788317884178851788617887178881788917890178911789217893178941789517896178971789817899179001790117902179031790417905179061790717908179091791017911179121791317914179151791617917179181791917920179211792217923179241792517926179271792817929179301793117932179331793417935179361793717938179391794017941179421794317944179451794617947179481794917950179511795217953179541795517956179571795817959179601796117962179631796417965179661796717968179691797017971179721797317974179751797617977179781797917980179811798217983179841798517986179871798817989179901799117992179931799417995179961799717998179991800018001180021800318004180051800618007180081800918010180111801218013180141801518016180171801818019180201802118022180231802418025180261802718028180291803018031180321803318034180351803618037180381803918040180411804218043180441804518046180471804818049180501805118052180531805418055180561805718058180591806018061180621806318064180651806618067180681806918070180711807218073180741807518076180771807818079180801808118082180831808418085180861808718088180891809018091180921809318094180951809618097180981809918100181011810218103181041810518106181071810818109181101811118112181131811418115181161811718118181191812018121181221812318124181251812618127181281812918130181311813218133181341813518136181371813818139181401814118142181431814418145181461814718148181491815018151181521815318154181551815618157181581815918160181611816218163181641816518166181671816818169181701817118172181731817418175181761817718178181791818018181181821818318184181851818618187181881818918190181911819218193181941819518196181971819818199182001820118202182031820418205182061820718208182091821018211182121821318214182151821618217182181821918220182211
82221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211
86221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211
90221902319024190251902619027190281902919030190311903219033190341903519036190371903819039190401904119042190431904419045190461904719048190491905019051190521905319054190551905619057190581905919060190611906219063190641906519066190671906819069190701907119072190731907419075190761907719078190791908019081190821908319084190851908619087190881908919090190911909219093190941909519096190971909819099191001910119102191031910419105191061910719108191091911019111191121911319114191151911619117191181911919120191211912219123191241912519126191271912819129191301913119132191331913419135191361913719138191391914019141191421914319144191451914619147191481914919150191511915219153191541915519156191571915819159191601916119162191631916419165191661916719168191691917019171191721917319174191751917619177191781917919180191811918219183191841918519186191871918819189191901919119192191931919419195191961919719198191991920019201192021920319204192051920619207192081920919210192111921219213192141921519216192171921819219192201922119222192231922419225192261922719228192291923019231192321923319234192351923619237192381923919240192411924219243192441924519246192471924819249192501925119252192531925419255192561925719258192591926019261192621926319264192651926619267192681926919270192711927219273192741927519276192771927819279192801928119282192831928419285192861928719288192891929019291192921929319294192951929619297192981929919300193011930219303193041930519306193071930819309193101931119312193131931419315193161931719318193191932019321193221932319324193251932619327193281932919330193311933219333193341933519336193371933819339193401934119342193431934419345193461934719348193491935019351193521935319354193551935619357193581935919360193611936219363193641936519366193671936819369193701937119372193731937419375193761937719378193791938019381193821938319384193851938619387193881938919390193911939219393193941939519396193971939819399194001940119402194031940419405194061940719408194091941019411194121941319414194151941619417194181941919420194211
94221942319424194251942619427194281942919430194311943219433194341943519436194371943819439194401944119442194431944419445194461944719448194491945019451194521945319454194551945619457194581945919460194611946219463194641946519466194671946819469194701947119472194731947419475194761947719478194791948019481194821948319484194851948619487194881948919490194911949219493194941949519496194971949819499195001950119502195031950419505195061950719508195091951019511195121951319514195151951619517195181951919520195211952219523195241952519526195271952819529195301953119532195331953419535195361953719538195391954019541195421954319544195451954619547195481954919550195511955219553195541955519556195571955819559195601956119562195631956419565195661956719568195691957019571195721957319574195751957619577195781957919580195811958219583195841958519586195871958819589195901959119592195931959419595195961959719598195991960019601196021960319604196051960619607196081960919610196111961219613196141961519616196171961819619196201962119622196231962419625196261962719628196291963019631196321963319634196351963619637196381963919640196411964219643196441964519646196471964819649196501965119652196531965419655196561965719658196591966019661196621966319664196651966619667196681966919670196711967219673196741967519676196771967819679196801968119682196831968419685196861968719688196891969019691196921969319694196951969619697196981969919700197011970219703197041970519706197071970819709197101971119712197131971419715197161971719718197191972019721197221972319724197251972619727197281972919730197311973219733197341973519736197371973819739197401974119742197431974419745197461974719748197491975019751197521975319754197551975619757197581975919760197611976219763197641976519766197671976819769197701977119772197731977419775197761977719778197791978019781197821978319784197851978619787197881978919790197911979219793197941979519796197971979819799198001980119802198031980419805198061980719808198091981019811198121981319814198151981619817198181981919820198211
98221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212
02222022320224202252022620227202282022920230202312023220233202342023520236202372023820239202402024120242202432024420245202462024720248202492025020251202522025320254202552025620257202582025920260202612026220263202642026520266202672026820269202702027120272202732027420275202762027720278202792028020281202822028320284202852028620287202882028920290202912029220293202942029520296202972029820299203002030120302203032030420305203062030720308203092031020311203122031320314203152031620317203182031920320203212032220323203242032520326203272032820329203302033120332203332033420335203362033720338203392034020341203422034320344203452034620347203482034920350203512035220353203542035520356203572035820359203602036120362203632036420365203662036720368203692037020371203722037320374203752037620377203782037920380203812038220383203842038520386203872038820389203902039120392203932039420395203962039720398203992040020401204022040320404204052040620407204082040920410204112041220413204142041520416204172041820419204202042120422204232042420425204262042720428204292043020431204322043320434204352043620437204382043920440204412044220443204442044520446204472044820449204502045120452204532045420455204562045720458204592046020461204622046320464204652046620467204682046920470204712047220473204742047520476204772047820479204802048120482204832048420485204862048720488204892049020491204922049320494204952049620497204982049920500205012050220503205042050520506205072050820509205102051120512205132051420515205162051720518205192052020521205222052320524205252052620527205282052920530205312053220533205342053520536205372053820539205402054120542205432054420545205462054720548205492055020551205522055320554205552055620557205582055920560205612056220563205642056520566205672056820569205702057120572205732057420575205762057720578205792058020581205822058320584205852058620587205882058920590205912059220593205942059520596205972059820599206002060120602206032060420605206062060720608206092061020611206122061320614206152061620617206182061920620206212
06222062320624206252062620627206282062920630206312063220633206342063520636206372063820639206402064120642206432064420645206462064720648206492065020651206522065320654206552065620657206582065920660206612066220663206642066520666206672066820669206702067120672206732067420675206762067720678206792068020681206822068320684206852068620687206882068920690206912069220693206942069520696206972069820699207002070120702207032070420705207062070720708207092071020711207122071320714207152071620717207182071920720207212072220723207242072520726207272072820729207302073120732207332073420735207362073720738207392074020741207422074320744207452074620747207482074920750207512075220753207542075520756207572075820759207602076120762207632076420765207662076720768207692077020771207722077320774207752077620777207782077920780207812078220783207842078520786207872078820789207902079120792207932079420795207962079720798207992080020801208022080320804208052080620807208082080920810208112081220813208142081520816208172081820819208202082120822208232082420825208262082720828208292083020831208322083320834208352083620837208382083920840208412084220843208442084520846208472084820849208502085120852208532085420855208562085720858208592086020861208622086320864208652086620867208682086920870208712087220873208742087520876208772087820879208802088120882208832088420885208862088720888208892089020891208922089320894208952089620897208982089920900209012090220903209042090520906209072090820909209102091120912209132091420915209162091720918209192092020921209222092320924209252092620927209282092920930209312093220933209342093520936209372093820939209402094120942209432094420945209462094720948209492095020951209522095320954209552095620957209582095920960209612096220963209642096520966209672096820969209702097120972209732097420975209762097720978209792098020981209822098320984209852098620987209882098920990209912099220993209942099520996209972099820999210002100121002210032100421005210062100721008210092101021011210122101321014210152101621017210182101921020210212
10222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303213042130521306213072130821309213102131121312213132131421315213162131721318213192132021321213222132321324213252132621327213282132921330213312133221333213342133521336213372133821339213402134121342213432134421345213462134721348213492135021351213522135321354213552135621357213582135921360213612136221363213642136521366213672136821369213702137121372213732137421375213762137721378213792138021381213822138321384213852138621387213882138921390213912139221393213942139521396213972139821399214002140121402214032140421405214062140721408214092141021411214122141321414214152141621417214182141921420214212
14222142321424214252142621427214282142921430214312143221433214342143521436214372143821439214402144121442214432144421445214462144721448214492145021451214522145321454214552145621457214582145921460214612146221463214642146521466214672146821469214702147121472214732147421475214762147721478214792148021481214822148321484214852148621487214882148921490214912149221493214942149521496214972149821499215002150121502215032150421505215062150721508215092151021511215122151321514215152151621517215182151921520215212152221523215242152521526215272152821529215302153121532215332153421535215362153721538215392154021541215422154321544215452154621547215482154921550215512155221553215542155521556215572155821559215602156121562215632156421565215662156721568215692157021571215722157321574215752157621577215782157921580215812158221583215842158521586215872158821589215902159121592215932159421595215962159721598215992160021601216022160321604216052160621607216082160921610216112161221613216142161521616216172161821619216202162121622216232162421625216262162721628216292163021631216322163321634216352163621637216382163921640216412164221643216442164521646216472164821649216502165121652216532165421655216562165721658216592166021661216622166321664216652166621667216682166921670216712167221673216742167521676216772167821679216802168121682216832168421685216862168721688216892169021691216922169321694216952169621697216982169921700217012170221703217042170521706217072170821709217102171121712217132171421715217162171721718217192172021721217222172321724217252172621727217282172921730217312173221733217342173521736217372173821739217402174121742217432174421745217462174721748217492175021751217522175321754217552175621757217582175921760217612176221763217642176521766217672176821769217702177121772217732177421775217762177721778217792178021781217822178321784217852178621787217882178921790217912179221793217942179521796217972179821799218002180121802218032180421805218062180721808218092181021811218122181321814218152181621817218182181921820218212
18222182321824218252182621827218282182921830218312183221833218342183521836218372183821839218402184121842218432184421845218462184721848218492185021851218522185321854218552185621857218582185921860218612186221863218642186521866218672186821869218702187121872218732187421875218762187721878218792188021881218822188321884218852188621887218882188921890218912189221893218942189521896218972189821899219002190121902219032190421905219062190721908219092191021911219122191321914219152191621917219182191921920219212192221923219242192521926219272192821929219302193121932219332193421935219362193721938219392194021941219422194321944219452194621947219482194921950219512195221953219542195521956219572195821959219602196121962219632196421965219662196721968219692197021971219722197321974219752197621977219782197921980219812198221983219842198521986219872198821989219902199121992219932199421995219962199721998219992200022001220022200322004220052200622007220082200922010220112201222013220142201522016220172201822019220202202122022220232202422025220262202722028220292203022031220322203322034220352203622037220382203922040220412204222043220442204522046220472204822049220502205122052220532205422055220562205722058220592206022061220622206322064220652206622067220682206922070220712207222073220742207522076220772207822079220802208122082220832208422085220862208722088220892209022091220922209322094220952209622097220982209922100221012210222103221042210522106221072210822109221102211122112221132211422115221162211722118221192212022121221222212322124221252212622127221282212922130221312213222133221342213522136221372213822139221402214122142221432214422145221462214722148221492215022151221522215322154221552215622157221582215922160221612216222163221642216522166221672216822169221702217122172221732217422175221762217722178221792218022181221822218322184221852218622187221882218922190221912219222193221942219522196221972219822199222002220122202222032220422205222062220722208222092221022211222122221322214222152221622217222182221922220222212
22222222322224222252222622227222282222922230222312223222233222342223522236222372223822239222402224122242222432224422245222462224722248222492225022251222522225322254222552225622257222582225922260222612226222263222642226522266222672226822269222702227122272222732227422275222762227722278222792228022281222822228322284222852228622287222882228922290222912229222293222942229522296222972229822299223002230122302223032230422305223062230722308223092231022311223122231322314223152231622317223182231922320223212232222323223242232522326223272232822329223302233122332223332233422335223362233722338223392234022341223422234322344223452234622347223482234922350223512235222353223542235522356223572235822359223602236122362223632236422365223662236722368223692237022371223722237322374223752237622377223782237922380223812238222383223842238522386223872238822389223902239122392223932239422395223962239722398223992240022401224022240322404224052240622407224082240922410224112241222413224142241522416224172241822419224202242122422224232242422425224262242722428224292243022431224322243322434224352243622437224382243922440224412244222443224442244522446224472244822449224502245122452224532245422455224562245722458224592246022461224622246322464224652246622467224682246922470224712247222473224742247522476224772247822479224802248122482224832248422485224862248722488224892249022491224922249322494224952249622497224982249922500225012250222503225042250522506225072250822509225102251122512225132251422515225162251722518225192252022521225222252322524225252252622527225282252922530225312253222533225342253522536225372253822539225402254122542225432254422545225462254722548225492255022551225522255322554225552255622557225582255922560225612256222563225642256522566225672256822569225702257122572225732257422575225762257722578225792258022581225822258322584225852258622587225882258922590225912259222593225942259522596225972259822599226002260122602226032260422605226062260722608226092261022611226122261322614226152261622617226182261922620226212
26222262322624226252262622627226282262922630226312263222633226342263522636226372263822639226402264122642226432264422645226462264722648226492265022651226522265322654226552265622657226582265922660226612266222663226642266522666226672266822669226702267122672226732267422675226762267722678226792268022681226822268322684226852268622687226882268922690226912269222693226942269522696226972269822699227002270122702227032270422705227062270722708227092271022711227122271322714227152271622717227182271922720227212272222723227242272522726227272272822729227302273122732227332273422735227362273722738227392274022741227422274322744227452274622747227482274922750227512275222753227542275522756227572275822759227602276122762227632276422765227662276722768227692277022771227722277322774227752277622777227782277922780227812278222783227842278522786227872278822789227902279122792227932279422795227962279722798227992280022801228022280322804228052280622807228082280922810228112281222813228142281522816228172281822819228202282122822228232282422825228262282722828228292283022831228322283322834228352283622837228382283922840228412284222843228442284522846228472284822849228502285122852228532285422855228562285722858228592286022861228622286322864228652286622867228682286922870228712287222873228742287522876228772287822879228802288122882228832288422885228862288722888228892289022891228922289322894228952289622897228982289922900229012290222903229042290522906229072290822909229102291122912229132291422915229162291722918229192292022921229222292322924229252292622927229282292922930229312293222933229342293522936229372293822939229402294122942229432294422945229462294722948229492295022951229522295322954229552295622957229582295922960229612296222963229642296522966229672296822969229702297122972229732297422975229762297722978229792298022981229822298322984229852298622987229882298922990229912299222993229942299522996229972299822999230002300123002230032300423005230062300723008230092301023011230122301323014230152301623017230182301923020230212
30222302323024230252302623027230282302923030230312303223033230342303523036230372303823039230402304123042230432304423045230462304723048230492305023051230522305323054230552305623057230582305923060230612306223063230642306523066230672306823069230702307123072230732307423075230762307723078230792308023081230822308323084230852308623087230882308923090230912309223093230942309523096230972309823099231002310123102231032310423105231062310723108231092311023111231122311323114231152311623117231182311923120231212312223123231242312523126231272312823129231302313123132231332313423135231362313723138231392314023141231422314323144231452314623147231482314923150231512315223153231542315523156231572315823159231602316123162231632316423165231662316723168231692317023171231722317323174231752317623177231782317923180231812318223183231842318523186231872318823189231902319123192231932319423195231962319723198231992320023201232022320323204232052320623207232082320923210232112321223213232142321523216232172321823219232202322123222232232322423225232262322723228232292323023231232322323323234232352323623237232382323923240232412324223243232442324523246232472324823249232502325123252232532325423255232562325723258232592326023261232622326323264232652326623267232682326923270232712327223273232742327523276232772327823279232802328123282232832328423285232862328723288232892329023291232922329323294232952329623297232982329923300233012330223303233042330523306233072330823309233102331123312233132331423315233162331723318233192332023321233222332323324233252332623327233282332923330233312333223333233342333523336233372333823339233402334123342233432334423345233462334723348233492335023351233522335323354233552335623357233582335923360233612336223363233642336523366233672336823369233702337123372233732337423375233762337723378233792338023381233822338323384233852338623387233882338923390233912339223393233942339523396233972339823399234002340123402234032340423405234062340723408234092341023411234122341323414234152341623417234182341923420234212
34222342323424234252342623427234282342923430234312343223433234342343523436234372343823439234402344123442234432344423445234462344723448234492345023451234522345323454234552345623457234582345923460234612346223463234642346523466234672346823469234702347123472234732347423475234762347723478234792348023481234822348323484234852348623487234882348923490234912349223493234942349523496234972349823499235002350123502235032350423505235062350723508235092351023511235122351323514235152351623517235182351923520235212352223523235242352523526235272352823529235302353123532235332353423535235362353723538235392354023541235422354323544235452354623547235482354923550235512355223553235542355523556235572355823559235602356123562235632356423565235662356723568235692357023571235722357323574235752357623577235782357923580235812358223583235842358523586235872358823589235902359123592235932359423595235962359723598235992360023601236022360323604236052360623607236082360923610236112361223613236142361523616236172361823619236202362123622236232362423625236262362723628236292363023631236322363323634236352363623637236382363923640236412364223643236442364523646236472364823649236502365123652236532365423655236562365723658236592366023661236622366323664236652366623667236682366923670236712367223673236742367523676236772367823679236802368123682236832368423685236862368723688236892369023691236922369323694236952369623697236982369923700237012370223703237042370523706237072370823709237102371123712237132371423715237162371723718237192372023721237222372323724237252372623727237282372923730237312373223733237342373523736237372373823739237402374123742237432374423745237462374723748237492375023751237522375323754237552375623757237582375923760237612376223763237642376523766237672376823769237702377123772237732377423775237762377723778237792378023781237822378323784237852378623787237882378923790237912379223793237942379523796237972379823799238002380123802238032380423805238062380723808238092381023811238122381323814238152381623817238182381923820238212
38222382323824238252382623827238282382923830238312383223833238342383523836238372383823839238402384123842238432384423845238462384723848238492385023851238522385323854238552385623857238582385923860238612386223863238642386523866238672386823869238702387123872238732387423875238762387723878238792388023881238822388323884238852388623887238882388923890238912389223893238942389523896238972389823899239002390123902239032390423905239062390723908239092391023911239122391323914239152391623917239182391923920239212392223923239242392523926239272392823929239302393123932239332393423935239362393723938239392394023941239422394323944239452394623947239482394923950239512395223953239542395523956239572395823959239602396123962239632396423965239662396723968239692397023971239722397323974239752397623977239782397923980239812398223983239842398523986239872398823989239902399123992239932399423995239962399723998239992400024001240022400324004240052400624007240082400924010240112401224013240142401524016240172401824019240202402124022240232402424025240262402724028240292403024031240322403324034240352403624037240382403924040240412404224043240442404524046240472404824049240502405124052240532405424055240562405724058240592406024061240622406324064240652406624067240682406924070240712407224073240742407524076240772407824079240802408124082240832408424085240862408724088240892409024091240922409324094240952409624097240982409924100241012410224103241042410524106241072410824109241102411124112241132411424115241162411724118241192412024121241222412324124241252412624127241282412924130241312413224133241342413524136241372413824139241402414124142241432414424145241462414724148241492415024151241522415324154241552415624157241582415924160241612416224163241642416524166241672416824169241702417124172241732417424175241762417724178241792418024181241822418324184241852418624187241882418924190241912419224193241942419524196241972419824199242002420124202242032420424205242062420724208242092421024211242122421324214242152421624217242182421924220242212
42222422324224242252422624227242282422924230242312423224233242342423524236242372423824239242402424124242242432424424245242462424724248242492425024251242522425324254242552425624257242582425924260242612426224263242642426524266242672426824269242702427124272242732427424275242762427724278242792428024281242822428324284242852428624287242882428924290242912429224293242942429524296242972429824299243002430124302243032430424305243062430724308243092431024311243122431324314243152431624317243182431924320243212432224323243242432524326243272432824329243302433124332243332433424335243362433724338243392434024341243422434324344243452434624347243482434924350243512435224353243542435524356243572435824359243602436124362243632436424365243662436724368243692437024371243722437324374243752437624377243782437924380243812438224383243842438524386243872438824389243902439124392243932439424395243962439724398243992440024401244022440324404244052440624407244082440924410244112441224413244142441524416244172441824419244202442124422244232442424425244262442724428244292443024431244322443324434244352443624437244382443924440244412444224443244442444524446244472444824449244502445124452244532445424455244562445724458244592446024461244622446324464244652446624467244682446924470244712447224473244742447524476244772447824479244802448124482244832448424485244862448724488244892449024491244922449324494244952449624497244982449924500245012450224503245042450524506245072450824509245102451124512245132451424515245162451724518245192452024521245222452324524245252452624527245282452924530245312453224533245342453524536245372453824539245402454124542245432454424545245462454724548245492455024551245522455324554245552455624557245582455924560245612456224563245642456524566245672456824569245702457124572245732457424575245762457724578245792458024581245822458324584245852458624587245882458924590245912459224593245942459524596245972459824599246002460124602246032460424605246062460724608246092461024611246122461324614246152461624617246182461924620246212
46222462324624246252462624627246282462924630246312463224633246342463524636246372463824639246402464124642246432464424645246462464724648246492465024651246522465324654246552465624657246582465924660246612466224663246642466524666246672466824669246702467124672246732467424675246762467724678246792468024681246822468324684246852468624687246882468924690246912469224693246942469524696246972469824699247002470124702247032470424705247062470724708247092471024711247122471324714247152471624717247182471924720247212472224723247242472524726247272472824729247302473124732247332473424735247362473724738247392474024741247422474324744247452474624747247482474924750247512475224753247542475524756247572475824759247602476124762247632476424765247662476724768247692477024771247722477324774247752477624777247782477924780247812478224783247842478524786247872478824789247902479124792247932479424795247962479724798247992480024801248022480324804248052480624807248082480924810248112481224813248142481524816248172481824819248202482124822248232482424825248262482724828248292483024831248322483324834248352483624837248382483924840248412484224843248442484524846248472484824849248502485124852248532485424855248562485724858248592486024861248622486324864248652486624867248682486924870248712487224873248742487524876248772487824879248802488124882248832488424885248862488724888248892489024891248922489324894248952489624897248982489924900249012490224903249042490524906249072490824909249102491124912249132491424915249162491724918249192492024921249222492324924249252492624927249282492924930249312493224933249342493524936249372493824939249402494124942249432494424945249462494724948249492495024951249522495324954249552495624957249582495924960249612496224963249642496524966249672496824969249702497124972249732497424975249762497724978249792498024981249822498324984249852498624987249882498924990249912499224993249942499524996249972499824999250002500125002250032500425005250062500725008250092501025011250122501325014250152501625017250182501925020250212
50222502325024250252502625027250282502925030250312503225033250342503525036250372503825039250402504125042250432504425045250462504725048250492505025051250522505325054250552505625057250582505925060250612506225063250642506525066250672506825069250702507125072250732507425075250762507725078250792508025081250822508325084250852508625087250882508925090250912509225093250942509525096250972509825099251002510125102251032510425105251062510725108251092511025111251122511325114251152511625117251182511925120251212512225123251242512525126251272512825129251302513125132251332513425135251362513725138251392514025141251422514325144251452514625147251482514925150251512515225153251542515525156251572515825159251602516125162251632516425165251662516725168251692517025171251722517325174251752517625177251782517925180251812518225183251842518525186251872518825189251902519125192251932519425195251962519725198251992520025201252022520325204252052520625207252082520925210252112521225213252142521525216252172521825219252202522125222252232522425225252262522725228252292523025231252322523325234252352523625237252382523925240252412524225243252442524525246252472524825249252502525125252252532525425255252562525725258252592526025261252622526325264252652526625267252682526925270252712527225273252742527525276252772527825279252802528125282252832528425285252862528725288252892529025291252922529325294252952529625297252982529925300253012530225303253042530525306253072530825309253102531125312253132531425315253162531725318253192532025321253222532325324253252532625327253282532925330253312533225333253342533525336253372533825339253402534125342253432534425345253462534725348253492535025351253522535325354253552535625357253582535925360253612536225363253642536525366253672536825369253702537125372253732537425375253762537725378253792538025381253822538325384253852538625387253882538925390253912539225393253942539525396253972539825399254002540125402254032540425405254062540725408254092541025411254122541325414254152541625417254182541925420254212
54222542325424254252542625427254282542925430254312543225433254342543525436254372543825439254402544125442254432544425445254462544725448254492545025451254522545325454254552545625457254582545925460254612546225463254642546525466254672546825469254702547125472254732547425475254762547725478254792548025481254822548325484254852548625487254882548925490254912549225493254942549525496254972549825499255002550125502255032550425505255062550725508255092551025511255122551325514255152551625517255182551925520255212552225523255242552525526255272552825529255302553125532255332553425535255362553725538255392554025541255422554325544255452554625547255482554925550255512555225553255542555525556255572555825559255602556125562255632556425565255662556725568255692557025571255722557325574255752557625577255782557925580255812558225583255842558525586255872558825589255902559125592255932559425595255962559725598255992560025601256022560325604256052560625607256082560925610256112561225613256142561525616256172561825619256202562125622256232562425625256262562725628256292563025631256322563325634256352563625637256382563925640256412564225643256442564525646256472564825649256502565125652256532565425655256562565725658256592566025661256622566325664256652566625667256682566925670256712567225673256742567525676256772567825679256802568125682256832568425685256862568725688256892569025691256922569325694256952569625697256982569925700257012570225703257042570525706257072570825709257102571125712257132571425715257162571725718257192572025721257222572325724257252572625727257282572925730257312573225733257342573525736257372573825739257402574125742257432574425745257462574725748257492575025751257522575325754257552575625757257582575925760257612576225763257642576525766257672576825769257702577125772257732577425775257762577725778257792578025781257822578325784257852578625787257882578925790257912579225793257942579525796257972579825799258002580125802258032580425805258062580725808258092581025811258122581325814258152581625817258182581925820258212
58222582325824258252582625827258282582925830258312583225833258342583525836258372583825839258402584125842258432584425845258462584725848258492585025851258522585325854258552585625857258582585925860258612586225863258642586525866258672586825869258702587125872258732587425875258762587725878258792588025881258822588325884258852588625887258882588925890258912589225893258942589525896258972589825899259002590125902259032590425905259062590725908259092591025911259122591325914259152591625917259182591925920259212592225923259242592525926259272592825929259302593125932259332593425935259362593725938259392594025941259422594325944259452594625947259482594925950259512595225953259542595525956259572595825959259602596125962259632596425965259662596725968259692597025971259722597325974259752597625977259782597925980259812598225983259842598525986259872598825989259902599125992259932599425995259962599725998259992600026001260022600326004260052600626007260082600926010260112601226013260142601526016260172601826019260202602126022260232602426025260262602726028260292603026031260322603326034260352603626037260382603926040260412604226043260442604526046260472604826049260502605126052260532605426055260562605726058260592606026061260622606326064260652606626067260682606926070260712607226073260742607526076260772607826079260802608126082260832608426085260862608726088260892609026091260922609326094260952609626097260982609926100261012610226103261042610526106261072610826109261102611126112261132611426115261162611726118261192612026121261222612326124261252612626127261282612926130261312613226133261342613526136261372613826139261402614126142261432614426145261462614726148261492615026151261522615326154261552615626157261582615926160261612616226163261642616526166261672616826169261702617126172261732617426175261762617726178261792618026181261822618326184261852618626187261882618926190261912619226193261942619526196261972619826199262002620126202262032620426205262062620726208262092621026211262122621326214262152621626217262182621926220262212
62222622326224262252622626227262282622926230262312623226233262342623526236262372623826239262402624126242262432624426245262462624726248262492625026251262522625326254262552625626257262582625926260262612626226263262642626526266262672626826269262702627126272262732627426275262762627726278262792628026281262822628326284262852628626287262882628926290262912629226293262942629526296262972629826299263002630126302263032630426305263062630726308263092631026311263122631326314263152631626317263182631926320263212632226323263242632526326263272632826329263302633126332263332633426335263362633726338263392634026341263422634326344263452634626347263482634926350263512635226353263542635526356263572635826359263602636126362263632636426365263662636726368263692637026371263722637326374263752637626377263782637926380263812638226383263842638526386263872638826389263902639126392263932639426395263962639726398263992640026401264022640326404264052640626407264082640926410264112641226413264142641526416264172641826419264202642126422264232642426425264262642726428264292643026431264322643326434264352643626437264382643926440264412644226443264442644526446264472644826449264502645126452264532645426455264562645726458264592646026461264622646326464264652646626467264682646926470264712647226473264742647526476264772647826479264802648126482264832648426485264862648726488264892649026491264922649326494264952649626497264982649926500265012650226503265042650526506265072650826509265102651126512265132651426515265162651726518265192652026521265222652326524265252652626527265282652926530265312653226533265342653526536265372653826539265402654126542265432654426545265462654726548265492655026551265522655326554265552655626557265582655926560265612656226563265642656526566265672656826569265702657126572265732657426575265762657726578265792658026581265822658326584265852658626587265882658926590265912659226593265942659526596265972659826599266002660126602266032660426605266062660726608266092661026611266122661326614266152661626617266182661926620266212
66222662326624266252662626627266282662926630266312663226633266342663526636266372663826639266402664126642266432664426645266462664726648266492665026651266522665326654266552665626657266582665926660266612666226663266642666526666266672666826669266702667126672266732667426675266762667726678266792668026681266822668326684266852668626687266882668926690266912669226693266942669526696266972669826699267002670126702267032670426705267062670726708267092671026711267122671326714267152671626717267182671926720267212672226723267242672526726267272672826729267302673126732267332673426735267362673726738267392674026741267422674326744267452674626747267482674926750267512675226753267542675526756267572675826759267602676126762267632676426765267662676726768267692677026771267722677326774267752677626777267782677926780267812678226783267842678526786267872678826789267902679126792267932679426795267962679726798267992680026801268022680326804268052680626807268082680926810268112681226813268142681526816268172681826819268202682126822268232682426825268262682726828268292683026831268322683326834268352683626837268382683926840268412684226843268442684526846268472684826849268502685126852268532685426855268562685726858268592686026861268622686326864268652686626867268682686926870268712687226873268742687526876268772687826879268802688126882268832688426885268862688726888268892689026891268922689326894268952689626897268982689926900269012690226903269042690526906269072690826909269102691126912269132691426915269162691726918269192692026921269222692326924269252692626927269282692926930269312693226933269342693526936269372693826939269402694126942269432694426945269462694726948269492695026951269522695326954269552695626957269582695926960269612696226963269642696526966269672696826969269702697126972269732697426975269762697726978269792698026981269822698326984269852698626987269882698926990269912699226993269942699526996269972699826999270002700127002270032700427005270062700727008270092701027011270122701327014270152701627017270182701927020270212
70222702327024270252702627027270282702927030270312703227033270342703527036270372703827039270402704127042270432704427045270462704727048270492705027051270522705327054270552705627057270582705927060270612706227063270642706527066270672706827069270702707127072270732707427075270762707727078270792708027081270822708327084270852708627087270882708927090270912709227093270942709527096270972709827099271002710127102271032710427105271062710727108271092711027111271122711327114271152711627117271182711927120271212712227123271242712527126271272712827129271302713127132271332713427135271362713727138271392714027141271422714327144271452714627147271482714927150271512715227153271542715527156271572715827159271602716127162271632716427165271662716727168271692717027171271722717327174271752717627177271782717927180271812718227183271842718527186271872718827189271902719127192271932719427195271962719727198271992720027201272022720327204272052720627207272082720927210272112721227213272142721527216272172721827219272202722127222272232722427225272262722727228272292723027231272322723327234272352723627237272382723927240272412724227243272442724527246272472724827249272502725127252272532725427255272562725727258272592726027261272622726327264272652726627267272682726927270272712727227273272742727527276272772727827279272802728127282272832728427285272862728727288272892729027291272922729327294272952729627297272982729927300273012730227303273042730527306273072730827309273102731127312273132731427315273162731727318273192732027321273222732327324273252732627327273282732927330273312733227333273342733527336273372733827339273402734127342273432734427345273462734727348273492735027351273522735327354273552735627357273582735927360273612736227363273642736527366273672736827369273702737127372273732737427375273762737727378273792738027381273822738327384273852738627387273882738927390273912739227393273942739527396273972739827399274002740127402274032740427405274062740727408274092741027411274122741327414274152741627417274182741927420274212
74222742327424274252742627427274282742927430274312743227433274342743527436274372743827439274402744127442274432744427445274462744727448274492745027451274522745327454274552745627457274582745927460274612746227463274642746527466274672746827469274702747127472274732747427475274762747727478274792748027481274822748327484274852748627487274882748927490274912749227493274942749527496274972749827499275002750127502275032750427505275062750727508275092751027511275122751327514275152751627517275182751927520275212752227523275242752527526275272752827529275302753127532275332753427535275362753727538275392754027541275422754327544275452754627547275482754927550275512755227553275542755527556275572755827559275602756127562275632756427565275662756727568275692757027571275722757327574275752757627577275782757927580275812758227583275842758527586275872758827589275902759127592275932759427595275962759727598275992760027601276022760327604276052760627607276082760927610276112761227613276142761527616276172761827619276202762127622276232762427625276262762727628276292763027631276322763327634276352763627637276382763927640276412764227643276442764527646276472764827649276502765127652276532765427655276562765727658276592766027661276622766327664276652766627667276682766927670276712767227673276742767527676276772767827679276802768127682276832768427685276862768727688276892769027691276922769327694276952769627697276982769927700277012770227703277042770527706277072770827709277102771127712277132771427715277162771727718
  1. diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
  2. index 3a3b30ac2a75..9e0745cafbd8 100644
  3. --- a/Documentation/sysrq.txt
  4. +++ b/Documentation/sysrq.txt
  5. @@ -59,10 +59,17 @@ On PowerPC - Press 'ALT - Print Screen (or F13) - <command key>,
  6. On other - If you know of the key combos for other architectures, please
  7. let me know so I can add them to this section.
  8. -On all - write a character to /proc/sysrq-trigger. e.g.:
  9. -
  10. +On all - write a character to /proc/sysrq-trigger, e.g.:
  11. echo t > /proc/sysrq-trigger
  12. +On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g.
  13. + echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq
  14. + Send an ICMP echo request with this pattern plus the particular
  15. + SysRq command key. Example:
  16. + # ping -c1 -s57 -p0102030468
  17. + will trigger the SysRq-H (help) command.
  18. +
  19. +
  20. * What are the 'command' keys?
  21. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  22. 'b' - Will immediately reboot the system without syncing or unmounting
  23. diff --git a/Documentation/trace/histograms.txt b/Documentation/trace/histograms.txt
  24. new file mode 100644
  25. index 000000000000..6f2aeabf7faa
  26. --- /dev/null
  27. +++ b/Documentation/trace/histograms.txt
  28. @@ -0,0 +1,186 @@
  29. + Using the Linux Kernel Latency Histograms
  30. +
  31. +
  32. +This document gives a short explanation of how to enable, configure and use
  33. +latency histograms. Latency histograms are primarily relevant in the
  34. +context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT)
  35. +and are used in the quality management of the Linux real-time
  36. +capabilities.
  37. +
  38. +
  39. +* Purpose of latency histograms
  40. +
  41. +A latency histogram continuously accumulates the frequencies of latency
  42. +data. There are two types of histograms
  43. +- potential sources of latencies
  44. +- effective latencies
  45. +
  46. +
  47. +* Potential sources of latencies
  48. +
  49. +Potential sources of latencies are code segments where interrupts,
  50. +preemption or both are disabled (aka critical sections). To create
  51. +histograms of potential sources of latency, the kernel stores the time
  52. +stamp at the start of a critical section, determines the time elapsed
  53. +when the end of the section is reached, and increments the frequency
  54. +counter of that latency value - irrespective of whether any concurrently
  55. +running process is affected by latency or not.
  56. +- Configuration items (in the Kernel hacking/Tracers submenu)
  57. + CONFIG_INTERRUPT_OFF_LATENCY
  58. + CONFIG_PREEMPT_OFF_LATENCY
  59. +
  60. +
  61. +* Effective latencies
  62. +
  63. +Effective latencies are actually occurring during wakeup of a process. To
  64. +determine effective latencies, the kernel stores the time stamp when a
  65. +process is scheduled to be woken up, and determines the duration of the
  66. +wakeup time shortly before control is passed over to this process. Note
  67. +that the apparent latency in user space may be somewhat longer, since the
  68. +process may be interrupted after control is passed over to it but before
  69. +the execution in user space takes place. Simply measuring the interval
  70. +between enqueuing and wakeup may also not be appropriate in cases when a
  71. +process is scheduled as a result of a timer expiration. The timer may have
  72. +missed its deadline, e.g. due to disabled interrupts, but this latency
  73. +would not be registered. Therefore, the offsets of missed timers are
  74. +recorded in a separate histogram. If both wakeup latency and missed timer
  75. +offsets are configured and enabled, a third histogram may be enabled that
  76. +records the overall latency as a sum of the timer latency, if any, and the
  77. +wakeup latency. This histogram is called "timerandwakeup".
  78. +- Configuration items (in the Kernel hacking/Tracers submenu)
  79. + CONFIG_WAKEUP_LATENCY
  80. + CONFIG_MISSED_TIMER_OFFSETS
  81. +
  82. +
  83. +* Usage
  84. +
  85. +The interface to the administration of the latency histograms is located
  86. +in the debugfs file system. To mount it, either enter
  87. +
  88. +mount -t sysfs nodev /sys
  89. +mount -t debugfs nodev /sys/kernel/debug
  90. +
  91. +from shell command line level, or add
  92. +
  93. +nodev /sys sysfs defaults 0 0
  94. +nodev /sys/kernel/debug debugfs defaults 0 0
  95. +
  96. +to the file /etc/fstab. All latency histogram related files are then
  97. +available in the directory /sys/kernel/debug/tracing/latency_hist. A
  98. +particular histogram type is enabled by writing non-zero to the related
  99. +variable in the /sys/kernel/debug/tracing/latency_hist/enable directory.
  100. +Select "preemptirqsoff" for the histograms of potential sources of
  101. +latencies and "wakeup" for histograms of effective latencies etc. The
  102. +histogram data - one per CPU - are available in the files
  103. +
  104. +/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx
  105. +/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx
  106. +/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx
  107. +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx
  108. +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx
  109. +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx
  110. +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx
  111. +
  112. +The histograms are reset by writing non-zero to the file "reset" in a
  113. +particular latency directory. To reset all latency data, use
  114. +
  115. +#!/bin/sh
  116. +
  117. +TRACINGDIR=/sys/kernel/debug/tracing
  118. +HISTDIR=$TRACINGDIR/latency_hist
  119. +
  120. +if test -d $HISTDIR
  121. +then
  122. + cd $HISTDIR
  123. + for i in `find . | grep /reset$`
  124. + do
  125. + echo 1 >$i
  126. + done
  127. +fi
  128. +
  129. +
  130. +* Data format
  131. +
  132. +Latency data are stored with a resolution of one microsecond. The
  133. +maximum latency is 10,240 microseconds. The data are only valid, if the
  134. +overflow register is empty. Every output line contains the latency in
  135. +microseconds in the first column and the number of samples in the second
  136. +column. To display only lines with a positive latency count, use, for
  137. +example,
  138. +
  139. +grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0
  140. +
  141. +#Minimum latency: 0 microseconds.
  142. +#Average latency: 0 microseconds.
  143. +#Maximum latency: 25 microseconds.
  144. +#Total samples: 3104770694
  145. +#There are 0 samples greater or equal than 10240 microseconds
  146. +#usecs samples
  147. + 0 2984486876
  148. + 1 49843506
  149. + 2 58219047
  150. + 3 5348126
  151. + 4 2187960
  152. + 5 3388262
  153. + 6 959289
  154. + 7 208294
  155. + 8 40420
  156. + 9 4485
  157. + 10 14918
  158. + 11 18340
  159. + 12 25052
  160. + 13 19455
  161. + 14 5602
  162. + 15 969
  163. + 16 47
  164. + 17 18
  165. + 18 14
  166. + 19 1
  167. + 20 3
  168. + 21 2
  169. + 22 5
  170. + 23 2
  171. + 25 1
  172. +
  173. +
  174. +* Wakeup latency of a selected process
  175. +
  176. +To only collect wakeup latency data of a particular process, write the
  177. +PID of the requested process to
  178. +
  179. +/sys/kernel/debug/tracing/latency_hist/wakeup/pid
  180. +
  181. +PIDs are not considered, if this variable is set to 0.
  182. +
  183. +
  184. +* Details of the process with the highest wakeup latency so far
  185. +
  186. +Selected data of the process that suffered from the highest wakeup
  187. +latency that occurred in a particular CPU are available in the file
  188. +
  189. +/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx.
  190. +
  191. +In addition, other relevant system data at the time when the
  192. +latency occurred are given.
  193. +
  194. +The format of the data is (all in one line):
  195. +<PID> <Priority> <Latency> (<Timeroffset>) <Command> \
  196. +<- <PID> <Priority> <Command> <Timestamp>
  197. +
  198. +The value of <Timeroffset> is only relevant in the combined timer
  199. +and wakeup latency recording. In the wakeup recording, it is
  200. +always 0, in the missed_timer_offsets recording, it is the same
  201. +as <Latency>.
  202. +
  203. +When retrospectively searching for the origin of a latency and
  204. +tracing was not enabled, it may be helpful to know the name and
  205. +some basic data of the task that (finally) was switching to the
  206. +late real-time task. In addition to the victim's data, also the
  207. +data of the possible culprit are therefore displayed after the
  208. +"<-" symbol.
  209. +
  210. +Finally, the timestamp of the time when the latency occurred
  211. +in <seconds>.<microseconds> after the most recent system boot
  212. +is provided.
  213. +
  214. +These data are also reset when the wakeup histogram is reset.
  215. diff --git a/MAINTAINERS b/MAINTAINERS
  216. index 63cefa62324c..be0ea1e5c4cc 100644
  217. --- a/MAINTAINERS
  218. +++ b/MAINTAINERS
  219. @@ -5196,6 +5196,23 @@ F: fs/fuse/
  220. F: include/uapi/linux/fuse.h
  221. F: Documentation/filesystems/fuse.txt
  222. +FUTEX SUBSYSTEM
  223. +M: Thomas Gleixner <tglx@linutronix.de>
  224. +M: Ingo Molnar <mingo@redhat.com>
  225. +R: Peter Zijlstra <peterz@infradead.org>
  226. +R: Darren Hart <dvhart@infradead.org>
  227. +L: linux-kernel@vger.kernel.org
  228. +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
  229. +S: Maintained
  230. +F: kernel/futex.c
  231. +F: kernel/futex_compat.c
  232. +F: include/asm-generic/futex.h
  233. +F: include/linux/futex.h
  234. +F: include/uapi/linux/futex.h
  235. +F: tools/testing/selftests/futex/
  236. +F: tools/perf/bench/futex*
  237. +F: Documentation/*futex*
  238. +
  239. FUTURE DOMAIN TMC-16x0 SCSI DRIVER (16-bit)
  240. M: Rik Faith <faith@cs.unc.edu>
  241. L: linux-scsi@vger.kernel.org
  242. diff --git a/arch/Kconfig b/arch/Kconfig
  243. index 659bdd079277..099fc0f5155e 100644
  244. --- a/arch/Kconfig
  245. +++ b/arch/Kconfig
  246. @@ -9,6 +9,7 @@ config OPROFILE
  247. tristate "OProfile system profiling"
  248. depends on PROFILING
  249. depends on HAVE_OPROFILE
  250. + depends on !PREEMPT_RT_FULL
  251. select RING_BUFFER
  252. select RING_BUFFER_ALLOW_SWAP
  253. help
  254. @@ -52,6 +53,7 @@ config KPROBES
  255. config JUMP_LABEL
  256. bool "Optimize very unlikely/likely branches"
  257. depends on HAVE_ARCH_JUMP_LABEL
  258. + depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST)
  259. help
  260. This option enables a transparent branch optimization that
  261. makes certain almost-always-true or almost-always-false branch
  262. diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
  263. index b5d529fdffab..5715844e83e3 100644
  264. --- a/arch/arm/Kconfig
  265. +++ b/arch/arm/Kconfig
  266. @@ -36,7 +36,7 @@ config ARM
  267. select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
  268. select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
  269. select HAVE_ARCH_HARDENED_USERCOPY
  270. - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
  271. + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT_BASE
  272. select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
  273. select HAVE_ARCH_MMAP_RND_BITS if MMU
  274. select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
  275. @@ -75,6 +75,7 @@ config ARM
  276. select HAVE_PERF_EVENTS
  277. select HAVE_PERF_REGS
  278. select HAVE_PERF_USER_STACK_DUMP
  279. + select HAVE_PREEMPT_LAZY
  280. select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
  281. select HAVE_REGS_AND_STACK_ACCESS_API
  282. select HAVE_SYSCALL_TRACEPOINTS
  283. diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h
  284. index e53638c8ed8a..6095a1649865 100644
  285. --- a/arch/arm/include/asm/irq.h
  286. +++ b/arch/arm/include/asm/irq.h
  287. @@ -22,6 +22,8 @@
  288. #endif
  289. #ifndef __ASSEMBLY__
  290. +#include <linux/cpumask.h>
  291. +
  292. struct irqaction;
  293. struct pt_regs;
  294. extern void migrate_irqs(void);
  295. diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h
  296. index 12ebfcc1d539..c962084605bc 100644
  297. --- a/arch/arm/include/asm/switch_to.h
  298. +++ b/arch/arm/include/asm/switch_to.h
  299. @@ -3,6 +3,13 @@
  300. #include <linux/thread_info.h>
  301. +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
  302. +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
  303. +#else
  304. +static inline void
  305. +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
  306. +#endif
  307. +
  308. /*
  309. * For v7 SMP cores running a preemptible kernel we may be pre-empted
  310. * during a TLB maintenance operation, so execute an inner-shareable dsb
  311. @@ -25,6 +32,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info
  312. #define switch_to(prev,next,last) \
  313. do { \
  314. __complete_pending_tlbi(); \
  315. + switch_kmaps(prev, next); \
  316. last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \
  317. } while (0)
  318. diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
  319. index 776757d1604a..1f36a4eccc72 100644
  320. --- a/arch/arm/include/asm/thread_info.h
  321. +++ b/arch/arm/include/asm/thread_info.h
  322. @@ -49,6 +49,7 @@ struct cpu_context_save {
  323. struct thread_info {
  324. unsigned long flags; /* low level flags */
  325. int preempt_count; /* 0 => preemptable, <0 => bug */
  326. + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
  327. mm_segment_t addr_limit; /* address limit */
  328. struct task_struct *task; /* main task structure */
  329. __u32 cpu; /* cpu */
  330. @@ -142,7 +143,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
  331. #define TIF_SYSCALL_TRACE 4 /* syscall trace active */
  332. #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
  333. #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
  334. -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */
  335. +#define TIF_SECCOMP 8 /* seccomp syscall filtering active */
  336. +#define TIF_NEED_RESCHED_LAZY 7
  337. #define TIF_NOHZ 12 /* in adaptive nohz mode */
  338. #define TIF_USING_IWMMXT 17
  339. @@ -152,6 +154,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
  340. #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
  341. #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
  342. #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
  343. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  344. #define _TIF_UPROBE (1 << TIF_UPROBE)
  345. #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
  346. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  347. @@ -167,7 +170,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
  348. * Change these and you break ASM code in entry-common.S
  349. */
  350. #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
  351. - _TIF_NOTIFY_RESUME | _TIF_UPROBE)
  352. + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
  353. + _TIF_NEED_RESCHED_LAZY)
  354. #endif /* __KERNEL__ */
  355. #endif /* __ASM_ARM_THREAD_INFO_H */
  356. diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
  357. index 608008229c7d..3866da3f7bb7 100644
  358. --- a/arch/arm/kernel/asm-offsets.c
  359. +++ b/arch/arm/kernel/asm-offsets.c
  360. @@ -65,6 +65,7 @@ int main(void)
  361. BLANK();
  362. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  363. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  364. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  365. DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
  366. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  367. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  368. diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
  369. index 9f157e7c51e7..468e224d76aa 100644
  370. --- a/arch/arm/kernel/entry-armv.S
  371. +++ b/arch/arm/kernel/entry-armv.S
  372. @@ -220,11 +220,18 @@ __irq_svc:
  373. #ifdef CONFIG_PREEMPT
  374. ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
  375. - ldr r0, [tsk, #TI_FLAGS] @ get flags
  376. teq r8, #0 @ if preempt count != 0
  377. + bne 1f @ return from exception
  378. + ldr r0, [tsk, #TI_FLAGS] @ get flags
  379. + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
  380. + blne svc_preempt @ preempt!
  381. +
  382. + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
  383. + teq r8, #0 @ if preempt lazy count != 0
  384. movne r0, #0 @ force flags to 0
  385. - tst r0, #_TIF_NEED_RESCHED
  386. + tst r0, #_TIF_NEED_RESCHED_LAZY
  387. blne svc_preempt
  388. +1:
  389. #endif
  390. svc_exit r5, irq = 1 @ return from exception
  391. @@ -239,8 +246,14 @@ svc_preempt:
  392. 1: bl preempt_schedule_irq @ irq en/disable is done inside
  393. ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
  394. tst r0, #_TIF_NEED_RESCHED
  395. + bne 1b
  396. + tst r0, #_TIF_NEED_RESCHED_LAZY
  397. reteq r8 @ go again
  398. - b 1b
  399. + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
  400. + teq r0, #0 @ if preempt lazy count != 0
  401. + beq 1b
  402. + ret r8 @ go again
  403. +
  404. #endif
  405. __und_fault:
  406. diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
  407. index 10c3283d6c19..8872937862cc 100644
  408. --- a/arch/arm/kernel/entry-common.S
  409. +++ b/arch/arm/kernel/entry-common.S
  410. @@ -36,7 +36,9 @@ ret_fast_syscall:
  411. UNWIND(.cantunwind )
  412. disable_irq_notrace @ disable interrupts
  413. ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
  414. - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
  415. + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
  416. + bne fast_work_pending
  417. + tst r1, #_TIF_SECCOMP
  418. bne fast_work_pending
  419. /* perform architecture specific actions before user return */
  420. @@ -62,8 +64,11 @@ ret_fast_syscall:
  421. str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
  422. disable_irq_notrace @ disable interrupts
  423. ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
  424. - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
  425. + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
  426. + bne do_slower_path
  427. + tst r1, #_TIF_SECCOMP
  428. beq no_work_pending
  429. +do_slower_path:
  430. UNWIND(.fnend )
  431. ENDPROC(ret_fast_syscall)
  432. diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c
  433. index 69bda1a5707e..1f665acaa6a9 100644
  434. --- a/arch/arm/kernel/patch.c
  435. +++ b/arch/arm/kernel/patch.c
  436. @@ -15,7 +15,7 @@ struct patch {
  437. unsigned int insn;
  438. };
  439. -static DEFINE_SPINLOCK(patch_lock);
  440. +static DEFINE_RAW_SPINLOCK(patch_lock);
  441. static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
  442. __acquires(&patch_lock)
  443. @@ -32,7 +32,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
  444. return addr;
  445. if (flags)
  446. - spin_lock_irqsave(&patch_lock, *flags);
  447. + raw_spin_lock_irqsave(&patch_lock, *flags);
  448. else
  449. __acquire(&patch_lock);
  450. @@ -47,7 +47,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
  451. clear_fixmap(fixmap);
  452. if (flags)
  453. - spin_unlock_irqrestore(&patch_lock, *flags);
  454. + raw_spin_unlock_irqrestore(&patch_lock, *flags);
  455. else
  456. __release(&patch_lock);
  457. }
  458. diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
  459. index 91d2d5b01414..750550098b59 100644
  460. --- a/arch/arm/kernel/process.c
  461. +++ b/arch/arm/kernel/process.c
  462. @@ -322,6 +322,30 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
  463. }
  464. #ifdef CONFIG_MMU
  465. +/*
  466. + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not
  467. + * initialized by pgtable_page_ctor() then a coredump of the vector page will
  468. + * fail.
  469. + */
  470. +static int __init vectors_user_mapping_init_page(void)
  471. +{
  472. + struct page *page;
  473. + unsigned long addr = 0xffff0000;
  474. + pgd_t *pgd;
  475. + pud_t *pud;
  476. + pmd_t *pmd;
  477. +
  478. + pgd = pgd_offset_k(addr);
  479. + pud = pud_offset(pgd, addr);
  480. + pmd = pmd_offset(pud, addr);
  481. + page = pmd_page(*(pmd));
  482. +
  483. + pgtable_page_ctor(page);
  484. +
  485. + return 0;
  486. +}
  487. +late_initcall(vectors_user_mapping_init_page);
  488. +
  489. #ifdef CONFIG_KUSER_HELPERS
  490. /*
  491. * The vectors page is always readable from user space for the
  492. diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
  493. index 7b8f2141427b..96541e00b74a 100644
  494. --- a/arch/arm/kernel/signal.c
  495. +++ b/arch/arm/kernel/signal.c
  496. @@ -572,7 +572,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
  497. */
  498. trace_hardirqs_off();
  499. do {
  500. - if (likely(thread_flags & _TIF_NEED_RESCHED)) {
  501. + if (likely(thread_flags & (_TIF_NEED_RESCHED |
  502. + _TIF_NEED_RESCHED_LAZY))) {
  503. schedule();
  504. } else {
  505. if (unlikely(!user_mode(regs)))
  506. diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
  507. index 7dd14e8395e6..4cd7e3d98035 100644
  508. --- a/arch/arm/kernel/smp.c
  509. +++ b/arch/arm/kernel/smp.c
  510. @@ -234,8 +234,6 @@ int __cpu_disable(void)
  511. flush_cache_louis();
  512. local_flush_tlb_all();
  513. - clear_tasks_mm_cpumask(cpu);
  514. -
  515. return 0;
  516. }
  517. @@ -251,6 +249,9 @@ void __cpu_die(unsigned int cpu)
  518. pr_err("CPU%u: cpu didn't die\n", cpu);
  519. return;
  520. }
  521. +
  522. + clear_tasks_mm_cpumask(cpu);
  523. +
  524. pr_notice("CPU%u: shutdown\n", cpu);
  525. /*
  526. diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
  527. index 0bee233fef9a..314cfb232a63 100644
  528. --- a/arch/arm/kernel/unwind.c
  529. +++ b/arch/arm/kernel/unwind.c
  530. @@ -93,7 +93,7 @@ extern const struct unwind_idx __start_unwind_idx[];
  531. static const struct unwind_idx *__origin_unwind_idx;
  532. extern const struct unwind_idx __stop_unwind_idx[];
  533. -static DEFINE_SPINLOCK(unwind_lock);
  534. +static DEFINE_RAW_SPINLOCK(unwind_lock);
  535. static LIST_HEAD(unwind_tables);
  536. /* Convert a prel31 symbol to an absolute address */
  537. @@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr)
  538. /* module unwind tables */
  539. struct unwind_table *table;
  540. - spin_lock_irqsave(&unwind_lock, flags);
  541. + raw_spin_lock_irqsave(&unwind_lock, flags);
  542. list_for_each_entry(table, &unwind_tables, list) {
  543. if (addr >= table->begin_addr &&
  544. addr < table->end_addr) {
  545. @@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr)
  546. break;
  547. }
  548. }
  549. - spin_unlock_irqrestore(&unwind_lock, flags);
  550. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  551. }
  552. pr_debug("%s: idx = %p\n", __func__, idx);
  553. @@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size,
  554. tab->begin_addr = text_addr;
  555. tab->end_addr = text_addr + text_size;
  556. - spin_lock_irqsave(&unwind_lock, flags);
  557. + raw_spin_lock_irqsave(&unwind_lock, flags);
  558. list_add_tail(&tab->list, &unwind_tables);
  559. - spin_unlock_irqrestore(&unwind_lock, flags);
  560. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  561. return tab;
  562. }
  563. @@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_table *tab)
  564. if (!tab)
  565. return;
  566. - spin_lock_irqsave(&unwind_lock, flags);
  567. + raw_spin_lock_irqsave(&unwind_lock, flags);
  568. list_del(&tab->list);
  569. - spin_unlock_irqrestore(&unwind_lock, flags);
  570. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  571. kfree(tab);
  572. }
  573. diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
  574. index 19b5f5c1c0ff..82aa639e6737 100644
  575. --- a/arch/arm/kvm/arm.c
  576. +++ b/arch/arm/kvm/arm.c
  577. @@ -619,7 +619,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
  578. * involves poking the GIC, which must be done in a
  579. * non-preemptible context.
  580. */
  581. - preempt_disable();
  582. + migrate_disable();
  583. kvm_pmu_flush_hwstate(vcpu);
  584. kvm_timer_flush_hwstate(vcpu);
  585. kvm_vgic_flush_hwstate(vcpu);
  586. @@ -640,7 +640,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
  587. kvm_pmu_sync_hwstate(vcpu);
  588. kvm_timer_sync_hwstate(vcpu);
  589. kvm_vgic_sync_hwstate(vcpu);
  590. - preempt_enable();
  591. + migrate_enable();
  592. continue;
  593. }
  594. @@ -696,7 +696,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
  595. kvm_vgic_sync_hwstate(vcpu);
  596. - preempt_enable();
  597. + migrate_enable();
  598. ret = handle_exit(vcpu, run, ret);
  599. }
  600. diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c
  601. index 98ffe1e62ad5..df9769ddece5 100644
  602. --- a/arch/arm/mach-exynos/platsmp.c
  603. +++ b/arch/arm/mach-exynos/platsmp.c
  604. @@ -229,7 +229,7 @@ static void __iomem *scu_base_addr(void)
  605. return (void __iomem *)(S5P_VA_SCU);
  606. }
  607. -static DEFINE_SPINLOCK(boot_lock);
  608. +static DEFINE_RAW_SPINLOCK(boot_lock);
  609. static void exynos_secondary_init(unsigned int cpu)
  610. {
  611. @@ -242,8 +242,8 @@ static void exynos_secondary_init(unsigned int cpu)
  612. /*
  613. * Synchronise with the boot thread.
  614. */
  615. - spin_lock(&boot_lock);
  616. - spin_unlock(&boot_lock);
  617. + raw_spin_lock(&boot_lock);
  618. + raw_spin_unlock(&boot_lock);
  619. }
  620. int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr)
  621. @@ -307,7 +307,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
  622. * Set synchronisation state between this boot processor
  623. * and the secondary one
  624. */
  625. - spin_lock(&boot_lock);
  626. + raw_spin_lock(&boot_lock);
  627. /*
  628. * The secondary processor is waiting to be released from
  629. @@ -334,7 +334,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
  630. if (timeout == 0) {
  631. printk(KERN_ERR "cpu1 power enable failed");
  632. - spin_unlock(&boot_lock);
  633. + raw_spin_unlock(&boot_lock);
  634. return -ETIMEDOUT;
  635. }
  636. }
  637. @@ -380,7 +380,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
  638. * calibrations, then wait for it to finish
  639. */
  640. fail:
  641. - spin_unlock(&boot_lock);
  642. + raw_spin_unlock(&boot_lock);
  643. return pen_release != -1 ? ret : 0;
  644. }
  645. diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c
  646. index 4b653a8cb75c..b03d5a922cb1 100644
  647. --- a/arch/arm/mach-hisi/platmcpm.c
  648. +++ b/arch/arm/mach-hisi/platmcpm.c
  649. @@ -61,7 +61,7 @@
  650. static void __iomem *sysctrl, *fabric;
  651. static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
  652. -static DEFINE_SPINLOCK(boot_lock);
  653. +static DEFINE_RAW_SPINLOCK(boot_lock);
  654. static u32 fabric_phys_addr;
  655. /*
  656. * [0]: bootwrapper physical address
  657. @@ -113,7 +113,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle)
  658. if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
  659. return -EINVAL;
  660. - spin_lock_irq(&boot_lock);
  661. + raw_spin_lock_irq(&boot_lock);
  662. if (hip04_cpu_table[cluster][cpu])
  663. goto out;
  664. @@ -147,7 +147,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle)
  665. out:
  666. hip04_cpu_table[cluster][cpu]++;
  667. - spin_unlock_irq(&boot_lock);
  668. + raw_spin_unlock_irq(&boot_lock);
  669. return 0;
  670. }
  671. @@ -162,11 +162,11 @@ static void hip04_cpu_die(unsigned int l_cpu)
  672. cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
  673. cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
  674. - spin_lock(&boot_lock);
  675. + raw_spin_lock(&boot_lock);
  676. hip04_cpu_table[cluster][cpu]--;
  677. if (hip04_cpu_table[cluster][cpu] == 1) {
  678. /* A power_up request went ahead of us. */
  679. - spin_unlock(&boot_lock);
  680. + raw_spin_unlock(&boot_lock);
  681. return;
  682. } else if (hip04_cpu_table[cluster][cpu] > 1) {
  683. pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu);
  684. @@ -174,7 +174,7 @@ static void hip04_cpu_die(unsigned int l_cpu)
  685. }
  686. last_man = hip04_cluster_is_down(cluster);
  687. - spin_unlock(&boot_lock);
  688. + raw_spin_unlock(&boot_lock);
  689. if (last_man) {
  690. /* Since it's Cortex A15, disable L2 prefetching. */
  691. asm volatile(
  692. @@ -203,7 +203,7 @@ static int hip04_cpu_kill(unsigned int l_cpu)
  693. cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
  694. count = TIMEOUT_MSEC / POLL_MSEC;
  695. - spin_lock_irq(&boot_lock);
  696. + raw_spin_lock_irq(&boot_lock);
  697. for (tries = 0; tries < count; tries++) {
  698. if (hip04_cpu_table[cluster][cpu])
  699. goto err;
  700. @@ -211,10 +211,10 @@ static int hip04_cpu_kill(unsigned int l_cpu)
  701. data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
  702. if (data & CORE_WFI_STATUS(cpu))
  703. break;
  704. - spin_unlock_irq(&boot_lock);
  705. + raw_spin_unlock_irq(&boot_lock);
  706. /* Wait for clean L2 when the whole cluster is down. */
  707. msleep(POLL_MSEC);
  708. - spin_lock_irq(&boot_lock);
  709. + raw_spin_lock_irq(&boot_lock);
  710. }
  711. if (tries >= count)
  712. goto err;
  713. @@ -231,10 +231,10 @@ static int hip04_cpu_kill(unsigned int l_cpu)
  714. goto err;
  715. if (hip04_cluster_is_down(cluster))
  716. hip04_set_snoop_filter(cluster, 0);
  717. - spin_unlock_irq(&boot_lock);
  718. + raw_spin_unlock_irq(&boot_lock);
  719. return 1;
  720. err:
  721. - spin_unlock_irq(&boot_lock);
  722. + raw_spin_unlock_irq(&boot_lock);
  723. return 0;
  724. }
  725. #endif
  726. diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
  727. index b4de3da6dffa..b52893319d75 100644
  728. --- a/arch/arm/mach-omap2/omap-smp.c
  729. +++ b/arch/arm/mach-omap2/omap-smp.c
  730. @@ -64,7 +64,7 @@ static const struct omap_smp_config omap5_cfg __initconst = {
  731. .startup_addr = omap5_secondary_startup,
  732. };
  733. -static DEFINE_SPINLOCK(boot_lock);
  734. +static DEFINE_RAW_SPINLOCK(boot_lock);
  735. void __iomem *omap4_get_scu_base(void)
  736. {
  737. @@ -131,8 +131,8 @@ static void omap4_secondary_init(unsigned int cpu)
  738. /*
  739. * Synchronise with the boot thread.
  740. */
  741. - spin_lock(&boot_lock);
  742. - spin_unlock(&boot_lock);
  743. + raw_spin_lock(&boot_lock);
  744. + raw_spin_unlock(&boot_lock);
  745. }
  746. static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
  747. @@ -146,7 +146,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
  748. * Set synchronisation state between this boot processor
  749. * and the secondary one
  750. */
  751. - spin_lock(&boot_lock);
  752. + raw_spin_lock(&boot_lock);
  753. /*
  754. * Update the AuxCoreBoot0 with boot state for secondary core.
  755. @@ -223,7 +223,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
  756. * Now the secondary core is starting up let it run its
  757. * calibrations, then wait for it to finish
  758. */
  759. - spin_unlock(&boot_lock);
  760. + raw_spin_unlock(&boot_lock);
  761. return 0;
  762. }
  763. diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c
  764. index 0875b99add18..18b6d98d2581 100644
  765. --- a/arch/arm/mach-prima2/platsmp.c
  766. +++ b/arch/arm/mach-prima2/platsmp.c
  767. @@ -22,7 +22,7 @@
  768. static void __iomem *clk_base;
  769. -static DEFINE_SPINLOCK(boot_lock);
  770. +static DEFINE_RAW_SPINLOCK(boot_lock);
  771. static void sirfsoc_secondary_init(unsigned int cpu)
  772. {
  773. @@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsigned int cpu)
  774. /*
  775. * Synchronise with the boot thread.
  776. */
  777. - spin_lock(&boot_lock);
  778. - spin_unlock(&boot_lock);
  779. + raw_spin_lock(&boot_lock);
  780. + raw_spin_unlock(&boot_lock);
  781. }
  782. static const struct of_device_id clk_ids[] = {
  783. @@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle)
  784. /* make sure write buffer is drained */
  785. mb();
  786. - spin_lock(&boot_lock);
  787. + raw_spin_lock(&boot_lock);
  788. /*
  789. * The secondary processor is waiting to be released from
  790. @@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle)
  791. * now the secondary core is starting up let it run its
  792. * calibrations, then wait for it to finish
  793. */
  794. - spin_unlock(&boot_lock);
  795. + raw_spin_unlock(&boot_lock);
  796. return pen_release != -1 ? -ENOSYS : 0;
  797. }
  798. diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c
  799. index 5494c9e0c909..e8ce157d3548 100644
  800. --- a/arch/arm/mach-qcom/platsmp.c
  801. +++ b/arch/arm/mach-qcom/platsmp.c
  802. @@ -46,7 +46,7 @@
  803. extern void secondary_startup_arm(void);
  804. -static DEFINE_SPINLOCK(boot_lock);
  805. +static DEFINE_RAW_SPINLOCK(boot_lock);
  806. #ifdef CONFIG_HOTPLUG_CPU
  807. static void qcom_cpu_die(unsigned int cpu)
  808. @@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned int cpu)
  809. /*
  810. * Synchronise with the boot thread.
  811. */
  812. - spin_lock(&boot_lock);
  813. - spin_unlock(&boot_lock);
  814. + raw_spin_lock(&boot_lock);
  815. + raw_spin_unlock(&boot_lock);
  816. }
  817. static int scss_release_secondary(unsigned int cpu)
  818. @@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int))
  819. * set synchronisation state between this boot processor
  820. * and the secondary one
  821. */
  822. - spin_lock(&boot_lock);
  823. + raw_spin_lock(&boot_lock);
  824. /*
  825. * Send the secondary CPU a soft interrupt, thereby causing
  826. @@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int))
  827. * now the secondary core is starting up let it run its
  828. * calibrations, then wait for it to finish
  829. */
  830. - spin_unlock(&boot_lock);
  831. + raw_spin_unlock(&boot_lock);
  832. return ret;
  833. }
  834. diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c
  835. index 8d1e2d551786..7fa56cc78118 100644
  836. --- a/arch/arm/mach-spear/platsmp.c
  837. +++ b/arch/arm/mach-spear/platsmp.c
  838. @@ -32,7 +32,7 @@ static void write_pen_release(int val)
  839. sync_cache_w(&pen_release);
  840. }
  841. -static DEFINE_SPINLOCK(boot_lock);
  842. +static DEFINE_RAW_SPINLOCK(boot_lock);
  843. static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
  844. @@ -47,8 +47,8 @@ static void spear13xx_secondary_init(unsigned int cpu)
  845. /*
  846. * Synchronise with the boot thread.
  847. */
  848. - spin_lock(&boot_lock);
  849. - spin_unlock(&boot_lock);
  850. + raw_spin_lock(&boot_lock);
  851. + raw_spin_unlock(&boot_lock);
  852. }
  853. static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
  854. @@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
  855. * set synchronisation state between this boot processor
  856. * and the secondary one
  857. */
  858. - spin_lock(&boot_lock);
  859. + raw_spin_lock(&boot_lock);
  860. /*
  861. * The secondary processor is waiting to be released from
  862. @@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
  863. * now the secondary core is starting up let it run its
  864. * calibrations, then wait for it to finish
  865. */
  866. - spin_unlock(&boot_lock);
  867. + raw_spin_unlock(&boot_lock);
  868. return pen_release != -1 ? -ENOSYS : 0;
  869. }
  870. diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c
  871. index ea5a2277ee46..b988e081ac79 100644
  872. --- a/arch/arm/mach-sti/platsmp.c
  873. +++ b/arch/arm/mach-sti/platsmp.c
  874. @@ -35,7 +35,7 @@ static void write_pen_release(int val)
  875. sync_cache_w(&pen_release);
  876. }
  877. -static DEFINE_SPINLOCK(boot_lock);
  878. +static DEFINE_RAW_SPINLOCK(boot_lock);
  879. static void sti_secondary_init(unsigned int cpu)
  880. {
  881. @@ -48,8 +48,8 @@ static void sti_secondary_init(unsigned int cpu)
  882. /*
  883. * Synchronise with the boot thread.
  884. */
  885. - spin_lock(&boot_lock);
  886. - spin_unlock(&boot_lock);
  887. + raw_spin_lock(&boot_lock);
  888. + raw_spin_unlock(&boot_lock);
  889. }
  890. static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
  891. @@ -60,7 +60,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
  892. * set synchronisation state between this boot processor
  893. * and the secondary one
  894. */
  895. - spin_lock(&boot_lock);
  896. + raw_spin_lock(&boot_lock);
  897. /*
  898. * The secondary processor is waiting to be released from
  899. @@ -91,7 +91,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
  900. * now the secondary core is starting up let it run its
  901. * calibrations, then wait for it to finish
  902. */
  903. - spin_unlock(&boot_lock);
  904. + raw_spin_unlock(&boot_lock);
  905. return pen_release != -1 ? -ENOSYS : 0;
  906. }
  907. diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
  908. index f7861dc83182..ce47dfe25fb0 100644
  909. --- a/arch/arm/mm/fault.c
  910. +++ b/arch/arm/mm/fault.c
  911. @@ -433,6 +433,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
  912. if (addr < TASK_SIZE)
  913. return do_page_fault(addr, fsr, regs);
  914. + if (interrupts_enabled(regs))
  915. + local_irq_enable();
  916. +
  917. if (user_mode(regs))
  918. goto bad_area;
  919. @@ -500,6 +503,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
  920. static int
  921. do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
  922. {
  923. + if (interrupts_enabled(regs))
  924. + local_irq_enable();
  925. +
  926. do_bad_area(addr, fsr, regs);
  927. return 0;
  928. }
  929. diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
  930. index d02f8187b1cc..542692dbd40a 100644
  931. --- a/arch/arm/mm/highmem.c
  932. +++ b/arch/arm/mm/highmem.c
  933. @@ -34,6 +34,11 @@ static inline pte_t get_fixmap_pte(unsigned long vaddr)
  934. return *ptep;
  935. }
  936. +static unsigned int fixmap_idx(int type)
  937. +{
  938. + return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  939. +}
  940. +
  941. void *kmap(struct page *page)
  942. {
  943. might_sleep();
  944. @@ -54,12 +59,13 @@ EXPORT_SYMBOL(kunmap);
  945. void *kmap_atomic(struct page *page)
  946. {
  947. + pte_t pte = mk_pte(page, kmap_prot);
  948. unsigned int idx;
  949. unsigned long vaddr;
  950. void *kmap;
  951. int type;
  952. - preempt_disable();
  953. + preempt_disable_nort();
  954. pagefault_disable();
  955. if (!PageHighMem(page))
  956. return page_address(page);
  957. @@ -79,7 +85,7 @@ void *kmap_atomic(struct page *page)
  958. type = kmap_atomic_idx_push();
  959. - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  960. + idx = fixmap_idx(type);
  961. vaddr = __fix_to_virt(idx);
  962. #ifdef CONFIG_DEBUG_HIGHMEM
  963. /*
  964. @@ -93,7 +99,10 @@ void *kmap_atomic(struct page *page)
  965. * in place, so the contained TLB flush ensures the TLB is updated
  966. * with the new mapping.
  967. */
  968. - set_fixmap_pte(idx, mk_pte(page, kmap_prot));
  969. +#ifdef CONFIG_PREEMPT_RT_FULL
  970. + current->kmap_pte[type] = pte;
  971. +#endif
  972. + set_fixmap_pte(idx, pte);
  973. return (void *)vaddr;
  974. }
  975. @@ -106,44 +115,75 @@ void __kunmap_atomic(void *kvaddr)
  976. if (kvaddr >= (void *)FIXADDR_START) {
  977. type = kmap_atomic_idx();
  978. - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  979. + idx = fixmap_idx(type);
  980. if (cache_is_vivt())
  981. __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
  982. +#ifdef CONFIG_PREEMPT_RT_FULL
  983. + current->kmap_pte[type] = __pte(0);
  984. +#endif
  985. #ifdef CONFIG_DEBUG_HIGHMEM
  986. BUG_ON(vaddr != __fix_to_virt(idx));
  987. - set_fixmap_pte(idx, __pte(0));
  988. #else
  989. (void) idx; /* to kill a warning */
  990. #endif
  991. + set_fixmap_pte(idx, __pte(0));
  992. kmap_atomic_idx_pop();
  993. } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
  994. /* this address was obtained through kmap_high_get() */
  995. kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
  996. }
  997. pagefault_enable();
  998. - preempt_enable();
  999. + preempt_enable_nort();
  1000. }
  1001. EXPORT_SYMBOL(__kunmap_atomic);
  1002. void *kmap_atomic_pfn(unsigned long pfn)
  1003. {
  1004. + pte_t pte = pfn_pte(pfn, kmap_prot);
  1005. unsigned long vaddr;
  1006. int idx, type;
  1007. struct page *page = pfn_to_page(pfn);
  1008. - preempt_disable();
  1009. + preempt_disable_nort();
  1010. pagefault_disable();
  1011. if (!PageHighMem(page))
  1012. return page_address(page);
  1013. type = kmap_atomic_idx_push();
  1014. - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  1015. + idx = fixmap_idx(type);
  1016. vaddr = __fix_to_virt(idx);
  1017. #ifdef CONFIG_DEBUG_HIGHMEM
  1018. BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
  1019. #endif
  1020. - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
  1021. +#ifdef CONFIG_PREEMPT_RT_FULL
  1022. + current->kmap_pte[type] = pte;
  1023. +#endif
  1024. + set_fixmap_pte(idx, pte);
  1025. return (void *)vaddr;
  1026. }
  1027. +#if defined CONFIG_PREEMPT_RT_FULL
  1028. +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
  1029. +{
  1030. + int i;
  1031. +
  1032. + /*
  1033. + * Clear @prev's kmap_atomic mappings
  1034. + */
  1035. + for (i = 0; i < prev_p->kmap_idx; i++) {
  1036. + int idx = fixmap_idx(i);
  1037. +
  1038. + set_fixmap_pte(idx, __pte(0));
  1039. + }
  1040. + /*
  1041. + * Restore @next_p's kmap_atomic mappings
  1042. + */
  1043. + for (i = 0; i < next_p->kmap_idx; i++) {
  1044. + int idx = fixmap_idx(i);
  1045. +
  1046. + if (!pte_none(next_p->kmap_pte[i]))
  1047. + set_fixmap_pte(idx, next_p->kmap_pte[i]);
  1048. + }
  1049. +}
  1050. +#endif
  1051. diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c
  1052. index c2366510187a..6b60f582b738 100644
  1053. --- a/arch/arm/plat-versatile/platsmp.c
  1054. +++ b/arch/arm/plat-versatile/platsmp.c
  1055. @@ -32,7 +32,7 @@ static void write_pen_release(int val)
  1056. sync_cache_w(&pen_release);
  1057. }
  1058. -static DEFINE_SPINLOCK(boot_lock);
  1059. +static DEFINE_RAW_SPINLOCK(boot_lock);
  1060. void versatile_secondary_init(unsigned int cpu)
  1061. {
  1062. @@ -45,8 +45,8 @@ void versatile_secondary_init(unsigned int cpu)
  1063. /*
  1064. * Synchronise with the boot thread.
  1065. */
  1066. - spin_lock(&boot_lock);
  1067. - spin_unlock(&boot_lock);
  1068. + raw_spin_lock(&boot_lock);
  1069. + raw_spin_unlock(&boot_lock);
  1070. }
  1071. int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1072. @@ -57,7 +57,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1073. * Set synchronisation state between this boot processor
  1074. * and the secondary one
  1075. */
  1076. - spin_lock(&boot_lock);
  1077. + raw_spin_lock(&boot_lock);
  1078. /*
  1079. * This is really belt and braces; we hold unintended secondary
  1080. @@ -87,7 +87,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1081. * now the secondary core is starting up let it run its
  1082. * calibrations, then wait for it to finish
  1083. */
  1084. - spin_unlock(&boot_lock);
  1085. + raw_spin_unlock(&boot_lock);
  1086. return pen_release != -1 ? -ENOSYS : 0;
  1087. }
  1088. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
  1089. index cf57a7799a0f..78d1b49fbed5 100644
  1090. --- a/arch/arm64/Kconfig
  1091. +++ b/arch/arm64/Kconfig
  1092. @@ -91,6 +91,7 @@ config ARM64
  1093. select HAVE_PERF_EVENTS
  1094. select HAVE_PERF_REGS
  1095. select HAVE_PERF_USER_STACK_DUMP
  1096. + select HAVE_PREEMPT_LAZY
  1097. select HAVE_REGS_AND_STACK_ACCESS_API
  1098. select HAVE_RCU_TABLE_FREE
  1099. select HAVE_SYSCALL_TRACEPOINTS
  1100. @@ -704,7 +705,7 @@ config XEN_DOM0
  1101. config XEN
  1102. bool "Xen guest support on ARM64"
  1103. - depends on ARM64 && OF
  1104. + depends on ARM64 && OF && !PREEMPT_RT_FULL
  1105. select SWIOTLB_XEN
  1106. select PARAVIRT
  1107. help
  1108. diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
  1109. index e9ea5a6bd449..6c500ad63c6a 100644
  1110. --- a/arch/arm64/include/asm/thread_info.h
  1111. +++ b/arch/arm64/include/asm/thread_info.h
  1112. @@ -49,6 +49,7 @@ struct thread_info {
  1113. mm_segment_t addr_limit; /* address limit */
  1114. struct task_struct *task; /* main task structure */
  1115. int preempt_count; /* 0 => preemptable, <0 => bug */
  1116. + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
  1117. int cpu; /* cpu */
  1118. };
  1119. @@ -112,6 +113,7 @@ static inline struct thread_info *current_thread_info(void)
  1120. #define TIF_NEED_RESCHED 1
  1121. #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
  1122. #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
  1123. +#define TIF_NEED_RESCHED_LAZY 4
  1124. #define TIF_NOHZ 7
  1125. #define TIF_SYSCALL_TRACE 8
  1126. #define TIF_SYSCALL_AUDIT 9
  1127. @@ -127,6 +129,7 @@ static inline struct thread_info *current_thread_info(void)
  1128. #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
  1129. #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
  1130. #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
  1131. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  1132. #define _TIF_NOHZ (1 << TIF_NOHZ)
  1133. #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
  1134. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  1135. @@ -135,7 +138,9 @@ static inline struct thread_info *current_thread_info(void)
  1136. #define _TIF_32BIT (1 << TIF_32BIT)
  1137. #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
  1138. - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
  1139. + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
  1140. + _TIF_NEED_RESCHED_LAZY)
  1141. +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  1142. #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
  1143. _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
  1144. diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
  1145. index c58ddf8c4062..a8f2f7c1fe12 100644
  1146. --- a/arch/arm64/kernel/asm-offsets.c
  1147. +++ b/arch/arm64/kernel/asm-offsets.c
  1148. @@ -38,6 +38,7 @@ int main(void)
  1149. BLANK();
  1150. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  1151. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  1152. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  1153. DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
  1154. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  1155. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  1156. diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
  1157. index b4c7db434654..433d846f4f51 100644
  1158. --- a/arch/arm64/kernel/entry.S
  1159. +++ b/arch/arm64/kernel/entry.S
  1160. @@ -430,11 +430,16 @@ el1_irq:
  1161. #ifdef CONFIG_PREEMPT
  1162. ldr w24, [tsk, #TI_PREEMPT] // get preempt count
  1163. - cbnz w24, 1f // preempt count != 0
  1164. + cbnz w24, 2f // preempt count != 0
  1165. ldr x0, [tsk, #TI_FLAGS] // get flags
  1166. - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
  1167. - bl el1_preempt
  1168. + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
  1169. +
  1170. + ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count
  1171. + cbnz w24, 2f // preempt lazy count != 0
  1172. + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling?
  1173. 1:
  1174. + bl el1_preempt
  1175. +2:
  1176. #endif
  1177. #ifdef CONFIG_TRACE_IRQFLAGS
  1178. bl trace_hardirqs_on
  1179. @@ -448,6 +453,7 @@ el1_preempt:
  1180. 1: bl preempt_schedule_irq // irq en/disable is done inside
  1181. ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
  1182. tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
  1183. + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling?
  1184. ret x24
  1185. #endif
  1186. diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
  1187. index 404dd67080b9..639dc6d12e72 100644
  1188. --- a/arch/arm64/kernel/signal.c
  1189. +++ b/arch/arm64/kernel/signal.c
  1190. @@ -409,7 +409,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
  1191. */
  1192. trace_hardirqs_off();
  1193. do {
  1194. - if (thread_flags & _TIF_NEED_RESCHED) {
  1195. + if (thread_flags & _TIF_NEED_RESCHED_MASK) {
  1196. schedule();
  1197. } else {
  1198. local_irq_enable();
  1199. diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
  1200. index 5e844f68e847..dc613cc10f54 100644
  1201. --- a/arch/mips/Kconfig
  1202. +++ b/arch/mips/Kconfig
  1203. @@ -2516,7 +2516,7 @@ config MIPS_ASID_BITS_VARIABLE
  1204. #
  1205. config HIGHMEM
  1206. bool "High Memory Support"
  1207. - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA
  1208. + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL
  1209. config CPU_SUPPORTS_HIGHMEM
  1210. bool
  1211. diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
  1212. index 6eda5abbd719..601e27701a4a 100644
  1213. --- a/arch/powerpc/Kconfig
  1214. +++ b/arch/powerpc/Kconfig
  1215. @@ -52,10 +52,11 @@ config LOCKDEP_SUPPORT
  1216. config RWSEM_GENERIC_SPINLOCK
  1217. bool
  1218. + default y if PREEMPT_RT_FULL
  1219. config RWSEM_XCHGADD_ALGORITHM
  1220. bool
  1221. - default y
  1222. + default y if !PREEMPT_RT_FULL
  1223. config GENERIC_LOCKBREAK
  1224. bool
  1225. @@ -134,6 +135,7 @@ config PPC
  1226. select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
  1227. select GENERIC_STRNCPY_FROM_USER
  1228. select GENERIC_STRNLEN_USER
  1229. + select HAVE_PREEMPT_LAZY
  1230. select HAVE_MOD_ARCH_SPECIFIC
  1231. select MODULES_USE_ELF_RELA
  1232. select CLONE_BACKWARDS
  1233. @@ -321,7 +323,7 @@ menu "Kernel options"
  1234. config HIGHMEM
  1235. bool "High memory support"
  1236. - depends on PPC32
  1237. + depends on PPC32 && !PREEMPT_RT_FULL
  1238. source kernel/Kconfig.hz
  1239. source kernel/Kconfig.preempt
  1240. diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
  1241. index 87e4b2d8dcd4..981e501a4359 100644
  1242. --- a/arch/powerpc/include/asm/thread_info.h
  1243. +++ b/arch/powerpc/include/asm/thread_info.h
  1244. @@ -43,6 +43,8 @@ struct thread_info {
  1245. int cpu; /* cpu we're on */
  1246. int preempt_count; /* 0 => preemptable,
  1247. <0 => BUG */
  1248. + int preempt_lazy_count; /* 0 => preemptable,
  1249. + <0 => BUG */
  1250. unsigned long local_flags; /* private flags for thread */
  1251. #ifdef CONFIG_LIVEPATCH
  1252. unsigned long *livepatch_sp;
  1253. @@ -88,8 +90,7 @@ static inline struct thread_info *current_thread_info(void)
  1254. #define TIF_SYSCALL_TRACE 0 /* syscall trace active */
  1255. #define TIF_SIGPENDING 1 /* signal pending */
  1256. #define TIF_NEED_RESCHED 2 /* rescheduling necessary */
  1257. -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
  1258. - TIF_NEED_RESCHED */
  1259. +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */
  1260. #define TIF_32BIT 4 /* 32 bit binary */
  1261. #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */
  1262. #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
  1263. @@ -107,6 +108,8 @@ static inline struct thread_info *current_thread_info(void)
  1264. #if defined(CONFIG_PPC64)
  1265. #define TIF_ELF2ABI 18 /* function descriptors must die! */
  1266. #endif
  1267. +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling
  1268. + TIF_NEED_RESCHED */
  1269. /* as above, but as bit values */
  1270. #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
  1271. @@ -125,14 +128,16 @@ static inline struct thread_info *current_thread_info(void)
  1272. #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
  1273. #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
  1274. #define _TIF_NOHZ (1<<TIF_NOHZ)
  1275. +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
  1276. #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
  1277. _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
  1278. _TIF_NOHZ)
  1279. #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
  1280. _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
  1281. - _TIF_RESTORE_TM)
  1282. + _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY)
  1283. #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
  1284. +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  1285. /* Bits in local_flags */
  1286. /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
  1287. diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
  1288. index c833d88c423d..96e9fbc3f684 100644
  1289. --- a/arch/powerpc/kernel/asm-offsets.c
  1290. +++ b/arch/powerpc/kernel/asm-offsets.c
  1291. @@ -156,6 +156,7 @@ int main(void)
  1292. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  1293. DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
  1294. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  1295. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  1296. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  1297. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  1298. diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
  1299. index 3841d749a430..6dbaeff192b9 100644
  1300. --- a/arch/powerpc/kernel/entry_32.S
  1301. +++ b/arch/powerpc/kernel/entry_32.S
  1302. @@ -835,7 +835,14 @@ resume_kernel:
  1303. cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
  1304. bne restore
  1305. andi. r8,r8,_TIF_NEED_RESCHED
  1306. + bne+ 1f
  1307. + lwz r0,TI_PREEMPT_LAZY(r9)
  1308. + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
  1309. + bne restore
  1310. + lwz r0,TI_FLAGS(r9)
  1311. + andi. r0,r0,_TIF_NEED_RESCHED_LAZY
  1312. beq+ restore
  1313. +1:
  1314. lwz r3,_MSR(r1)
  1315. andi. r0,r3,MSR_EE /* interrupts off? */
  1316. beq restore /* don't schedule if so */
  1317. @@ -846,11 +853,11 @@ resume_kernel:
  1318. */
  1319. bl trace_hardirqs_off
  1320. #endif
  1321. -1: bl preempt_schedule_irq
  1322. +2: bl preempt_schedule_irq
  1323. CURRENT_THREAD_INFO(r9, r1)
  1324. lwz r3,TI_FLAGS(r9)
  1325. - andi. r0,r3,_TIF_NEED_RESCHED
  1326. - bne- 1b
  1327. + andi. r0,r3,_TIF_NEED_RESCHED_MASK
  1328. + bne- 2b
  1329. #ifdef CONFIG_TRACE_IRQFLAGS
  1330. /* And now, to properly rebalance the above, we tell lockdep they
  1331. * are being turned back on, which will happen when we return
  1332. @@ -1171,7 +1178,7 @@ global_dbcr0:
  1333. #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
  1334. do_work: /* r10 contains MSR_KERNEL here */
  1335. - andi. r0,r9,_TIF_NEED_RESCHED
  1336. + andi. r0,r9,_TIF_NEED_RESCHED_MASK
  1337. beq do_user_signal
  1338. do_resched: /* r10 contains MSR_KERNEL here */
  1339. @@ -1192,7 +1199,7 @@ recheck:
  1340. MTMSRD(r10) /* disable interrupts */
  1341. CURRENT_THREAD_INFO(r9, r1)
  1342. lwz r9,TI_FLAGS(r9)
  1343. - andi. r0,r9,_TIF_NEED_RESCHED
  1344. + andi. r0,r9,_TIF_NEED_RESCHED_MASK
  1345. bne- do_resched
  1346. andi. r0,r9,_TIF_USER_WORK_MASK
  1347. beq restore_user
  1348. diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
  1349. index caa659671599..891080c4a41e 100644
  1350. --- a/arch/powerpc/kernel/entry_64.S
  1351. +++ b/arch/powerpc/kernel/entry_64.S
  1352. @@ -656,7 +656,7 @@ _GLOBAL(ret_from_except_lite)
  1353. bl restore_math
  1354. b restore
  1355. #endif
  1356. -1: andi. r0,r4,_TIF_NEED_RESCHED
  1357. +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK
  1358. beq 2f
  1359. bl restore_interrupts
  1360. SCHEDULE_USER
  1361. @@ -718,10 +718,18 @@ resume_kernel:
  1362. #ifdef CONFIG_PREEMPT
  1363. /* Check if we need to preempt */
  1364. + lwz r8,TI_PREEMPT(r9)
  1365. + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */
  1366. + bne restore
  1367. andi. r0,r4,_TIF_NEED_RESCHED
  1368. + bne+ check_count
  1369. +
  1370. + andi. r0,r4,_TIF_NEED_RESCHED_LAZY
  1371. beq+ restore
  1372. + lwz r8,TI_PREEMPT_LAZY(r9)
  1373. +
  1374. /* Check that preempt_count() == 0 and interrupts are enabled */
  1375. - lwz r8,TI_PREEMPT(r9)
  1376. +check_count:
  1377. cmpwi cr1,r8,0
  1378. ld r0,SOFTE(r1)
  1379. cmpdi r0,0
  1380. @@ -738,7 +746,7 @@ resume_kernel:
  1381. /* Re-test flags and eventually loop */
  1382. CURRENT_THREAD_INFO(r9, r1)
  1383. ld r4,TI_FLAGS(r9)
  1384. - andi. r0,r4,_TIF_NEED_RESCHED
  1385. + andi. r0,r4,_TIF_NEED_RESCHED_MASK
  1386. bne 1b
  1387. /*
  1388. diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
  1389. index 028a22bfa90c..a75e2dd3e71f 100644
  1390. --- a/arch/powerpc/kernel/irq.c
  1391. +++ b/arch/powerpc/kernel/irq.c
  1392. @@ -651,6 +651,7 @@ void irq_ctx_init(void)
  1393. }
  1394. }
  1395. +#ifndef CONFIG_PREEMPT_RT_FULL
  1396. void do_softirq_own_stack(void)
  1397. {
  1398. struct thread_info *curtp, *irqtp;
  1399. @@ -668,6 +669,7 @@ void do_softirq_own_stack(void)
  1400. if (irqtp->flags)
  1401. set_bits(irqtp->flags, &curtp->flags);
  1402. }
  1403. +#endif
  1404. irq_hw_number_t virq_to_hw(unsigned int virq)
  1405. {
  1406. diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
  1407. index 030d72df5dd5..b471a709e100 100644
  1408. --- a/arch/powerpc/kernel/misc_32.S
  1409. +++ b/arch/powerpc/kernel/misc_32.S
  1410. @@ -41,6 +41,7 @@
  1411. * We store the saved ksp_limit in the unused part
  1412. * of the STACK_FRAME_OVERHEAD
  1413. */
  1414. +#ifndef CONFIG_PREEMPT_RT_FULL
  1415. _GLOBAL(call_do_softirq)
  1416. mflr r0
  1417. stw r0,4(r1)
  1418. @@ -57,6 +58,7 @@ _GLOBAL(call_do_softirq)
  1419. stw r10,THREAD+KSP_LIMIT(r2)
  1420. mtlr r0
  1421. blr
  1422. +#endif
  1423. /*
  1424. * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
  1425. diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
  1426. index 4cefe6888b18..cb2ee4be999a 100644
  1427. --- a/arch/powerpc/kernel/misc_64.S
  1428. +++ b/arch/powerpc/kernel/misc_64.S
  1429. @@ -31,6 +31,7 @@
  1430. .text
  1431. +#ifndef CONFIG_PREEMPT_RT_FULL
  1432. _GLOBAL(call_do_softirq)
  1433. mflr r0
  1434. std r0,16(r1)
  1435. @@ -41,6 +42,7 @@ _GLOBAL(call_do_softirq)
  1436. ld r0,16(r1)
  1437. mtlr r0
  1438. blr
  1439. +#endif
  1440. _GLOBAL(call_do_irq)
  1441. mflr r0
  1442. diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
  1443. index 029be26b5a17..9528089ea142 100644
  1444. --- a/arch/powerpc/kvm/Kconfig
  1445. +++ b/arch/powerpc/kvm/Kconfig
  1446. @@ -175,6 +175,7 @@ config KVM_E500MC
  1447. config KVM_MPIC
  1448. bool "KVM in-kernel MPIC emulation"
  1449. depends on KVM && E500
  1450. + depends on !PREEMPT_RT_FULL
  1451. select HAVE_KVM_IRQCHIP
  1452. select HAVE_KVM_IRQFD
  1453. select HAVE_KVM_IRQ_ROUTING
  1454. diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
  1455. index e48462447ff0..2670cee66064 100644
  1456. --- a/arch/powerpc/platforms/ps3/device-init.c
  1457. +++ b/arch/powerpc/platforms/ps3/device-init.c
  1458. @@ -752,7 +752,7 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev,
  1459. }
  1460. pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op);
  1461. - res = wait_event_interruptible(dev->done.wait,
  1462. + res = swait_event_interruptible(dev->done.wait,
  1463. dev->done.done || kthread_should_stop());
  1464. if (kthread_should_stop())
  1465. res = -EINTR;
  1466. diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
  1467. index 6c0378c0b8b5..abd58b4dff97 100644
  1468. --- a/arch/sh/kernel/irq.c
  1469. +++ b/arch/sh/kernel/irq.c
  1470. @@ -147,6 +147,7 @@ void irq_ctx_exit(int cpu)
  1471. hardirq_ctx[cpu] = NULL;
  1472. }
  1473. +#ifndef CONFIG_PREEMPT_RT_FULL
  1474. void do_softirq_own_stack(void)
  1475. {
  1476. struct thread_info *curctx;
  1477. @@ -174,6 +175,7 @@ void do_softirq_own_stack(void)
  1478. "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
  1479. );
  1480. }
  1481. +#endif
  1482. #else
  1483. static inline void handle_one_irq(unsigned int irq)
  1484. {
  1485. diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
  1486. index 8b4152f3a764..c5cca159692a 100644
  1487. --- a/arch/sparc/Kconfig
  1488. +++ b/arch/sparc/Kconfig
  1489. @@ -194,12 +194,10 @@ config NR_CPUS
  1490. source kernel/Kconfig.hz
  1491. config RWSEM_GENERIC_SPINLOCK
  1492. - bool
  1493. - default y if SPARC32
  1494. + def_bool PREEMPT_RT_FULL
  1495. config RWSEM_XCHGADD_ALGORITHM
  1496. - bool
  1497. - default y if SPARC64
  1498. + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
  1499. config GENERIC_HWEIGHT
  1500. bool
  1501. diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
  1502. index 5cbf03c14981..6067d9379e5b 100644
  1503. --- a/arch/sparc/kernel/irq_64.c
  1504. +++ b/arch/sparc/kernel/irq_64.c
  1505. @@ -854,6 +854,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs)
  1506. set_irq_regs(old_regs);
  1507. }
  1508. +#ifndef CONFIG_PREEMPT_RT_FULL
  1509. void do_softirq_own_stack(void)
  1510. {
  1511. void *orig_sp, *sp = softirq_stack[smp_processor_id()];
  1512. @@ -868,6 +869,7 @@ void do_softirq_own_stack(void)
  1513. __asm__ __volatile__("mov %0, %%sp"
  1514. : : "r" (orig_sp));
  1515. }
  1516. +#endif
  1517. #ifdef CONFIG_HOTPLUG_CPU
  1518. void fixup_irqs(void)
  1519. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
  1520. index da8156fd3d58..d8cd3bc807fc 100644
  1521. --- a/arch/x86/Kconfig
  1522. +++ b/arch/x86/Kconfig
  1523. @@ -17,6 +17,7 @@ config X86_64
  1524. ### Arch settings
  1525. config X86
  1526. def_bool y
  1527. + select HAVE_PREEMPT_LAZY
  1528. select ACPI_LEGACY_TABLES_LOOKUP if ACPI
  1529. select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
  1530. select ANON_INODES
  1531. @@ -232,8 +233,11 @@ config ARCH_MAY_HAVE_PC_FDC
  1532. def_bool y
  1533. depends on ISA_DMA_API
  1534. +config RWSEM_GENERIC_SPINLOCK
  1535. + def_bool PREEMPT_RT_FULL
  1536. +
  1537. config RWSEM_XCHGADD_ALGORITHM
  1538. - def_bool y
  1539. + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
  1540. config GENERIC_CALIBRATE_DELAY
  1541. def_bool y
  1542. @@ -897,7 +901,7 @@ config IOMMU_HELPER
  1543. config MAXSMP
  1544. bool "Enable Maximum number of SMP Processors and NUMA Nodes"
  1545. depends on X86_64 && SMP && DEBUG_KERNEL
  1546. - select CPUMASK_OFFSTACK
  1547. + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
  1548. ---help---
  1549. Enable maximum number of CPUS and NUMA Nodes for this architecture.
  1550. If unsure, say N.
  1551. diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
  1552. index aa8b0672f87a..2429414bfc71 100644
  1553. --- a/arch/x86/crypto/aesni-intel_glue.c
  1554. +++ b/arch/x86/crypto/aesni-intel_glue.c
  1555. @@ -372,14 +372,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc,
  1556. err = blkcipher_walk_virt(desc, &walk);
  1557. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1558. - kernel_fpu_begin();
  1559. while ((nbytes = walk.nbytes)) {
  1560. + kernel_fpu_begin();
  1561. aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1562. - nbytes & AES_BLOCK_MASK);
  1563. + nbytes & AES_BLOCK_MASK);
  1564. + kernel_fpu_end();
  1565. nbytes &= AES_BLOCK_SIZE - 1;
  1566. err = blkcipher_walk_done(desc, &walk, nbytes);
  1567. }
  1568. - kernel_fpu_end();
  1569. return err;
  1570. }
  1571. @@ -396,14 +396,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc,
  1572. err = blkcipher_walk_virt(desc, &walk);
  1573. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1574. - kernel_fpu_begin();
  1575. while ((nbytes = walk.nbytes)) {
  1576. + kernel_fpu_begin();
  1577. aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1578. nbytes & AES_BLOCK_MASK);
  1579. + kernel_fpu_end();
  1580. nbytes &= AES_BLOCK_SIZE - 1;
  1581. err = blkcipher_walk_done(desc, &walk, nbytes);
  1582. }
  1583. - kernel_fpu_end();
  1584. return err;
  1585. }
  1586. @@ -420,14 +420,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc,
  1587. err = blkcipher_walk_virt(desc, &walk);
  1588. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1589. - kernel_fpu_begin();
  1590. while ((nbytes = walk.nbytes)) {
  1591. + kernel_fpu_begin();
  1592. aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1593. nbytes & AES_BLOCK_MASK, walk.iv);
  1594. + kernel_fpu_end();
  1595. nbytes &= AES_BLOCK_SIZE - 1;
  1596. err = blkcipher_walk_done(desc, &walk, nbytes);
  1597. }
  1598. - kernel_fpu_end();
  1599. return err;
  1600. }
  1601. @@ -444,14 +444,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc,
  1602. err = blkcipher_walk_virt(desc, &walk);
  1603. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1604. - kernel_fpu_begin();
  1605. while ((nbytes = walk.nbytes)) {
  1606. + kernel_fpu_begin();
  1607. aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1608. nbytes & AES_BLOCK_MASK, walk.iv);
  1609. + kernel_fpu_end();
  1610. nbytes &= AES_BLOCK_SIZE - 1;
  1611. err = blkcipher_walk_done(desc, &walk, nbytes);
  1612. }
  1613. - kernel_fpu_end();
  1614. return err;
  1615. }
  1616. @@ -503,18 +503,20 @@ static int ctr_crypt(struct blkcipher_desc *desc,
  1617. err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
  1618. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1619. - kernel_fpu_begin();
  1620. while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
  1621. + kernel_fpu_begin();
  1622. aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1623. nbytes & AES_BLOCK_MASK, walk.iv);
  1624. + kernel_fpu_end();
  1625. nbytes &= AES_BLOCK_SIZE - 1;
  1626. err = blkcipher_walk_done(desc, &walk, nbytes);
  1627. }
  1628. if (walk.nbytes) {
  1629. + kernel_fpu_begin();
  1630. ctr_crypt_final(ctx, &walk);
  1631. + kernel_fpu_end();
  1632. err = blkcipher_walk_done(desc, &walk, 0);
  1633. }
  1634. - kernel_fpu_end();
  1635. return err;
  1636. }
  1637. diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
  1638. index 8648158f3916..d7699130ee36 100644
  1639. --- a/arch/x86/crypto/cast5_avx_glue.c
  1640. +++ b/arch/x86/crypto/cast5_avx_glue.c
  1641. @@ -59,7 +59,7 @@ static inline void cast5_fpu_end(bool fpu_enabled)
  1642. static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
  1643. bool enc)
  1644. {
  1645. - bool fpu_enabled = false;
  1646. + bool fpu_enabled;
  1647. struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  1648. const unsigned int bsize = CAST5_BLOCK_SIZE;
  1649. unsigned int nbytes;
  1650. @@ -75,7 +75,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
  1651. u8 *wsrc = walk->src.virt.addr;
  1652. u8 *wdst = walk->dst.virt.addr;
  1653. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  1654. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  1655. /* Process multi-block batch */
  1656. if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
  1657. @@ -103,10 +103,9 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
  1658. } while (nbytes >= bsize);
  1659. done:
  1660. + cast5_fpu_end(fpu_enabled);
  1661. err = blkcipher_walk_done(desc, walk, nbytes);
  1662. }
  1663. -
  1664. - cast5_fpu_end(fpu_enabled);
  1665. return err;
  1666. }
  1667. @@ -227,7 +226,7 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
  1668. static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  1669. struct scatterlist *src, unsigned int nbytes)
  1670. {
  1671. - bool fpu_enabled = false;
  1672. + bool fpu_enabled;
  1673. struct blkcipher_walk walk;
  1674. int err;
  1675. @@ -236,12 +235,11 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  1676. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1677. while ((nbytes = walk.nbytes)) {
  1678. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  1679. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  1680. nbytes = __cbc_decrypt(desc, &walk);
  1681. + cast5_fpu_end(fpu_enabled);
  1682. err = blkcipher_walk_done(desc, &walk, nbytes);
  1683. }
  1684. -
  1685. - cast5_fpu_end(fpu_enabled);
  1686. return err;
  1687. }
  1688. @@ -311,7 +309,7 @@ static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
  1689. static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  1690. struct scatterlist *src, unsigned int nbytes)
  1691. {
  1692. - bool fpu_enabled = false;
  1693. + bool fpu_enabled;
  1694. struct blkcipher_walk walk;
  1695. int err;
  1696. @@ -320,13 +318,12 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  1697. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1698. while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
  1699. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  1700. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  1701. nbytes = __ctr_crypt(desc, &walk);
  1702. + cast5_fpu_end(fpu_enabled);
  1703. err = blkcipher_walk_done(desc, &walk, nbytes);
  1704. }
  1705. - cast5_fpu_end(fpu_enabled);
  1706. -
  1707. if (walk.nbytes) {
  1708. ctr_crypt_final(desc, &walk);
  1709. err = blkcipher_walk_done(desc, &walk, 0);
  1710. diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
  1711. index 6a85598931b5..3a506ce7ed93 100644
  1712. --- a/arch/x86/crypto/glue_helper.c
  1713. +++ b/arch/x86/crypto/glue_helper.c
  1714. @@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
  1715. void *ctx = crypto_blkcipher_ctx(desc->tfm);
  1716. const unsigned int bsize = 128 / 8;
  1717. unsigned int nbytes, i, func_bytes;
  1718. - bool fpu_enabled = false;
  1719. + bool fpu_enabled;
  1720. int err;
  1721. err = blkcipher_walk_virt(desc, walk);
  1722. @@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
  1723. u8 *wdst = walk->dst.virt.addr;
  1724. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1725. - desc, fpu_enabled, nbytes);
  1726. + desc, false, nbytes);
  1727. for (i = 0; i < gctx->num_funcs; i++) {
  1728. func_bytes = bsize * gctx->funcs[i].num_blocks;
  1729. @@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
  1730. }
  1731. done:
  1732. + glue_fpu_end(fpu_enabled);
  1733. err = blkcipher_walk_done(desc, walk, nbytes);
  1734. }
  1735. - glue_fpu_end(fpu_enabled);
  1736. return err;
  1737. }
  1738. @@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
  1739. struct scatterlist *src, unsigned int nbytes)
  1740. {
  1741. const unsigned int bsize = 128 / 8;
  1742. - bool fpu_enabled = false;
  1743. + bool fpu_enabled;
  1744. struct blkcipher_walk walk;
  1745. int err;
  1746. @@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
  1747. while ((nbytes = walk.nbytes)) {
  1748. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1749. - desc, fpu_enabled, nbytes);
  1750. + desc, false, nbytes);
  1751. nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
  1752. + glue_fpu_end(fpu_enabled);
  1753. err = blkcipher_walk_done(desc, &walk, nbytes);
  1754. }
  1755. - glue_fpu_end(fpu_enabled);
  1756. return err;
  1757. }
  1758. EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
  1759. @@ -277,7 +277,7 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
  1760. struct scatterlist *src, unsigned int nbytes)
  1761. {
  1762. const unsigned int bsize = 128 / 8;
  1763. - bool fpu_enabled = false;
  1764. + bool fpu_enabled;
  1765. struct blkcipher_walk walk;
  1766. int err;
  1767. @@ -286,13 +286,12 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
  1768. while ((nbytes = walk.nbytes) >= bsize) {
  1769. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1770. - desc, fpu_enabled, nbytes);
  1771. + desc, false, nbytes);
  1772. nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
  1773. + glue_fpu_end(fpu_enabled);
  1774. err = blkcipher_walk_done(desc, &walk, nbytes);
  1775. }
  1776. - glue_fpu_end(fpu_enabled);
  1777. -
  1778. if (walk.nbytes) {
  1779. glue_ctr_crypt_final_128bit(
  1780. gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
  1781. @@ -347,7 +346,7 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
  1782. void *tweak_ctx, void *crypt_ctx)
  1783. {
  1784. const unsigned int bsize = 128 / 8;
  1785. - bool fpu_enabled = false;
  1786. + bool fpu_enabled;
  1787. struct blkcipher_walk walk;
  1788. int err;
  1789. @@ -360,21 +359,21 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
  1790. /* set minimum length to bsize, for tweak_fn */
  1791. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1792. - desc, fpu_enabled,
  1793. + desc, false,
  1794. nbytes < bsize ? bsize : nbytes);
  1795. -
  1796. /* calculate first value of T */
  1797. tweak_fn(tweak_ctx, walk.iv, walk.iv);
  1798. + glue_fpu_end(fpu_enabled);
  1799. while (nbytes) {
  1800. + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1801. + desc, false, nbytes);
  1802. nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
  1803. + glue_fpu_end(fpu_enabled);
  1804. err = blkcipher_walk_done(desc, &walk, nbytes);
  1805. nbytes = walk.nbytes;
  1806. }
  1807. -
  1808. - glue_fpu_end(fpu_enabled);
  1809. -
  1810. return err;
  1811. }
  1812. EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
  1813. diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
  1814. index bdd9cc59d20f..56d01a339ba4 100644
  1815. --- a/arch/x86/entry/common.c
  1816. +++ b/arch/x86/entry/common.c
  1817. @@ -129,7 +129,7 @@ static long syscall_trace_enter(struct pt_regs *regs)
  1818. #define EXIT_TO_USERMODE_LOOP_FLAGS \
  1819. (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
  1820. - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)
  1821. + _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY)
  1822. static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
  1823. {
  1824. @@ -145,9 +145,16 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
  1825. /* We have work to do. */
  1826. local_irq_enable();
  1827. - if (cached_flags & _TIF_NEED_RESCHED)
  1828. + if (cached_flags & _TIF_NEED_RESCHED_MASK)
  1829. schedule();
  1830. +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
  1831. + if (unlikely(current->forced_info.si_signo)) {
  1832. + struct task_struct *t = current;
  1833. + force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
  1834. + t->forced_info.si_signo = 0;
  1835. + }
  1836. +#endif
  1837. if (cached_flags & _TIF_UPROBE)
  1838. uprobe_notify_resume(regs);
  1839. diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
  1840. index edba8606b99a..4a3389535fc6 100644
  1841. --- a/arch/x86/entry/entry_32.S
  1842. +++ b/arch/x86/entry/entry_32.S
  1843. @@ -308,8 +308,25 @@ END(ret_from_exception)
  1844. ENTRY(resume_kernel)
  1845. DISABLE_INTERRUPTS(CLBR_ANY)
  1846. need_resched:
  1847. + # preempt count == 0 + NEED_RS set?
  1848. cmpl $0, PER_CPU_VAR(__preempt_count)
  1849. +#ifndef CONFIG_PREEMPT_LAZY
  1850. jnz restore_all
  1851. +#else
  1852. + jz test_int_off
  1853. +
  1854. + # at least preempt count == 0 ?
  1855. + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
  1856. + jne restore_all
  1857. +
  1858. + movl PER_CPU_VAR(current_task), %ebp
  1859. + cmpl $0,TASK_TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ?
  1860. + jnz restore_all
  1861. +
  1862. + testl $_TIF_NEED_RESCHED_LAZY, TASK_TI_flags(%ebp)
  1863. + jz restore_all
  1864. +test_int_off:
  1865. +#endif
  1866. testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
  1867. jz restore_all
  1868. call preempt_schedule_irq
  1869. diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
  1870. index af4e58132d91..22803e2f7495 100644
  1871. --- a/arch/x86/entry/entry_64.S
  1872. +++ b/arch/x86/entry/entry_64.S
  1873. @@ -575,7 +575,23 @@ retint_kernel:
  1874. bt $9, EFLAGS(%rsp) /* were interrupts off? */
  1875. jnc 1f
  1876. 0: cmpl $0, PER_CPU_VAR(__preempt_count)
  1877. +#ifndef CONFIG_PREEMPT_LAZY
  1878. jnz 1f
  1879. +#else
  1880. + jz do_preempt_schedule_irq
  1881. +
  1882. + # at least preempt count == 0 ?
  1883. + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
  1884. + jnz 1f
  1885. +
  1886. + movq PER_CPU_VAR(current_task), %rcx
  1887. + cmpl $0, TASK_TI_preempt_lazy_count(%rcx)
  1888. + jnz 1f
  1889. +
  1890. + bt $TIF_NEED_RESCHED_LAZY,TASK_TI_flags(%rcx)
  1891. + jnc 1f
  1892. +do_preempt_schedule_irq:
  1893. +#endif
  1894. call preempt_schedule_irq
  1895. jmp 0b
  1896. 1:
  1897. @@ -925,6 +941,7 @@ bad_gs:
  1898. jmp 2b
  1899. .previous
  1900. +#ifndef CONFIG_PREEMPT_RT_FULL
  1901. /* Call softirq on interrupt stack. Interrupts are off. */
  1902. ENTRY(do_softirq_own_stack)
  1903. pushq %rbp
  1904. @@ -937,6 +954,7 @@ ENTRY(do_softirq_own_stack)
  1905. decl PER_CPU_VAR(irq_count)
  1906. ret
  1907. END(do_softirq_own_stack)
  1908. +#endif
  1909. #ifdef CONFIG_XEN
  1910. idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
  1911. diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
  1912. index 17f218645701..11bd1b7ee6eb 100644
  1913. --- a/arch/x86/include/asm/preempt.h
  1914. +++ b/arch/x86/include/asm/preempt.h
  1915. @@ -79,17 +79,46 @@ static __always_inline void __preempt_count_sub(int val)
  1916. * a decrement which hits zero means we have no preempt_count and should
  1917. * reschedule.
  1918. */
  1919. -static __always_inline bool __preempt_count_dec_and_test(void)
  1920. +static __always_inline bool ____preempt_count_dec_and_test(void)
  1921. {
  1922. GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e);
  1923. }
  1924. +static __always_inline bool __preempt_count_dec_and_test(void)
  1925. +{
  1926. + if (____preempt_count_dec_and_test())
  1927. + return true;
  1928. +#ifdef CONFIG_PREEMPT_LAZY
  1929. + if (current_thread_info()->preempt_lazy_count)
  1930. + return false;
  1931. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  1932. +#else
  1933. + return false;
  1934. +#endif
  1935. +}
  1936. +
  1937. /*
  1938. * Returns true when we need to resched and can (barring IRQ state).
  1939. */
  1940. static __always_inline bool should_resched(int preempt_offset)
  1941. {
  1942. +#ifdef CONFIG_PREEMPT_LAZY
  1943. + u32 tmp;
  1944. +
  1945. + tmp = raw_cpu_read_4(__preempt_count);
  1946. + if (tmp == preempt_offset)
  1947. + return true;
  1948. +
  1949. + /* preempt count == 0 ? */
  1950. + tmp &= ~PREEMPT_NEED_RESCHED;
  1951. + if (tmp)
  1952. + return false;
  1953. + if (current_thread_info()->preempt_lazy_count)
  1954. + return false;
  1955. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  1956. +#else
  1957. return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
  1958. +#endif
  1959. }
  1960. #ifdef CONFIG_PREEMPT
  1961. diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
  1962. index 8af22be0fe61..d1328789b759 100644
  1963. --- a/arch/x86/include/asm/signal.h
  1964. +++ b/arch/x86/include/asm/signal.h
  1965. @@ -27,6 +27,19 @@ typedef struct {
  1966. #define SA_IA32_ABI 0x02000000u
  1967. #define SA_X32_ABI 0x01000000u
  1968. +/*
  1969. + * Because some traps use the IST stack, we must keep preemption
  1970. + * disabled while calling do_trap(), but do_trap() may call
  1971. + * force_sig_info() which will grab the signal spin_locks for the
  1972. + * task, which in PREEMPT_RT_FULL are mutexes. By defining
  1973. + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
  1974. + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
  1975. + * trap.
  1976. + */
  1977. +#if defined(CONFIG_PREEMPT_RT_FULL)
  1978. +#define ARCH_RT_DELAYS_SIGNAL_SEND
  1979. +#endif
  1980. +
  1981. #ifndef CONFIG_COMPAT
  1982. typedef sigset_t compat_sigset_t;
  1983. #endif
  1984. diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
  1985. index 58505f01962f..02fa39652cd6 100644
  1986. --- a/arch/x86/include/asm/stackprotector.h
  1987. +++ b/arch/x86/include/asm/stackprotector.h
  1988. @@ -59,7 +59,7 @@
  1989. */
  1990. static __always_inline void boot_init_stack_canary(void)
  1991. {
  1992. - u64 canary;
  1993. + u64 uninitialized_var(canary);
  1994. u64 tsc;
  1995. #ifdef CONFIG_X86_64
  1996. @@ -70,8 +70,15 @@ static __always_inline void boot_init_stack_canary(void)
  1997. * of randomness. The TSC only matters for very early init,
  1998. * there it already has some randomness on most systems. Later
  1999. * on during the bootup the random pool has true entropy too.
  2000. + *
  2001. + * For preempt-rt we need to weaken the randomness a bit, as
  2002. + * we can't call into the random generator from atomic context
  2003. + * due to locking constraints. We just leave canary
  2004. + * uninitialized and use the TSC based randomness on top of it.
  2005. */
  2006. +#ifndef CONFIG_PREEMPT_RT_FULL
  2007. get_random_bytes(&canary, sizeof(canary));
  2008. +#endif
  2009. tsc = rdtsc();
  2010. canary += tsc + (tsc << 32UL);
  2011. diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
  2012. index ad6f5eb07a95..5ceb3a1c2b1a 100644
  2013. --- a/arch/x86/include/asm/thread_info.h
  2014. +++ b/arch/x86/include/asm/thread_info.h
  2015. @@ -54,11 +54,14 @@ struct task_struct;
  2016. struct thread_info {
  2017. unsigned long flags; /* low level flags */
  2018. + int preempt_lazy_count; /* 0 => lazy preemptable
  2019. + <0 => BUG */
  2020. };
  2021. #define INIT_THREAD_INFO(tsk) \
  2022. { \
  2023. .flags = 0, \
  2024. + .preempt_lazy_count = 0, \
  2025. }
  2026. #define init_stack (init_thread_union.stack)
  2027. @@ -67,6 +70,10 @@ struct thread_info {
  2028. #include <asm/asm-offsets.h>
  2029. +#define GET_THREAD_INFO(reg) \
  2030. + _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \
  2031. + _ASM_SUB $(THREAD_SIZE),reg ;
  2032. +
  2033. #endif
  2034. /*
  2035. @@ -85,6 +92,7 @@ struct thread_info {
  2036. #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
  2037. #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
  2038. #define TIF_SECCOMP 8 /* secure computing */
  2039. +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
  2040. #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
  2041. #define TIF_UPROBE 12 /* breakpointed or singlestepping */
  2042. #define TIF_NOTSC 16 /* TSC is not accessible in userland */
  2043. @@ -108,6 +116,7 @@ struct thread_info {
  2044. #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
  2045. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  2046. #define _TIF_SECCOMP (1 << TIF_SECCOMP)
  2047. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  2048. #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
  2049. #define _TIF_UPROBE (1 << TIF_UPROBE)
  2050. #define _TIF_NOTSC (1 << TIF_NOTSC)
  2051. @@ -143,6 +152,8 @@ struct thread_info {
  2052. #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
  2053. #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
  2054. +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  2055. +
  2056. #define STACK_WARN (THREAD_SIZE/8)
  2057. /*
  2058. diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
  2059. index 57ab86d94d64..35d25e27180f 100644
  2060. --- a/arch/x86/include/asm/uv/uv_bau.h
  2061. +++ b/arch/x86/include/asm/uv/uv_bau.h
  2062. @@ -624,9 +624,9 @@ struct bau_control {
  2063. cycles_t send_message;
  2064. cycles_t period_end;
  2065. cycles_t period_time;
  2066. - spinlock_t uvhub_lock;
  2067. - spinlock_t queue_lock;
  2068. - spinlock_t disable_lock;
  2069. + raw_spinlock_t uvhub_lock;
  2070. + raw_spinlock_t queue_lock;
  2071. + raw_spinlock_t disable_lock;
  2072. /* tunables */
  2073. int max_concurr;
  2074. int max_concurr_const;
  2075. @@ -815,15 +815,15 @@ static inline int atom_asr(short i, struct atomic_short *v)
  2076. * to be lowered below the current 'v'. atomic_add_unless can only stop
  2077. * on equal.
  2078. */
  2079. -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
  2080. +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u)
  2081. {
  2082. - spin_lock(lock);
  2083. + raw_spin_lock(lock);
  2084. if (atomic_read(v) >= u) {
  2085. - spin_unlock(lock);
  2086. + raw_spin_unlock(lock);
  2087. return 0;
  2088. }
  2089. atomic_inc(v);
  2090. - spin_unlock(lock);
  2091. + raw_spin_unlock(lock);
  2092. return 1;
  2093. }
  2094. diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
  2095. index 11cc600f4df0..8cbfc51ce339 100644
  2096. --- a/arch/x86/kernel/acpi/boot.c
  2097. +++ b/arch/x86/kernel/acpi/boot.c
  2098. @@ -87,7 +87,9 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
  2099. * ->ioapic_mutex
  2100. * ->ioapic_lock
  2101. */
  2102. +#ifdef CONFIG_X86_IO_APIC
  2103. static DEFINE_MUTEX(acpi_ioapic_lock);
  2104. +#endif
  2105. /* --------------------------------------------------------------------------
  2106. Boot-time Configuration
  2107. diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
  2108. index cf89928dbd46..18b5ec2a71df 100644
  2109. --- a/arch/x86/kernel/apic/io_apic.c
  2110. +++ b/arch/x86/kernel/apic/io_apic.c
  2111. @@ -1712,7 +1712,8 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data)
  2112. static inline bool ioapic_irqd_mask(struct irq_data *data)
  2113. {
  2114. /* If we are moving the irq we need to mask it */
  2115. - if (unlikely(irqd_is_setaffinity_pending(data))) {
  2116. + if (unlikely(irqd_is_setaffinity_pending(data) &&
  2117. + !irqd_irq_inprogress(data))) {
  2118. mask_ioapic_irq(data);
  2119. return true;
  2120. }
  2121. diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
  2122. index c62e015b126c..0cc71257fca6 100644
  2123. --- a/arch/x86/kernel/asm-offsets.c
  2124. +++ b/arch/x86/kernel/asm-offsets.c
  2125. @@ -36,6 +36,7 @@ void common(void) {
  2126. BLANK();
  2127. OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
  2128. + OFFSET(TASK_TI_preempt_lazy_count, task_struct, thread_info.preempt_lazy_count);
  2129. OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
  2130. BLANK();
  2131. @@ -91,4 +92,5 @@ void common(void) {
  2132. BLANK();
  2133. DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
  2134. + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED);
  2135. }
  2136. diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
  2137. index 8ca5f8ad008e..edcbd18b3189 100644
  2138. --- a/arch/x86/kernel/cpu/mcheck/mce.c
  2139. +++ b/arch/x86/kernel/cpu/mcheck/mce.c
  2140. @@ -41,6 +41,8 @@
  2141. #include <linux/debugfs.h>
  2142. #include <linux/irq_work.h>
  2143. #include <linux/export.h>
  2144. +#include <linux/jiffies.h>
  2145. +#include <linux/swork.h>
  2146. #include <linux/jump_label.h>
  2147. #include <asm/processor.h>
  2148. @@ -1306,7 +1308,7 @@ void mce_log_therm_throt_event(__u64 status)
  2149. static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
  2150. static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
  2151. -static DEFINE_PER_CPU(struct timer_list, mce_timer);
  2152. +static DEFINE_PER_CPU(struct hrtimer, mce_timer);
  2153. static unsigned long mce_adjust_timer_default(unsigned long interval)
  2154. {
  2155. @@ -1315,32 +1317,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
  2156. static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
  2157. -static void __restart_timer(struct timer_list *t, unsigned long interval)
  2158. +static enum hrtimer_restart __restart_timer(struct hrtimer *timer, unsigned long interval)
  2159. {
  2160. - unsigned long when = jiffies + interval;
  2161. - unsigned long flags;
  2162. -
  2163. - local_irq_save(flags);
  2164. -
  2165. - if (timer_pending(t)) {
  2166. - if (time_before(when, t->expires))
  2167. - mod_timer(t, when);
  2168. - } else {
  2169. - t->expires = round_jiffies(when);
  2170. - add_timer_on(t, smp_processor_id());
  2171. - }
  2172. -
  2173. - local_irq_restore(flags);
  2174. + if (!interval)
  2175. + return HRTIMER_NORESTART;
  2176. + hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(interval)));
  2177. + return HRTIMER_RESTART;
  2178. }
  2179. -static void mce_timer_fn(unsigned long data)
  2180. +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)
  2181. {
  2182. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  2183. - int cpu = smp_processor_id();
  2184. unsigned long iv;
  2185. - WARN_ON(cpu != data);
  2186. -
  2187. iv = __this_cpu_read(mce_next_interval);
  2188. if (mce_available(this_cpu_ptr(&cpu_info))) {
  2189. @@ -1363,7 +1351,7 @@ static void mce_timer_fn(unsigned long data)
  2190. done:
  2191. __this_cpu_write(mce_next_interval, iv);
  2192. - __restart_timer(t, iv);
  2193. + return __restart_timer(timer, iv);
  2194. }
  2195. /*
  2196. @@ -1371,7 +1359,7 @@ static void mce_timer_fn(unsigned long data)
  2197. */
  2198. void mce_timer_kick(unsigned long interval)
  2199. {
  2200. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  2201. + struct hrtimer *t = this_cpu_ptr(&mce_timer);
  2202. unsigned long iv = __this_cpu_read(mce_next_interval);
  2203. __restart_timer(t, interval);
  2204. @@ -1386,7 +1374,7 @@ static void mce_timer_delete_all(void)
  2205. int cpu;
  2206. for_each_online_cpu(cpu)
  2207. - del_timer_sync(&per_cpu(mce_timer, cpu));
  2208. + hrtimer_cancel(&per_cpu(mce_timer, cpu));
  2209. }
  2210. static void mce_do_trigger(struct work_struct *work)
  2211. @@ -1396,6 +1384,56 @@ static void mce_do_trigger(struct work_struct *work)
  2212. static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
  2213. +static void __mce_notify_work(struct swork_event *event)
  2214. +{
  2215. + /* Not more than two messages every minute */
  2216. + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
  2217. +
  2218. + /* wake processes polling /dev/mcelog */
  2219. + wake_up_interruptible(&mce_chrdev_wait);
  2220. +
  2221. + /*
  2222. + * There is no risk of missing notifications because
  2223. + * work_pending is always cleared before the function is
  2224. + * executed.
  2225. + */
  2226. + if (mce_helper[0] && !work_pending(&mce_trigger_work))
  2227. + schedule_work(&mce_trigger_work);
  2228. +
  2229. + if (__ratelimit(&ratelimit))
  2230. + pr_info(HW_ERR "Machine check events logged\n");
  2231. +}
  2232. +
  2233. +#ifdef CONFIG_PREEMPT_RT_FULL
  2234. +static bool notify_work_ready __read_mostly;
  2235. +static struct swork_event notify_work;
  2236. +
  2237. +static int mce_notify_work_init(void)
  2238. +{
  2239. + int err;
  2240. +
  2241. + err = swork_get();
  2242. + if (err)
  2243. + return err;
  2244. +
  2245. + INIT_SWORK(&notify_work, __mce_notify_work);
  2246. + notify_work_ready = true;
  2247. + return 0;
  2248. +}
  2249. +
  2250. +static void mce_notify_work(void)
  2251. +{
  2252. + if (notify_work_ready)
  2253. + swork_queue(&notify_work);
  2254. +}
  2255. +#else
  2256. +static void mce_notify_work(void)
  2257. +{
  2258. + __mce_notify_work(NULL);
  2259. +}
  2260. +static inline int mce_notify_work_init(void) { return 0; }
  2261. +#endif
  2262. +
  2263. /*
  2264. * Notify the user(s) about new machine check events.
  2265. * Can be called from interrupt context, but not from machine check/NMI
  2266. @@ -1403,19 +1441,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
  2267. */
  2268. int mce_notify_irq(void)
  2269. {
  2270. - /* Not more than two messages every minute */
  2271. - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
  2272. -
  2273. if (test_and_clear_bit(0, &mce_need_notify)) {
  2274. - /* wake processes polling /dev/mcelog */
  2275. - wake_up_interruptible(&mce_chrdev_wait);
  2276. -
  2277. - if (mce_helper[0])
  2278. - schedule_work(&mce_trigger_work);
  2279. -
  2280. - if (__ratelimit(&ratelimit))
  2281. - pr_info(HW_ERR "Machine check events logged\n");
  2282. -
  2283. + mce_notify_work();
  2284. return 1;
  2285. }
  2286. return 0;
  2287. @@ -1721,7 +1748,7 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
  2288. }
  2289. }
  2290. -static void mce_start_timer(unsigned int cpu, struct timer_list *t)
  2291. +static void mce_start_timer(unsigned int cpu, struct hrtimer *t)
  2292. {
  2293. unsigned long iv = check_interval * HZ;
  2294. @@ -1730,16 +1757,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
  2295. per_cpu(mce_next_interval, cpu) = iv;
  2296. - t->expires = round_jiffies(jiffies + iv);
  2297. - add_timer_on(t, cpu);
  2298. + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL),
  2299. + 0, HRTIMER_MODE_REL_PINNED);
  2300. }
  2301. static void __mcheck_cpu_init_timer(void)
  2302. {
  2303. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  2304. + struct hrtimer *t = this_cpu_ptr(&mce_timer);
  2305. unsigned int cpu = smp_processor_id();
  2306. - setup_pinned_timer(t, mce_timer_fn, cpu);
  2307. + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  2308. + t->function = mce_timer_fn;
  2309. mce_start_timer(cpu, t);
  2310. }
  2311. @@ -2464,6 +2492,8 @@ static void mce_disable_cpu(void *h)
  2312. if (!mce_available(raw_cpu_ptr(&cpu_info)))
  2313. return;
  2314. + hrtimer_cancel(this_cpu_ptr(&mce_timer));
  2315. +
  2316. if (!(action & CPU_TASKS_FROZEN))
  2317. cmci_clear();
  2318. @@ -2486,6 +2516,7 @@ static void mce_reenable_cpu(void *h)
  2319. if (b->init)
  2320. wrmsrl(msr_ops.ctl(i), b->ctl);
  2321. }
  2322. + __mcheck_cpu_init_timer();
  2323. }
  2324. /* Get notified when a cpu comes on/off. Be hotplug friendly. */
  2325. @@ -2493,7 +2524,6 @@ static int
  2326. mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
  2327. {
  2328. unsigned int cpu = (unsigned long)hcpu;
  2329. - struct timer_list *t = &per_cpu(mce_timer, cpu);
  2330. switch (action & ~CPU_TASKS_FROZEN) {
  2331. case CPU_ONLINE:
  2332. @@ -2513,11 +2543,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
  2333. break;
  2334. case CPU_DOWN_PREPARE:
  2335. smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
  2336. - del_timer_sync(t);
  2337. break;
  2338. case CPU_DOWN_FAILED:
  2339. smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
  2340. - mce_start_timer(cpu, t);
  2341. break;
  2342. }
  2343. @@ -2556,6 +2584,10 @@ static __init int mcheck_init_device(void)
  2344. goto err_out;
  2345. }
  2346. + err = mce_notify_work_init();
  2347. + if (err)
  2348. + goto err_out;
  2349. +
  2350. if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) {
  2351. err = -ENOMEM;
  2352. goto err_out;
  2353. diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
  2354. index 1f38d9a4d9de..053bf3b2ef39 100644
  2355. --- a/arch/x86/kernel/irq_32.c
  2356. +++ b/arch/x86/kernel/irq_32.c
  2357. @@ -127,6 +127,7 @@ void irq_ctx_init(int cpu)
  2358. cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
  2359. }
  2360. +#ifndef CONFIG_PREEMPT_RT_FULL
  2361. void do_softirq_own_stack(void)
  2362. {
  2363. struct irq_stack *irqstk;
  2364. @@ -143,6 +144,7 @@ void do_softirq_own_stack(void)
  2365. call_on_stack(__do_softirq, isp);
  2366. }
  2367. +#endif
  2368. bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
  2369. {
  2370. diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
  2371. index bd7be8efdc4c..b3b0a7f7b1ca 100644
  2372. --- a/arch/x86/kernel/process_32.c
  2373. +++ b/arch/x86/kernel/process_32.c
  2374. @@ -35,6 +35,7 @@
  2375. #include <linux/uaccess.h>
  2376. #include <linux/io.h>
  2377. #include <linux/kdebug.h>
  2378. +#include <linux/highmem.h>
  2379. #include <asm/pgtable.h>
  2380. #include <asm/ldt.h>
  2381. @@ -195,6 +196,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
  2382. }
  2383. EXPORT_SYMBOL_GPL(start_thread);
  2384. +#ifdef CONFIG_PREEMPT_RT_FULL
  2385. +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
  2386. +{
  2387. + int i;
  2388. +
  2389. + /*
  2390. + * Clear @prev's kmap_atomic mappings
  2391. + */
  2392. + for (i = 0; i < prev_p->kmap_idx; i++) {
  2393. + int idx = i + KM_TYPE_NR * smp_processor_id();
  2394. + pte_t *ptep = kmap_pte - idx;
  2395. +
  2396. + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
  2397. + }
  2398. + /*
  2399. + * Restore @next_p's kmap_atomic mappings
  2400. + */
  2401. + for (i = 0; i < next_p->kmap_idx; i++) {
  2402. + int idx = i + KM_TYPE_NR * smp_processor_id();
  2403. +
  2404. + if (!pte_none(next_p->kmap_pte[i]))
  2405. + set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
  2406. + }
  2407. +}
  2408. +#else
  2409. +static inline void
  2410. +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
  2411. +#endif
  2412. +
  2413. /*
  2414. * switch_to(x,y) should switch tasks from x to y.
  2415. @@ -271,6 +301,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
  2416. task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
  2417. __switch_to_xtra(prev_p, next_p, tss);
  2418. + switch_kmaps(prev_p, next_p);
  2419. +
  2420. /*
  2421. * Leave lazy mode, flushing any hypercalls made here.
  2422. * This must be done before restoring TLS segments so
  2423. diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
  2424. index b24b3c6d686e..02a062b0de5d 100644
  2425. --- a/arch/x86/kvm/lapic.c
  2426. +++ b/arch/x86/kvm/lapic.c
  2427. @@ -1944,6 +1944,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
  2428. hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
  2429. HRTIMER_MODE_ABS_PINNED);
  2430. apic->lapic_timer.timer.function = apic_timer_fn;
  2431. + apic->lapic_timer.timer.irqsafe = 1;
  2432. /*
  2433. * APIC is created enabled. This will prevent kvm_lapic_set_base from
  2434. diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
  2435. index 73304b1a03cc..2a0fae2ef089 100644
  2436. --- a/arch/x86/kvm/x86.c
  2437. +++ b/arch/x86/kvm/x86.c
  2438. @@ -5967,6 +5967,13 @@ int kvm_arch_init(void *opaque)
  2439. goto out;
  2440. }
  2441. +#ifdef CONFIG_PREEMPT_RT_FULL
  2442. + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
  2443. + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
  2444. + return -EOPNOTSUPP;
  2445. + }
  2446. +#endif
  2447. +
  2448. r = kvm_mmu_module_init();
  2449. if (r)
  2450. goto out_free_percpu;
  2451. diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
  2452. index 6d18b70ed5a9..f752724c22e8 100644
  2453. --- a/arch/x86/mm/highmem_32.c
  2454. +++ b/arch/x86/mm/highmem_32.c
  2455. @@ -32,10 +32,11 @@ EXPORT_SYMBOL(kunmap);
  2456. */
  2457. void *kmap_atomic_prot(struct page *page, pgprot_t prot)
  2458. {
  2459. + pte_t pte = mk_pte(page, prot);
  2460. unsigned long vaddr;
  2461. int idx, type;
  2462. - preempt_disable();
  2463. + preempt_disable_nort();
  2464. pagefault_disable();
  2465. if (!PageHighMem(page))
  2466. @@ -45,7 +46,10 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
  2467. idx = type + KM_TYPE_NR*smp_processor_id();
  2468. vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
  2469. BUG_ON(!pte_none(*(kmap_pte-idx)));
  2470. - set_pte(kmap_pte-idx, mk_pte(page, prot));
  2471. +#ifdef CONFIG_PREEMPT_RT_FULL
  2472. + current->kmap_pte[type] = pte;
  2473. +#endif
  2474. + set_pte(kmap_pte-idx, pte);
  2475. arch_flush_lazy_mmu_mode();
  2476. return (void *)vaddr;
  2477. @@ -88,6 +92,9 @@ void __kunmap_atomic(void *kvaddr)
  2478. * is a bad idea also, in case the page changes cacheability
  2479. * attributes or becomes a protected page in a hypervisor.
  2480. */
  2481. +#ifdef CONFIG_PREEMPT_RT_FULL
  2482. + current->kmap_pte[type] = __pte(0);
  2483. +#endif
  2484. kpte_clear_flush(kmap_pte-idx, vaddr);
  2485. kmap_atomic_idx_pop();
  2486. arch_flush_lazy_mmu_mode();
  2487. @@ -100,7 +107,7 @@ void __kunmap_atomic(void *kvaddr)
  2488. #endif
  2489. pagefault_enable();
  2490. - preempt_enable();
  2491. + preempt_enable_nort();
  2492. }
  2493. EXPORT_SYMBOL(__kunmap_atomic);
  2494. diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
  2495. index ada98b39b8ad..585f6829653b 100644
  2496. --- a/arch/x86/mm/iomap_32.c
  2497. +++ b/arch/x86/mm/iomap_32.c
  2498. @@ -56,6 +56,7 @@ EXPORT_SYMBOL_GPL(iomap_free);
  2499. void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
  2500. {
  2501. + pte_t pte = pfn_pte(pfn, prot);
  2502. unsigned long vaddr;
  2503. int idx, type;
  2504. @@ -65,7 +66,12 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
  2505. type = kmap_atomic_idx_push();
  2506. idx = type + KM_TYPE_NR * smp_processor_id();
  2507. vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
  2508. - set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
  2509. + WARN_ON(!pte_none(*(kmap_pte - idx)));
  2510. +
  2511. +#ifdef CONFIG_PREEMPT_RT_FULL
  2512. + current->kmap_pte[type] = pte;
  2513. +#endif
  2514. + set_pte(kmap_pte - idx, pte);
  2515. arch_flush_lazy_mmu_mode();
  2516. return (void *)vaddr;
  2517. @@ -113,6 +119,9 @@ iounmap_atomic(void __iomem *kvaddr)
  2518. * is a bad idea also, in case the page changes cacheability
  2519. * attributes or becomes a protected page in a hypervisor.
  2520. */
  2521. +#ifdef CONFIG_PREEMPT_RT_FULL
  2522. + current->kmap_pte[type] = __pte(0);
  2523. +#endif
  2524. kpte_clear_flush(kmap_pte-idx, vaddr);
  2525. kmap_atomic_idx_pop();
  2526. }
  2527. diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
  2528. index 73dcb0e18c1b..c1085c7ee212 100644
  2529. --- a/arch/x86/mm/pageattr.c
  2530. +++ b/arch/x86/mm/pageattr.c
  2531. @@ -215,7 +215,15 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
  2532. int in_flags, struct page **pages)
  2533. {
  2534. unsigned int i, level;
  2535. +#ifdef CONFIG_PREEMPT
  2536. + /*
  2537. + * Avoid wbinvd() because it causes latencies on all CPUs,
  2538. + * regardless of any CPU isolation that may be in effect.
  2539. + */
  2540. + unsigned long do_wbinvd = 0;
  2541. +#else
  2542. unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
  2543. +#endif
  2544. BUG_ON(irqs_disabled());
  2545. diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
  2546. index 0f0175186f1b..39b5d5b2627d 100644
  2547. --- a/arch/x86/platform/uv/tlb_uv.c
  2548. +++ b/arch/x86/platform/uv/tlb_uv.c
  2549. @@ -748,9 +748,9 @@ static void destination_plugged(struct bau_desc *bau_desc,
  2550. quiesce_local_uvhub(hmaster);
  2551. - spin_lock(&hmaster->queue_lock);
  2552. + raw_spin_lock(&hmaster->queue_lock);
  2553. reset_with_ipi(&bau_desc->distribution, bcp);
  2554. - spin_unlock(&hmaster->queue_lock);
  2555. + raw_spin_unlock(&hmaster->queue_lock);
  2556. end_uvhub_quiesce(hmaster);
  2557. @@ -770,9 +770,9 @@ static void destination_timeout(struct bau_desc *bau_desc,
  2558. quiesce_local_uvhub(hmaster);
  2559. - spin_lock(&hmaster->queue_lock);
  2560. + raw_spin_lock(&hmaster->queue_lock);
  2561. reset_with_ipi(&bau_desc->distribution, bcp);
  2562. - spin_unlock(&hmaster->queue_lock);
  2563. + raw_spin_unlock(&hmaster->queue_lock);
  2564. end_uvhub_quiesce(hmaster);
  2565. @@ -793,7 +793,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
  2566. cycles_t tm1;
  2567. hmaster = bcp->uvhub_master;
  2568. - spin_lock(&hmaster->disable_lock);
  2569. + raw_spin_lock(&hmaster->disable_lock);
  2570. if (!bcp->baudisabled) {
  2571. stat->s_bau_disabled++;
  2572. tm1 = get_cycles();
  2573. @@ -806,7 +806,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
  2574. }
  2575. }
  2576. }
  2577. - spin_unlock(&hmaster->disable_lock);
  2578. + raw_spin_unlock(&hmaster->disable_lock);
  2579. }
  2580. static void count_max_concurr(int stat, struct bau_control *bcp,
  2581. @@ -869,7 +869,7 @@ static void record_send_stats(cycles_t time1, cycles_t time2,
  2582. */
  2583. static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
  2584. {
  2585. - spinlock_t *lock = &hmaster->uvhub_lock;
  2586. + raw_spinlock_t *lock = &hmaster->uvhub_lock;
  2587. atomic_t *v;
  2588. v = &hmaster->active_descriptor_count;
  2589. @@ -1002,7 +1002,7 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
  2590. struct bau_control *hmaster;
  2591. hmaster = bcp->uvhub_master;
  2592. - spin_lock(&hmaster->disable_lock);
  2593. + raw_spin_lock(&hmaster->disable_lock);
  2594. if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
  2595. stat->s_bau_reenabled++;
  2596. for_each_present_cpu(tcpu) {
  2597. @@ -1014,10 +1014,10 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
  2598. tbcp->period_giveups = 0;
  2599. }
  2600. }
  2601. - spin_unlock(&hmaster->disable_lock);
  2602. + raw_spin_unlock(&hmaster->disable_lock);
  2603. return 0;
  2604. }
  2605. - spin_unlock(&hmaster->disable_lock);
  2606. + raw_spin_unlock(&hmaster->disable_lock);
  2607. return -1;
  2608. }
  2609. @@ -1939,9 +1939,9 @@ static void __init init_per_cpu_tunables(void)
  2610. bcp->cong_reps = congested_reps;
  2611. bcp->disabled_period = sec_2_cycles(disabled_period);
  2612. bcp->giveup_limit = giveup_limit;
  2613. - spin_lock_init(&bcp->queue_lock);
  2614. - spin_lock_init(&bcp->uvhub_lock);
  2615. - spin_lock_init(&bcp->disable_lock);
  2616. + raw_spin_lock_init(&bcp->queue_lock);
  2617. + raw_spin_lock_init(&bcp->uvhub_lock);
  2618. + raw_spin_lock_init(&bcp->disable_lock);
  2619. }
  2620. }
  2621. diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
  2622. index b333fc45f9ec..8b85916e6986 100644
  2623. --- a/arch/x86/platform/uv/uv_time.c
  2624. +++ b/arch/x86/platform/uv/uv_time.c
  2625. @@ -57,7 +57,7 @@ static DEFINE_PER_CPU(struct clock_event_device, cpu_ced);
  2626. /* There is one of these allocated per node */
  2627. struct uv_rtc_timer_head {
  2628. - spinlock_t lock;
  2629. + raw_spinlock_t lock;
  2630. /* next cpu waiting for timer, local node relative: */
  2631. int next_cpu;
  2632. /* number of cpus on this node: */
  2633. @@ -177,7 +177,7 @@ static __init int uv_rtc_allocate_timers(void)
  2634. uv_rtc_deallocate_timers();
  2635. return -ENOMEM;
  2636. }
  2637. - spin_lock_init(&head->lock);
  2638. + raw_spin_lock_init(&head->lock);
  2639. head->ncpus = uv_blade_nr_possible_cpus(bid);
  2640. head->next_cpu = -1;
  2641. blade_info[bid] = head;
  2642. @@ -231,7 +231,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires)
  2643. unsigned long flags;
  2644. int next_cpu;
  2645. - spin_lock_irqsave(&head->lock, flags);
  2646. + raw_spin_lock_irqsave(&head->lock, flags);
  2647. next_cpu = head->next_cpu;
  2648. *t = expires;
  2649. @@ -243,12 +243,12 @@ static int uv_rtc_set_timer(int cpu, u64 expires)
  2650. if (uv_setup_intr(cpu, expires)) {
  2651. *t = ULLONG_MAX;
  2652. uv_rtc_find_next_timer(head, pnode);
  2653. - spin_unlock_irqrestore(&head->lock, flags);
  2654. + raw_spin_unlock_irqrestore(&head->lock, flags);
  2655. return -ETIME;
  2656. }
  2657. }
  2658. - spin_unlock_irqrestore(&head->lock, flags);
  2659. + raw_spin_unlock_irqrestore(&head->lock, flags);
  2660. return 0;
  2661. }
  2662. @@ -267,7 +267,7 @@ static int uv_rtc_unset_timer(int cpu, int force)
  2663. unsigned long flags;
  2664. int rc = 0;
  2665. - spin_lock_irqsave(&head->lock, flags);
  2666. + raw_spin_lock_irqsave(&head->lock, flags);
  2667. if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
  2668. rc = 1;
  2669. @@ -279,7 +279,7 @@ static int uv_rtc_unset_timer(int cpu, int force)
  2670. uv_rtc_find_next_timer(head, pnode);
  2671. }
  2672. - spin_unlock_irqrestore(&head->lock, flags);
  2673. + raw_spin_unlock_irqrestore(&head->lock, flags);
  2674. return rc;
  2675. }
  2676. @@ -299,13 +299,18 @@ static int uv_rtc_unset_timer(int cpu, int force)
  2677. static cycle_t uv_read_rtc(struct clocksource *cs)
  2678. {
  2679. unsigned long offset;
  2680. + cycle_t cycles;
  2681. + preempt_disable();
  2682. if (uv_get_min_hub_revision_id() == 1)
  2683. offset = 0;
  2684. else
  2685. offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
  2686. - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
  2687. + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
  2688. + preempt_enable();
  2689. +
  2690. + return cycles;
  2691. }
  2692. /*
  2693. diff --git a/block/blk-core.c b/block/blk-core.c
  2694. index 23daf40be371..e8341f78f119 100644
  2695. --- a/block/blk-core.c
  2696. +++ b/block/blk-core.c
  2697. @@ -125,6 +125,9 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
  2698. INIT_LIST_HEAD(&rq->queuelist);
  2699. INIT_LIST_HEAD(&rq->timeout_list);
  2700. +#ifdef CONFIG_PREEMPT_RT_FULL
  2701. + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
  2702. +#endif
  2703. rq->cpu = -1;
  2704. rq->q = q;
  2705. rq->__sector = (sector_t) -1;
  2706. @@ -233,7 +236,7 @@ EXPORT_SYMBOL(blk_start_queue_async);
  2707. **/
  2708. void blk_start_queue(struct request_queue *q)
  2709. {
  2710. - WARN_ON(!in_interrupt() && !irqs_disabled());
  2711. + WARN_ON_NONRT(!in_interrupt() && !irqs_disabled());
  2712. queue_flag_clear(QUEUE_FLAG_STOPPED, q);
  2713. __blk_run_queue(q);
  2714. @@ -660,7 +663,7 @@ int blk_queue_enter(struct request_queue *q, bool nowait)
  2715. if (nowait)
  2716. return -EBUSY;
  2717. - ret = wait_event_interruptible(q->mq_freeze_wq,
  2718. + ret = swait_event_interruptible(q->mq_freeze_wq,
  2719. !atomic_read(&q->mq_freeze_depth) ||
  2720. blk_queue_dying(q));
  2721. if (blk_queue_dying(q))
  2722. @@ -680,7 +683,7 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref)
  2723. struct request_queue *q =
  2724. container_of(ref, struct request_queue, q_usage_counter);
  2725. - wake_up_all(&q->mq_freeze_wq);
  2726. + swake_up_all(&q->mq_freeze_wq);
  2727. }
  2728. static void blk_rq_timed_out_timer(unsigned long data)
  2729. @@ -750,7 +753,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
  2730. q->bypass_depth = 1;
  2731. __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
  2732. - init_waitqueue_head(&q->mq_freeze_wq);
  2733. + init_swait_queue_head(&q->mq_freeze_wq);
  2734. /*
  2735. * Init percpu_ref in atomic mode so that it's faster to shutdown.
  2736. @@ -3202,7 +3205,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
  2737. blk_run_queue_async(q);
  2738. else
  2739. __blk_run_queue(q);
  2740. - spin_unlock(q->queue_lock);
  2741. + spin_unlock_irq(q->queue_lock);
  2742. }
  2743. static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
  2744. @@ -3250,7 +3253,6 @@ EXPORT_SYMBOL(blk_check_plugged);
  2745. void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  2746. {
  2747. struct request_queue *q;
  2748. - unsigned long flags;
  2749. struct request *rq;
  2750. LIST_HEAD(list);
  2751. unsigned int depth;
  2752. @@ -3270,11 +3272,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  2753. q = NULL;
  2754. depth = 0;
  2755. - /*
  2756. - * Save and disable interrupts here, to avoid doing it for every
  2757. - * queue lock we have to take.
  2758. - */
  2759. - local_irq_save(flags);
  2760. while (!list_empty(&list)) {
  2761. rq = list_entry_rq(list.next);
  2762. list_del_init(&rq->queuelist);
  2763. @@ -3287,7 +3284,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  2764. queue_unplugged(q, depth, from_schedule);
  2765. q = rq->q;
  2766. depth = 0;
  2767. - spin_lock(q->queue_lock);
  2768. + spin_lock_irq(q->queue_lock);
  2769. }
  2770. /*
  2771. @@ -3314,8 +3311,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  2772. */
  2773. if (q)
  2774. queue_unplugged(q, depth, from_schedule);
  2775. -
  2776. - local_irq_restore(flags);
  2777. }
  2778. void blk_finish_plug(struct blk_plug *plug)
  2779. diff --git a/block/blk-ioc.c b/block/blk-ioc.c
  2780. index 381cb50a673c..dc8785233d94 100644
  2781. --- a/block/blk-ioc.c
  2782. +++ b/block/blk-ioc.c
  2783. @@ -7,6 +7,7 @@
  2784. #include <linux/bio.h>
  2785. #include <linux/blkdev.h>
  2786. #include <linux/slab.h>
  2787. +#include <linux/delay.h>
  2788. #include "blk.h"
  2789. @@ -109,7 +110,7 @@ static void ioc_release_fn(struct work_struct *work)
  2790. spin_unlock(q->queue_lock);
  2791. } else {
  2792. spin_unlock_irqrestore(&ioc->lock, flags);
  2793. - cpu_relax();
  2794. + cpu_chill();
  2795. spin_lock_irqsave_nested(&ioc->lock, flags, 1);
  2796. }
  2797. }
  2798. @@ -187,7 +188,7 @@ void put_io_context_active(struct io_context *ioc)
  2799. spin_unlock(icq->q->queue_lock);
  2800. } else {
  2801. spin_unlock_irqrestore(&ioc->lock, flags);
  2802. - cpu_relax();
  2803. + cpu_chill();
  2804. goto retry;
  2805. }
  2806. }
  2807. diff --git a/block/blk-mq.c b/block/blk-mq.c
  2808. index 10f8f94b7f20..82500641f37b 100644
  2809. --- a/block/blk-mq.c
  2810. +++ b/block/blk-mq.c
  2811. @@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
  2812. static void blk_mq_freeze_queue_wait(struct request_queue *q)
  2813. {
  2814. - wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
  2815. + swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
  2816. }
  2817. /*
  2818. @@ -110,7 +110,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
  2819. WARN_ON_ONCE(freeze_depth < 0);
  2820. if (!freeze_depth) {
  2821. percpu_ref_reinit(&q->q_usage_counter);
  2822. - wake_up_all(&q->mq_freeze_wq);
  2823. + swake_up_all(&q->mq_freeze_wq);
  2824. }
  2825. }
  2826. EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
  2827. @@ -129,7 +129,7 @@ void blk_mq_wake_waiters(struct request_queue *q)
  2828. * dying, we need to ensure that processes currently waiting on
  2829. * the queue are notified as well.
  2830. */
  2831. - wake_up_all(&q->mq_freeze_wq);
  2832. + swake_up_all(&q->mq_freeze_wq);
  2833. }
  2834. bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
  2835. @@ -177,6 +177,9 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
  2836. rq->resid_len = 0;
  2837. rq->sense = NULL;
  2838. +#ifdef CONFIG_PREEMPT_RT_FULL
  2839. + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
  2840. +#endif
  2841. INIT_LIST_HEAD(&rq->timeout_list);
  2842. rq->timeout = 0;
  2843. @@ -345,6 +348,17 @@ void blk_mq_end_request(struct request *rq, int error)
  2844. }
  2845. EXPORT_SYMBOL(blk_mq_end_request);
  2846. +#ifdef CONFIG_PREEMPT_RT_FULL
  2847. +
  2848. +void __blk_mq_complete_request_remote_work(struct work_struct *work)
  2849. +{
  2850. + struct request *rq = container_of(work, struct request, work);
  2851. +
  2852. + rq->q->softirq_done_fn(rq);
  2853. +}
  2854. +
  2855. +#else
  2856. +
  2857. static void __blk_mq_complete_request_remote(void *data)
  2858. {
  2859. struct request *rq = data;
  2860. @@ -352,6 +366,8 @@ static void __blk_mq_complete_request_remote(void *data)
  2861. rq->q->softirq_done_fn(rq);
  2862. }
  2863. +#endif
  2864. +
  2865. static void blk_mq_ipi_complete_request(struct request *rq)
  2866. {
  2867. struct blk_mq_ctx *ctx = rq->mq_ctx;
  2868. @@ -363,19 +379,23 @@ static void blk_mq_ipi_complete_request(struct request *rq)
  2869. return;
  2870. }
  2871. - cpu = get_cpu();
  2872. + cpu = get_cpu_light();
  2873. if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
  2874. shared = cpus_share_cache(cpu, ctx->cpu);
  2875. if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
  2876. +#ifdef CONFIG_PREEMPT_RT_FULL
  2877. + schedule_work_on(ctx->cpu, &rq->work);
  2878. +#else
  2879. rq->csd.func = __blk_mq_complete_request_remote;
  2880. rq->csd.info = rq;
  2881. rq->csd.flags = 0;
  2882. smp_call_function_single_async(ctx->cpu, &rq->csd);
  2883. +#endif
  2884. } else {
  2885. rq->q->softirq_done_fn(rq);
  2886. }
  2887. - put_cpu();
  2888. + put_cpu_light();
  2889. }
  2890. static void __blk_mq_complete_request(struct request *rq)
  2891. @@ -906,14 +926,14 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
  2892. return;
  2893. if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
  2894. - int cpu = get_cpu();
  2895. + int cpu = get_cpu_light();
  2896. if (cpumask_test_cpu(cpu, hctx->cpumask)) {
  2897. __blk_mq_run_hw_queue(hctx);
  2898. - put_cpu();
  2899. + put_cpu_light();
  2900. return;
  2901. }
  2902. - put_cpu();
  2903. + put_cpu_light();
  2904. }
  2905. kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work);
  2906. diff --git a/block/blk-mq.h b/block/blk-mq.h
  2907. index c55bcf67b956..c26a84d44cc4 100644
  2908. --- a/block/blk-mq.h
  2909. +++ b/block/blk-mq.h
  2910. @@ -73,12 +73,12 @@ static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
  2911. */
  2912. static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
  2913. {
  2914. - return __blk_mq_get_ctx(q, get_cpu());
  2915. + return __blk_mq_get_ctx(q, get_cpu_light());
  2916. }
  2917. static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
  2918. {
  2919. - put_cpu();
  2920. + put_cpu_light();
  2921. }
  2922. struct blk_mq_alloc_data {
  2923. diff --git a/block/blk-softirq.c b/block/blk-softirq.c
  2924. index 06cf9807f49a..c40342643ca0 100644
  2925. --- a/block/blk-softirq.c
  2926. +++ b/block/blk-softirq.c
  2927. @@ -51,6 +51,7 @@ static void trigger_softirq(void *data)
  2928. raise_softirq_irqoff(BLOCK_SOFTIRQ);
  2929. local_irq_restore(flags);
  2930. + preempt_check_resched_rt();
  2931. }
  2932. /*
  2933. @@ -89,6 +90,7 @@ static int blk_softirq_cpu_dead(unsigned int cpu)
  2934. this_cpu_ptr(&blk_cpu_done));
  2935. raise_softirq_irqoff(BLOCK_SOFTIRQ);
  2936. local_irq_enable();
  2937. + preempt_check_resched_rt();
  2938. return 0;
  2939. }
  2940. @@ -141,6 +143,7 @@ void __blk_complete_request(struct request *req)
  2941. goto do_local;
  2942. local_irq_restore(flags);
  2943. + preempt_check_resched_rt();
  2944. }
  2945. /**
  2946. diff --git a/block/bounce.c b/block/bounce.c
  2947. index 1cb5dd3a5da1..2f1ec8a67cbe 100644
  2948. --- a/block/bounce.c
  2949. +++ b/block/bounce.c
  2950. @@ -55,11 +55,11 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
  2951. unsigned long flags;
  2952. unsigned char *vto;
  2953. - local_irq_save(flags);
  2954. + local_irq_save_nort(flags);
  2955. vto = kmap_atomic(to->bv_page);
  2956. memcpy(vto + to->bv_offset, vfrom, to->bv_len);
  2957. kunmap_atomic(vto);
  2958. - local_irq_restore(flags);
  2959. + local_irq_restore_nort(flags);
  2960. }
  2961. #else /* CONFIG_HIGHMEM */
  2962. diff --git a/crypto/algapi.c b/crypto/algapi.c
  2963. index 1fad2a6b3bbb..ecb7315426a9 100644
  2964. --- a/crypto/algapi.c
  2965. +++ b/crypto/algapi.c
  2966. @@ -719,13 +719,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2);
  2967. int crypto_register_notifier(struct notifier_block *nb)
  2968. {
  2969. - return blocking_notifier_chain_register(&crypto_chain, nb);
  2970. + return srcu_notifier_chain_register(&crypto_chain, nb);
  2971. }
  2972. EXPORT_SYMBOL_GPL(crypto_register_notifier);
  2973. int crypto_unregister_notifier(struct notifier_block *nb)
  2974. {
  2975. - return blocking_notifier_chain_unregister(&crypto_chain, nb);
  2976. + return srcu_notifier_chain_unregister(&crypto_chain, nb);
  2977. }
  2978. EXPORT_SYMBOL_GPL(crypto_unregister_notifier);
  2979. diff --git a/crypto/api.c b/crypto/api.c
  2980. index bbc147cb5dec..bc1a848f02ec 100644
  2981. --- a/crypto/api.c
  2982. +++ b/crypto/api.c
  2983. @@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(crypto_alg_list);
  2984. DECLARE_RWSEM(crypto_alg_sem);
  2985. EXPORT_SYMBOL_GPL(crypto_alg_sem);
  2986. -BLOCKING_NOTIFIER_HEAD(crypto_chain);
  2987. +SRCU_NOTIFIER_HEAD(crypto_chain);
  2988. EXPORT_SYMBOL_GPL(crypto_chain);
  2989. static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg);
  2990. @@ -236,10 +236,10 @@ int crypto_probing_notify(unsigned long val, void *v)
  2991. {
  2992. int ok;
  2993. - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
  2994. + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
  2995. if (ok == NOTIFY_DONE) {
  2996. request_module("cryptomgr");
  2997. - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
  2998. + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
  2999. }
  3000. return ok;
  3001. diff --git a/crypto/internal.h b/crypto/internal.h
  3002. index 7eefcdb00227..0ecc7f5a2f40 100644
  3003. --- a/crypto/internal.h
  3004. +++ b/crypto/internal.h
  3005. @@ -47,7 +47,7 @@ struct crypto_larval {
  3006. extern struct list_head crypto_alg_list;
  3007. extern struct rw_semaphore crypto_alg_sem;
  3008. -extern struct blocking_notifier_head crypto_chain;
  3009. +extern struct srcu_notifier_head crypto_chain;
  3010. #ifdef CONFIG_PROC_FS
  3011. void __init crypto_init_proc(void);
  3012. @@ -146,7 +146,7 @@ static inline int crypto_is_moribund(struct crypto_alg *alg)
  3013. static inline void crypto_notify(unsigned long val, void *v)
  3014. {
  3015. - blocking_notifier_call_chain(&crypto_chain, val, v);
  3016. + srcu_notifier_call_chain(&crypto_chain, val, v);
  3017. }
  3018. #endif /* _CRYPTO_INTERNAL_H */
  3019. diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
  3020. index 750fa824d42c..441edf51484a 100644
  3021. --- a/drivers/acpi/acpica/acglobal.h
  3022. +++ b/drivers/acpi/acpica/acglobal.h
  3023. @@ -116,7 +116,7 @@ ACPI_GLOBAL(u8, acpi_gbl_global_lock_pending);
  3024. * interrupt level
  3025. */
  3026. ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */
  3027. -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
  3028. +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
  3029. ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock);
  3030. /* Mutex for _OSI support */
  3031. diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c
  3032. index 3b7fb99362b6..696bf8e62afb 100644
  3033. --- a/drivers/acpi/acpica/hwregs.c
  3034. +++ b/drivers/acpi/acpica/hwregs.c
  3035. @@ -363,14 +363,14 @@ acpi_status acpi_hw_clear_acpi_status(void)
  3036. ACPI_BITMASK_ALL_FIXED_STATUS,
  3037. ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address)));
  3038. - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
  3039. + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
  3040. /* Clear the fixed events in PM1 A/B */
  3041. status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
  3042. ACPI_BITMASK_ALL_FIXED_STATUS);
  3043. - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
  3044. + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
  3045. if (ACPI_FAILURE(status)) {
  3046. goto exit;
  3047. diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c
  3048. index 98c26ff39409..6e236f2ea791 100644
  3049. --- a/drivers/acpi/acpica/hwxface.c
  3050. +++ b/drivers/acpi/acpica/hwxface.c
  3051. @@ -373,7 +373,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value)
  3052. return_ACPI_STATUS(AE_BAD_PARAMETER);
  3053. }
  3054. - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
  3055. + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
  3056. /*
  3057. * At this point, we know that the parent register is one of the
  3058. @@ -434,7 +434,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value)
  3059. unlock_and_exit:
  3060. - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
  3061. + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
  3062. return_ACPI_STATUS(status);
  3063. }
  3064. diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c
  3065. index 15073375bd00..357e7ca5a587 100644
  3066. --- a/drivers/acpi/acpica/utmutex.c
  3067. +++ b/drivers/acpi/acpica/utmutex.c
  3068. @@ -88,7 +88,7 @@ acpi_status acpi_ut_mutex_initialize(void)
  3069. return_ACPI_STATUS (status);
  3070. }
  3071. - status = acpi_os_create_lock (&acpi_gbl_hardware_lock);
  3072. + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock);
  3073. if (ACPI_FAILURE (status)) {
  3074. return_ACPI_STATUS (status);
  3075. }
  3076. @@ -145,7 +145,7 @@ void acpi_ut_mutex_terminate(void)
  3077. /* Delete the spinlocks */
  3078. acpi_os_delete_lock(acpi_gbl_gpe_lock);
  3079. - acpi_os_delete_lock(acpi_gbl_hardware_lock);
  3080. + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock);
  3081. acpi_os_delete_lock(acpi_gbl_reference_count_lock);
  3082. /* Delete the reader/writer lock */
  3083. diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
  3084. index 8d22acdf90f0..64fbad747da9 100644
  3085. --- a/drivers/ata/libata-sff.c
  3086. +++ b/drivers/ata/libata-sff.c
  3087. @@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf,
  3088. unsigned long flags;
  3089. unsigned int consumed;
  3090. - local_irq_save(flags);
  3091. + local_irq_save_nort(flags);
  3092. consumed = ata_sff_data_xfer32(dev, buf, buflen, rw);
  3093. - local_irq_restore(flags);
  3094. + local_irq_restore_nort(flags);
  3095. return consumed;
  3096. }
  3097. @@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
  3098. unsigned long flags;
  3099. /* FIXME: use a bounce buffer */
  3100. - local_irq_save(flags);
  3101. + local_irq_save_nort(flags);
  3102. buf = kmap_atomic(page);
  3103. /* do the actual data transfer */
  3104. @@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
  3105. do_write);
  3106. kunmap_atomic(buf);
  3107. - local_irq_restore(flags);
  3108. + local_irq_restore_nort(flags);
  3109. } else {
  3110. buf = page_address(page);
  3111. ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size,
  3112. @@ -864,7 +864,7 @@ static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)
  3113. unsigned long flags;
  3114. /* FIXME: use bounce buffer */
  3115. - local_irq_save(flags);
  3116. + local_irq_save_nort(flags);
  3117. buf = kmap_atomic(page);
  3118. /* do the actual data transfer */
  3119. @@ -872,7 +872,7 @@ static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)
  3120. count, rw);
  3121. kunmap_atomic(buf);
  3122. - local_irq_restore(flags);
  3123. + local_irq_restore_nort(flags);
  3124. } else {
  3125. buf = page_address(page);
  3126. consumed = ap->ops->sff_data_xfer(dev, buf + offset,
  3127. diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
  3128. index 4b5cd3a7b2b6..8c93ee150ee8 100644
  3129. --- a/drivers/block/zram/zcomp.c
  3130. +++ b/drivers/block/zram/zcomp.c
  3131. @@ -118,12 +118,20 @@ ssize_t zcomp_available_show(const char *comp, char *buf)
  3132. struct zcomp_strm *zcomp_stream_get(struct zcomp *comp)
  3133. {
  3134. - return *get_cpu_ptr(comp->stream);
  3135. + struct zcomp_strm *zstrm;
  3136. +
  3137. + zstrm = *get_local_ptr(comp->stream);
  3138. + spin_lock(&zstrm->zcomp_lock);
  3139. + return zstrm;
  3140. }
  3141. void zcomp_stream_put(struct zcomp *comp)
  3142. {
  3143. - put_cpu_ptr(comp->stream);
  3144. + struct zcomp_strm *zstrm;
  3145. +
  3146. + zstrm = *this_cpu_ptr(comp->stream);
  3147. + spin_unlock(&zstrm->zcomp_lock);
  3148. + put_local_ptr(zstrm);
  3149. }
  3150. int zcomp_compress(struct zcomp_strm *zstrm,
  3151. @@ -174,6 +182,7 @@ static int __zcomp_cpu_notifier(struct zcomp *comp,
  3152. pr_err("Can't allocate a compression stream\n");
  3153. return NOTIFY_BAD;
  3154. }
  3155. + spin_lock_init(&zstrm->zcomp_lock);
  3156. *per_cpu_ptr(comp->stream, cpu) = zstrm;
  3157. break;
  3158. case CPU_DEAD:
  3159. diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
  3160. index 478cac2ed465..f7a6efdc3285 100644
  3161. --- a/drivers/block/zram/zcomp.h
  3162. +++ b/drivers/block/zram/zcomp.h
  3163. @@ -14,6 +14,7 @@ struct zcomp_strm {
  3164. /* compression/decompression buffer */
  3165. void *buffer;
  3166. struct crypto_comp *tfm;
  3167. + spinlock_t zcomp_lock;
  3168. };
  3169. /* dynamic per-device compression frontend */
  3170. diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
  3171. index b7c0b69a02f5..47d033b8a966 100644
  3172. --- a/drivers/block/zram/zram_drv.c
  3173. +++ b/drivers/block/zram/zram_drv.c
  3174. @@ -528,6 +528,8 @@ static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
  3175. goto out_error;
  3176. }
  3177. + zram_meta_init_table_locks(meta, disksize);
  3178. +
  3179. return meta;
  3180. out_error:
  3181. @@ -575,28 +577,28 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
  3182. struct zram_meta *meta = zram->meta;
  3183. unsigned long handle;
  3184. unsigned int size;
  3185. + struct zcomp_strm *zstrm;
  3186. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3187. + zram_lock_table(&meta->table[index]);
  3188. handle = meta->table[index].handle;
  3189. size = zram_get_obj_size(meta, index);
  3190. if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
  3191. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3192. + zram_unlock_table(&meta->table[index]);
  3193. memset(mem, 0, PAGE_SIZE);
  3194. return 0;
  3195. }
  3196. + zstrm = zcomp_stream_get(zram->comp);
  3197. cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
  3198. if (size == PAGE_SIZE) {
  3199. memcpy(mem, cmem, PAGE_SIZE);
  3200. } else {
  3201. - struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
  3202. -
  3203. ret = zcomp_decompress(zstrm, cmem, size, mem);
  3204. - zcomp_stream_put(zram->comp);
  3205. }
  3206. zs_unmap_object(meta->mem_pool, handle);
  3207. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3208. + zcomp_stream_put(zram->comp);
  3209. + zram_unlock_table(&meta->table[index]);
  3210. /* Should NEVER happen. Return bio error if it does. */
  3211. if (unlikely(ret)) {
  3212. @@ -616,14 +618,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
  3213. struct zram_meta *meta = zram->meta;
  3214. page = bvec->bv_page;
  3215. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3216. + zram_lock_table(&meta->table[index]);
  3217. if (unlikely(!meta->table[index].handle) ||
  3218. zram_test_flag(meta, index, ZRAM_ZERO)) {
  3219. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3220. + zram_unlock_table(&meta->table[index]);
  3221. handle_zero_page(bvec);
  3222. return 0;
  3223. }
  3224. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3225. + zram_unlock_table(&meta->table[index]);
  3226. if (is_partial_io(bvec))
  3227. /* Use a temporary buffer to decompress the page */
  3228. @@ -700,10 +702,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
  3229. if (user_mem)
  3230. kunmap_atomic(user_mem);
  3231. /* Free memory associated with this sector now. */
  3232. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3233. + zram_lock_table(&meta->table[index]);
  3234. zram_free_page(zram, index);
  3235. zram_set_flag(meta, index, ZRAM_ZERO);
  3236. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3237. + zram_unlock_table(&meta->table[index]);
  3238. atomic64_inc(&zram->stats.zero_pages);
  3239. ret = 0;
  3240. @@ -794,12 +796,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
  3241. * Free memory associated with this sector
  3242. * before overwriting unused sectors.
  3243. */
  3244. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3245. + zram_lock_table(&meta->table[index]);
  3246. zram_free_page(zram, index);
  3247. meta->table[index].handle = handle;
  3248. zram_set_obj_size(meta, index, clen);
  3249. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3250. + zram_unlock_table(&meta->table[index]);
  3251. /* Update stats */
  3252. atomic64_add(clen, &zram->stats.compr_data_size);
  3253. @@ -842,9 +844,9 @@ static void zram_bio_discard(struct zram *zram, u32 index,
  3254. }
  3255. while (n >= PAGE_SIZE) {
  3256. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3257. + zram_lock_table(&meta->table[index]);
  3258. zram_free_page(zram, index);
  3259. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3260. + zram_unlock_table(&meta->table[index]);
  3261. atomic64_inc(&zram->stats.notify_free);
  3262. index++;
  3263. n -= PAGE_SIZE;
  3264. @@ -973,9 +975,9 @@ static void zram_slot_free_notify(struct block_device *bdev,
  3265. zram = bdev->bd_disk->private_data;
  3266. meta = zram->meta;
  3267. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3268. + zram_lock_table(&meta->table[index]);
  3269. zram_free_page(zram, index);
  3270. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3271. + zram_unlock_table(&meta->table[index]);
  3272. atomic64_inc(&zram->stats.notify_free);
  3273. }
  3274. diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
  3275. index 74fcf10da374..fd4020c99b9e 100644
  3276. --- a/drivers/block/zram/zram_drv.h
  3277. +++ b/drivers/block/zram/zram_drv.h
  3278. @@ -73,6 +73,9 @@ enum zram_pageflags {
  3279. struct zram_table_entry {
  3280. unsigned long handle;
  3281. unsigned long value;
  3282. +#ifdef CONFIG_PREEMPT_RT_BASE
  3283. + spinlock_t lock;
  3284. +#endif
  3285. };
  3286. struct zram_stats {
  3287. @@ -120,4 +123,42 @@ struct zram {
  3288. */
  3289. bool claim; /* Protected by bdev->bd_mutex */
  3290. };
  3291. +
  3292. +#ifndef CONFIG_PREEMPT_RT_BASE
  3293. +static inline void zram_lock_table(struct zram_table_entry *table)
  3294. +{
  3295. + bit_spin_lock(ZRAM_ACCESS, &table->value);
  3296. +}
  3297. +
  3298. +static inline void zram_unlock_table(struct zram_table_entry *table)
  3299. +{
  3300. + bit_spin_unlock(ZRAM_ACCESS, &table->value);
  3301. +}
  3302. +
  3303. +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) { }
  3304. +#else /* CONFIG_PREEMPT_RT_BASE */
  3305. +static inline void zram_lock_table(struct zram_table_entry *table)
  3306. +{
  3307. + spin_lock(&table->lock);
  3308. + __set_bit(ZRAM_ACCESS, &table->value);
  3309. +}
  3310. +
  3311. +static inline void zram_unlock_table(struct zram_table_entry *table)
  3312. +{
  3313. + __clear_bit(ZRAM_ACCESS, &table->value);
  3314. + spin_unlock(&table->lock);
  3315. +}
  3316. +
  3317. +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize)
  3318. +{
  3319. + size_t num_pages = disksize >> PAGE_SHIFT;
  3320. + size_t index;
  3321. +
  3322. + for (index = 0; index < num_pages; index++) {
  3323. + spinlock_t *lock = &meta->table[index].lock;
  3324. + spin_lock_init(lock);
  3325. + }
  3326. +}
  3327. +#endif /* CONFIG_PREEMPT_RT_BASE */
  3328. +
  3329. #endif
  3330. diff --git a/drivers/char/random.c b/drivers/char/random.c
  3331. index 08d1dd58c0d2..25ee319dc8e3 100644
  3332. --- a/drivers/char/random.c
  3333. +++ b/drivers/char/random.c
  3334. @@ -262,6 +262,7 @@
  3335. #include <linux/syscalls.h>
  3336. #include <linux/completion.h>
  3337. #include <linux/uuid.h>
  3338. +#include <linux/locallock.h>
  3339. #include <crypto/chacha20.h>
  3340. #include <asm/processor.h>
  3341. @@ -1028,8 +1029,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
  3342. } sample;
  3343. long delta, delta2, delta3;
  3344. - preempt_disable();
  3345. -
  3346. sample.jiffies = jiffies;
  3347. sample.cycles = random_get_entropy();
  3348. sample.num = num;
  3349. @@ -1070,7 +1069,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
  3350. */
  3351. credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
  3352. }
  3353. - preempt_enable();
  3354. }
  3355. void add_input_randomness(unsigned int type, unsigned int code,
  3356. @@ -1123,28 +1121,27 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
  3357. return *(ptr + f->reg_idx++);
  3358. }
  3359. -void add_interrupt_randomness(int irq, int irq_flags)
  3360. +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip)
  3361. {
  3362. struct entropy_store *r;
  3363. struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
  3364. - struct pt_regs *regs = get_irq_regs();
  3365. unsigned long now = jiffies;
  3366. cycles_t cycles = random_get_entropy();
  3367. __u32 c_high, j_high;
  3368. - __u64 ip;
  3369. unsigned long seed;
  3370. int credit = 0;
  3371. if (cycles == 0)
  3372. - cycles = get_reg(fast_pool, regs);
  3373. + cycles = get_reg(fast_pool, NULL);
  3374. c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
  3375. j_high = (sizeof(now) > 4) ? now >> 32 : 0;
  3376. fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
  3377. fast_pool->pool[1] ^= now ^ c_high;
  3378. - ip = regs ? instruction_pointer(regs) : _RET_IP_;
  3379. + if (!ip)
  3380. + ip = _RET_IP_;
  3381. fast_pool->pool[2] ^= ip;
  3382. fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 :
  3383. - get_reg(fast_pool, regs);
  3384. + get_reg(fast_pool, NULL);
  3385. fast_mix(fast_pool);
  3386. add_interrupt_bench(cycles);
  3387. @@ -2056,6 +2053,7 @@ struct batched_entropy {
  3388. * goal of being quite fast and not depleting entropy.
  3389. */
  3390. static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long);
  3391. +static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_long_lock);
  3392. unsigned long get_random_long(void)
  3393. {
  3394. unsigned long ret;
  3395. @@ -2064,13 +2062,13 @@ unsigned long get_random_long(void)
  3396. if (arch_get_random_long(&ret))
  3397. return ret;
  3398. - batch = &get_cpu_var(batched_entropy_long);
  3399. + batch = &get_locked_var(batched_entropy_long_lock, batched_entropy_long);
  3400. if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) {
  3401. extract_crng((u8 *)batch->entropy_long);
  3402. batch->position = 0;
  3403. }
  3404. ret = batch->entropy_long[batch->position++];
  3405. - put_cpu_var(batched_entropy_long);
  3406. + put_locked_var(batched_entropy_long_lock, batched_entropy_long);
  3407. return ret;
  3408. }
  3409. EXPORT_SYMBOL(get_random_long);
  3410. @@ -2082,6 +2080,8 @@ unsigned int get_random_int(void)
  3411. }
  3412. #else
  3413. static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int);
  3414. +static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_int_lock);
  3415. +
  3416. unsigned int get_random_int(void)
  3417. {
  3418. unsigned int ret;
  3419. @@ -2090,13 +2090,13 @@ unsigned int get_random_int(void)
  3420. if (arch_get_random_int(&ret))
  3421. return ret;
  3422. - batch = &get_cpu_var(batched_entropy_int);
  3423. + batch = &get_locked_var(batched_entropy_int_lock, batched_entropy_int);
  3424. if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) {
  3425. extract_crng((u8 *)batch->entropy_int);
  3426. batch->position = 0;
  3427. }
  3428. ret = batch->entropy_int[batch->position++];
  3429. - put_cpu_var(batched_entropy_int);
  3430. + put_locked_var(batched_entropy_int_lock, batched_entropy_int);
  3431. return ret;
  3432. }
  3433. #endif
  3434. diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
  3435. index 8022bea27fed..247330efd310 100644
  3436. --- a/drivers/char/tpm/tpm_tis.c
  3437. +++ b/drivers/char/tpm/tpm_tis.c
  3438. @@ -50,6 +50,31 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da
  3439. return container_of(data, struct tpm_tis_tcg_phy, priv);
  3440. }
  3441. +#ifdef CONFIG_PREEMPT_RT_FULL
  3442. +/*
  3443. + * Flushes previous write operations to chip so that a subsequent
  3444. + * ioread*()s won't stall a cpu.
  3445. + */
  3446. +static inline void tpm_tis_flush(void __iomem *iobase)
  3447. +{
  3448. + ioread8(iobase + TPM_ACCESS(0));
  3449. +}
  3450. +#else
  3451. +#define tpm_tis_flush(iobase) do { } while (0)
  3452. +#endif
  3453. +
  3454. +static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr)
  3455. +{
  3456. + iowrite8(b, iobase + addr);
  3457. + tpm_tis_flush(iobase);
  3458. +}
  3459. +
  3460. +static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr)
  3461. +{
  3462. + iowrite32(b, iobase + addr);
  3463. + tpm_tis_flush(iobase);
  3464. +}
  3465. +
  3466. static bool interrupts = true;
  3467. module_param(interrupts, bool, 0444);
  3468. MODULE_PARM_DESC(interrupts, "Enable interrupts");
  3469. @@ -103,7 +128,7 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len,
  3470. struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
  3471. while (len--)
  3472. - iowrite8(*value++, phy->iobase + addr);
  3473. + tpm_tis_iowrite8(*value++, phy->iobase, addr);
  3474. return 0;
  3475. }
  3476. @@ -127,7 +152,7 @@ static int tpm_tcg_write32(struct tpm_tis_data *data, u32 addr, u32 value)
  3477. {
  3478. struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
  3479. - iowrite32(value, phy->iobase + addr);
  3480. + tpm_tis_iowrite32(value, phy->iobase, addr);
  3481. return 0;
  3482. }
  3483. diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
  3484. index 4da2af9694a2..5b6f57f500b8 100644
  3485. --- a/drivers/clocksource/tcb_clksrc.c
  3486. +++ b/drivers/clocksource/tcb_clksrc.c
  3487. @@ -23,8 +23,7 @@
  3488. * this 32 bit free-running counter. the second channel is not used.
  3489. *
  3490. * - The third channel may be used to provide a 16-bit clockevent
  3491. - * source, used in either periodic or oneshot mode. This runs
  3492. - * at 32 KiHZ, and can handle delays of up to two seconds.
  3493. + * source, used in either periodic or oneshot mode.
  3494. *
  3495. * A boot clocksource and clockevent source are also currently needed,
  3496. * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
  3497. @@ -74,6 +73,8 @@ static struct clocksource clksrc = {
  3498. struct tc_clkevt_device {
  3499. struct clock_event_device clkevt;
  3500. struct clk *clk;
  3501. + bool clk_enabled;
  3502. + u32 freq;
  3503. void __iomem *regs;
  3504. };
  3505. @@ -82,15 +83,26 @@ static struct tc_clkevt_device *to_tc_clkevt(struct clock_event_device *clkevt)
  3506. return container_of(clkevt, struct tc_clkevt_device, clkevt);
  3507. }
  3508. -/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
  3509. - * because using one of the divided clocks would usually mean the
  3510. - * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
  3511. - *
  3512. - * A divided clock could be good for high resolution timers, since
  3513. - * 30.5 usec resolution can seem "low".
  3514. - */
  3515. static u32 timer_clock;
  3516. +static void tc_clk_disable(struct clock_event_device *d)
  3517. +{
  3518. + struct tc_clkevt_device *tcd = to_tc_clkevt(d);
  3519. +
  3520. + clk_disable(tcd->clk);
  3521. + tcd->clk_enabled = false;
  3522. +}
  3523. +
  3524. +static void tc_clk_enable(struct clock_event_device *d)
  3525. +{
  3526. + struct tc_clkevt_device *tcd = to_tc_clkevt(d);
  3527. +
  3528. + if (tcd->clk_enabled)
  3529. + return;
  3530. + clk_enable(tcd->clk);
  3531. + tcd->clk_enabled = true;
  3532. +}
  3533. +
  3534. static int tc_shutdown(struct clock_event_device *d)
  3535. {
  3536. struct tc_clkevt_device *tcd = to_tc_clkevt(d);
  3537. @@ -98,8 +110,14 @@ static int tc_shutdown(struct clock_event_device *d)
  3538. __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR));
  3539. __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
  3540. + return 0;
  3541. +}
  3542. +
  3543. +static int tc_shutdown_clk_off(struct clock_event_device *d)
  3544. +{
  3545. + tc_shutdown(d);
  3546. if (!clockevent_state_detached(d))
  3547. - clk_disable(tcd->clk);
  3548. + tc_clk_disable(d);
  3549. return 0;
  3550. }
  3551. @@ -112,9 +130,9 @@ static int tc_set_oneshot(struct clock_event_device *d)
  3552. if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
  3553. tc_shutdown(d);
  3554. - clk_enable(tcd->clk);
  3555. + tc_clk_enable(d);
  3556. - /* slow clock, count up to RC, then irq and stop */
  3557. + /* count up to RC, then irq and stop */
  3558. __raw_writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE |
  3559. ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR));
  3560. __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
  3561. @@ -134,12 +152,12 @@ static int tc_set_periodic(struct clock_event_device *d)
  3562. /* By not making the gentime core emulate periodic mode on top
  3563. * of oneshot, we get lower overhead and improved accuracy.
  3564. */
  3565. - clk_enable(tcd->clk);
  3566. + tc_clk_enable(d);
  3567. - /* slow clock, count up to RC, then irq and restart */
  3568. + /* count up to RC, then irq and restart */
  3569. __raw_writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
  3570. regs + ATMEL_TC_REG(2, CMR));
  3571. - __raw_writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
  3572. + __raw_writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
  3573. /* Enable clock and interrupts on RC compare */
  3574. __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
  3575. @@ -166,9 +184,13 @@ static struct tc_clkevt_device clkevt = {
  3576. .features = CLOCK_EVT_FEAT_PERIODIC |
  3577. CLOCK_EVT_FEAT_ONESHOT,
  3578. /* Should be lower than at91rm9200's system timer */
  3579. +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  3580. .rating = 125,
  3581. +#else
  3582. + .rating = 200,
  3583. +#endif
  3584. .set_next_event = tc_next_event,
  3585. - .set_state_shutdown = tc_shutdown,
  3586. + .set_state_shutdown = tc_shutdown_clk_off,
  3587. .set_state_periodic = tc_set_periodic,
  3588. .set_state_oneshot = tc_set_oneshot,
  3589. },
  3590. @@ -188,8 +210,9 @@ static irqreturn_t ch2_irq(int irq, void *handle)
  3591. return IRQ_NONE;
  3592. }
  3593. -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
  3594. +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
  3595. {
  3596. + unsigned divisor = atmel_tc_divisors[divisor_idx];
  3597. int ret;
  3598. struct clk *t2_clk = tc->clk[2];
  3599. int irq = tc->irq[2];
  3600. @@ -210,7 +233,11 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
  3601. clkevt.regs = tc->regs;
  3602. clkevt.clk = t2_clk;
  3603. - timer_clock = clk32k_divisor_idx;
  3604. + timer_clock = divisor_idx;
  3605. + if (!divisor)
  3606. + clkevt.freq = 32768;
  3607. + else
  3608. + clkevt.freq = clk_get_rate(t2_clk) / divisor;
  3609. clkevt.clkevt.cpumask = cpumask_of(0);
  3610. @@ -221,7 +248,7 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
  3611. return ret;
  3612. }
  3613. - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff);
  3614. + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff);
  3615. return ret;
  3616. }
  3617. @@ -358,7 +385,11 @@ static int __init tcb_clksrc_init(void)
  3618. goto err_disable_t1;
  3619. /* channel 2: periodic and oneshot timer support */
  3620. +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  3621. ret = setup_clkevents(tc, clk32k_divisor_idx);
  3622. +#else
  3623. + ret = setup_clkevents(tc, best_divisor_idx);
  3624. +#endif
  3625. if (ret)
  3626. goto err_unregister_clksrc;
  3627. diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c
  3628. index 6555821bbdae..93288849b2bd 100644
  3629. --- a/drivers/clocksource/timer-atmel-pit.c
  3630. +++ b/drivers/clocksource/timer-atmel-pit.c
  3631. @@ -46,6 +46,7 @@ struct pit_data {
  3632. u32 cycle;
  3633. u32 cnt;
  3634. unsigned int irq;
  3635. + bool irq_requested;
  3636. struct clk *mck;
  3637. };
  3638. @@ -96,15 +97,29 @@ static int pit_clkevt_shutdown(struct clock_event_device *dev)
  3639. /* disable irq, leaving the clocksource active */
  3640. pit_write(data->base, AT91_PIT_MR, (data->cycle - 1) | AT91_PIT_PITEN);
  3641. + if (data->irq_requested) {
  3642. + free_irq(data->irq, data);
  3643. + data->irq_requested = false;
  3644. + }
  3645. return 0;
  3646. }
  3647. +static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id);
  3648. /*
  3649. * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16)
  3650. */
  3651. static int pit_clkevt_set_periodic(struct clock_event_device *dev)
  3652. {
  3653. struct pit_data *data = clkevt_to_pit_data(dev);
  3654. + int ret;
  3655. +
  3656. + ret = request_irq(data->irq, at91sam926x_pit_interrupt,
  3657. + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  3658. + "at91_tick", data);
  3659. + if (ret)
  3660. + panic(pr_fmt("Unable to setup IRQ\n"));
  3661. +
  3662. + data->irq_requested = true;
  3663. /* update clocksource counter */
  3664. data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR));
  3665. @@ -230,15 +245,6 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node)
  3666. return ret;
  3667. }
  3668. - /* Set up irq handler */
  3669. - ret = request_irq(data->irq, at91sam926x_pit_interrupt,
  3670. - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  3671. - "at91_tick", data);
  3672. - if (ret) {
  3673. - pr_err("Unable to setup IRQ\n");
  3674. - return ret;
  3675. - }
  3676. -
  3677. /* Set up and register clockevents */
  3678. data->clkevt.name = "pit";
  3679. data->clkevt.features = CLOCK_EVT_FEAT_PERIODIC;
  3680. diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c
  3681. index e90ab5b63a90..9e124087c55f 100644
  3682. --- a/drivers/clocksource/timer-atmel-st.c
  3683. +++ b/drivers/clocksource/timer-atmel-st.c
  3684. @@ -115,18 +115,29 @@ static void clkdev32k_disable_and_flush_irq(void)
  3685. last_crtr = read_CRTR();
  3686. }
  3687. +static int atmel_st_irq;
  3688. +
  3689. static int clkevt32k_shutdown(struct clock_event_device *evt)
  3690. {
  3691. clkdev32k_disable_and_flush_irq();
  3692. irqmask = 0;
  3693. regmap_write(regmap_st, AT91_ST_IER, irqmask);
  3694. + free_irq(atmel_st_irq, regmap_st);
  3695. return 0;
  3696. }
  3697. static int clkevt32k_set_oneshot(struct clock_event_device *dev)
  3698. {
  3699. + int ret;
  3700. +
  3701. clkdev32k_disable_and_flush_irq();
  3702. + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt,
  3703. + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  3704. + "at91_tick", regmap_st);
  3705. + if (ret)
  3706. + panic(pr_fmt("Unable to setup IRQ\n"));
  3707. +
  3708. /*
  3709. * ALM for oneshot irqs, set by next_event()
  3710. * before 32 seconds have passed.
  3711. @@ -139,8 +150,16 @@ static int clkevt32k_set_oneshot(struct clock_event_device *dev)
  3712. static int clkevt32k_set_periodic(struct clock_event_device *dev)
  3713. {
  3714. + int ret;
  3715. +
  3716. clkdev32k_disable_and_flush_irq();
  3717. + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt,
  3718. + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  3719. + "at91_tick", regmap_st);
  3720. + if (ret)
  3721. + panic(pr_fmt("Unable to setup IRQ\n"));
  3722. +
  3723. /* PIT for periodic irqs; fixed rate of 1/HZ */
  3724. irqmask = AT91_ST_PITS;
  3725. regmap_write(regmap_st, AT91_ST_PIMR, timer_latch);
  3726. @@ -198,7 +217,7 @@ static int __init atmel_st_timer_init(struct device_node *node)
  3727. {
  3728. struct clk *sclk;
  3729. unsigned int sclk_rate, val;
  3730. - int irq, ret;
  3731. + int ret;
  3732. regmap_st = syscon_node_to_regmap(node);
  3733. if (IS_ERR(regmap_st)) {
  3734. @@ -212,21 +231,12 @@ static int __init atmel_st_timer_init(struct device_node *node)
  3735. regmap_read(regmap_st, AT91_ST_SR, &val);
  3736. /* Get the interrupts property */
  3737. - irq = irq_of_parse_and_map(node, 0);
  3738. - if (!irq) {
  3739. + atmel_st_irq = irq_of_parse_and_map(node, 0);
  3740. + if (!atmel_st_irq) {
  3741. pr_err("Unable to get IRQ from DT\n");
  3742. return -EINVAL;
  3743. }
  3744. - /* Make IRQs happen for the system timer */
  3745. - ret = request_irq(irq, at91rm9200_timer_interrupt,
  3746. - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  3747. - "at91_tick", regmap_st);
  3748. - if (ret) {
  3749. - pr_err("Unable to setup IRQ\n");
  3750. - return ret;
  3751. - }
  3752. -
  3753. sclk = of_clk_get(node, 0);
  3754. if (IS_ERR(sclk)) {
  3755. pr_err("Unable to get slow clock\n");
  3756. diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
  3757. index a782ce87715c..19d265948526 100644
  3758. --- a/drivers/connector/cn_proc.c
  3759. +++ b/drivers/connector/cn_proc.c
  3760. @@ -32,6 +32,7 @@
  3761. #include <linux/pid_namespace.h>
  3762. #include <linux/cn_proc.h>
  3763. +#include <linux/locallock.h>
  3764. /*
  3765. * Size of a cn_msg followed by a proc_event structure. Since the
  3766. @@ -54,10 +55,11 @@ static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC };
  3767. /* proc_event_counts is used as the sequence number of the netlink message */
  3768. static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 };
  3769. +static DEFINE_LOCAL_IRQ_LOCK(send_msg_lock);
  3770. static inline void send_msg(struct cn_msg *msg)
  3771. {
  3772. - preempt_disable();
  3773. + local_lock(send_msg_lock);
  3774. msg->seq = __this_cpu_inc_return(proc_event_counts) - 1;
  3775. ((struct proc_event *)msg->data)->cpu = smp_processor_id();
  3776. @@ -70,7 +72,7 @@ static inline void send_msg(struct cn_msg *msg)
  3777. */
  3778. cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT);
  3779. - preempt_enable();
  3780. + local_unlock(send_msg_lock);
  3781. }
  3782. void proc_fork_connector(struct task_struct *task)
  3783. diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
  3784. index adbd1de1cea5..1fac5074f2cf 100644
  3785. --- a/drivers/cpufreq/Kconfig.x86
  3786. +++ b/drivers/cpufreq/Kconfig.x86
  3787. @@ -124,7 +124,7 @@ config X86_POWERNOW_K7_ACPI
  3788. config X86_POWERNOW_K8
  3789. tristate "AMD Opteron/Athlon64 PowerNow!"
  3790. - depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ
  3791. + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE
  3792. help
  3793. This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors.
  3794. Support for K10 and newer processors is now in acpi-cpufreq.
  3795. diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  3796. index 2117f172d7a2..96c15501b0c8 100644
  3797. --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  3798. +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  3799. @@ -1489,7 +1489,9 @@ execbuf_submit(struct i915_execbuffer_params *params,
  3800. if (ret)
  3801. return ret;
  3802. +#ifndef CONFIG_PREEMPT_RT_BASE
  3803. trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
  3804. +#endif
  3805. i915_gem_execbuffer_move_to_active(vmas, params->request);
  3806. diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
  3807. index 755d78832a66..97fb03dc4971 100644
  3808. --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
  3809. +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
  3810. @@ -40,7 +40,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
  3811. if (!mutex_is_locked(mutex))
  3812. return false;
  3813. -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
  3814. +#if (defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)) && !defined(CONFIG_PREEMPT_RT_BASE)
  3815. return mutex->owner == task;
  3816. #else
  3817. /* Since UP may be pre-empted, we cannot assume that we own the lock */
  3818. diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
  3819. index 02908e37c228..05c0480576e1 100644
  3820. --- a/drivers/gpu/drm/i915/i915_irq.c
  3821. +++ b/drivers/gpu/drm/i915/i915_irq.c
  3822. @@ -812,6 +812,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
  3823. spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
  3824. /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
  3825. + preempt_disable_rt();
  3826. /* Get optional system timestamp before query. */
  3827. if (stime)
  3828. @@ -863,6 +864,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
  3829. *etime = ktime_get();
  3830. /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
  3831. + preempt_enable_rt();
  3832. spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
  3833. diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
  3834. index ce32303b3013..c0a53bf2e952 100644
  3835. --- a/drivers/gpu/drm/i915/intel_display.c
  3836. +++ b/drivers/gpu/drm/i915/intel_display.c
  3837. @@ -12138,7 +12138,7 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe)
  3838. struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
  3839. struct intel_flip_work *work;
  3840. - WARN_ON(!in_interrupt());
  3841. + WARN_ON_NONRT(!in_interrupt());
  3842. if (crtc == NULL)
  3843. return;
  3844. diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
  3845. index 64f4e2e18594..aebf1e9eabcb 100644
  3846. --- a/drivers/gpu/drm/i915/intel_sprite.c
  3847. +++ b/drivers/gpu/drm/i915/intel_sprite.c
  3848. @@ -35,6 +35,7 @@
  3849. #include <drm/drm_rect.h>
  3850. #include <drm/drm_atomic.h>
  3851. #include <drm/drm_plane_helper.h>
  3852. +#include <linux/locallock.h>
  3853. #include "intel_drv.h"
  3854. #include "intel_frontbuffer.h"
  3855. #include <drm/i915_drm.h>
  3856. @@ -65,6 +66,8 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode,
  3857. 1000 * adjusted_mode->crtc_htotal);
  3858. }
  3859. +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock);
  3860. +
  3861. /**
  3862. * intel_pipe_update_start() - start update of a set of display registers
  3863. * @crtc: the crtc of which the registers are going to be updated
  3864. @@ -98,7 +101,7 @@ void intel_pipe_update_start(struct intel_crtc *crtc)
  3865. min = vblank_start - intel_usecs_to_scanlines(adjusted_mode, 100);
  3866. max = vblank_start - 1;
  3867. - local_irq_disable();
  3868. + local_lock_irq(pipe_update_lock);
  3869. if (min <= 0 || max <= 0)
  3870. return;
  3871. @@ -128,11 +131,11 @@ void intel_pipe_update_start(struct intel_crtc *crtc)
  3872. break;
  3873. }
  3874. - local_irq_enable();
  3875. + local_unlock_irq(pipe_update_lock);
  3876. timeout = schedule_timeout(timeout);
  3877. - local_irq_disable();
  3878. + local_lock_irq(pipe_update_lock);
  3879. }
  3880. finish_wait(wq, &wait);
  3881. @@ -202,7 +205,7 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work
  3882. crtc->base.state->event = NULL;
  3883. }
  3884. - local_irq_enable();
  3885. + local_unlock_irq(pipe_update_lock);
  3886. if (crtc->debug.start_vbl_count &&
  3887. crtc->debug.start_vbl_count != end_vbl_count) {
  3888. diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c
  3889. index 192b2d3a79cb..d5372a207326 100644
  3890. --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c
  3891. +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c
  3892. @@ -23,7 +23,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
  3893. if (!mutex_is_locked(mutex))
  3894. return false;
  3895. -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
  3896. +#if (defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)) && !defined(CONFIG_PREEMPT_RT_BASE)
  3897. return mutex->owner == task;
  3898. #else
  3899. /* Since UP may be pre-empted, we cannot assume that we own the lock */
  3900. diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
  3901. index cdb8cb568c15..b6d7fd964cbc 100644
  3902. --- a/drivers/gpu/drm/radeon/radeon_display.c
  3903. +++ b/drivers/gpu/drm/radeon/radeon_display.c
  3904. @@ -1845,6 +1845,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
  3905. struct radeon_device *rdev = dev->dev_private;
  3906. /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
  3907. + preempt_disable_rt();
  3908. /* Get optional system timestamp before query. */
  3909. if (stime)
  3910. @@ -1937,6 +1938,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
  3911. *etime = ktime_get();
  3912. /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
  3913. + preempt_enable_rt();
  3914. /* Decode into vertical and horizontal scanout position. */
  3915. *vpos = position & 0x1fff;
  3916. diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
  3917. index 0276d2ef06ee..8868045eabde 100644
  3918. --- a/drivers/hv/vmbus_drv.c
  3919. +++ b/drivers/hv/vmbus_drv.c
  3920. @@ -761,6 +761,8 @@ static void vmbus_isr(void)
  3921. void *page_addr;
  3922. struct hv_message *msg;
  3923. union hv_synic_event_flags *event;
  3924. + struct pt_regs *regs = get_irq_regs();
  3925. + u64 ip = regs ? instruction_pointer(regs) : 0;
  3926. bool handled = false;
  3927. page_addr = hv_context.synic_event_page[cpu];
  3928. @@ -808,7 +810,7 @@ static void vmbus_isr(void)
  3929. tasklet_schedule(hv_context.msg_dpc[cpu]);
  3930. }
  3931. - add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
  3932. + add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, ip);
  3933. }
  3934. diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
  3935. index 36f76e28a0bf..394f142f90c7 100644
  3936. --- a/drivers/ide/alim15x3.c
  3937. +++ b/drivers/ide/alim15x3.c
  3938. @@ -234,7 +234,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev)
  3939. isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
  3940. - local_irq_save(flags);
  3941. + local_irq_save_nort(flags);
  3942. if (m5229_revision < 0xC2) {
  3943. /*
  3944. @@ -325,7 +325,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev)
  3945. }
  3946. pci_dev_put(north);
  3947. pci_dev_put(isa_dev);
  3948. - local_irq_restore(flags);
  3949. + local_irq_restore_nort(flags);
  3950. return 0;
  3951. }
  3952. diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
  3953. index 0ceae5cbd89a..c212e85d7f3e 100644
  3954. --- a/drivers/ide/hpt366.c
  3955. +++ b/drivers/ide/hpt366.c
  3956. @@ -1236,7 +1236,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif,
  3957. dma_old = inb(base + 2);
  3958. - local_irq_save(flags);
  3959. + local_irq_save_nort(flags);
  3960. dma_new = dma_old;
  3961. pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma);
  3962. @@ -1247,7 +1247,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif,
  3963. if (dma_new != dma_old)
  3964. outb(dma_new, base + 2);
  3965. - local_irq_restore(flags);
  3966. + local_irq_restore_nort(flags);
  3967. printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n",
  3968. hwif->name, base, base + 7);
  3969. diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c
  3970. index 19763977568c..4169433faab5 100644
  3971. --- a/drivers/ide/ide-io-std.c
  3972. +++ b/drivers/ide/ide-io-std.c
  3973. @@ -175,7 +175,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
  3974. unsigned long uninitialized_var(flags);
  3975. if ((io_32bit & 2) && !mmio) {
  3976. - local_irq_save(flags);
  3977. + local_irq_save_nort(flags);
  3978. ata_vlb_sync(io_ports->nsect_addr);
  3979. }
  3980. @@ -186,7 +186,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
  3981. insl(data_addr, buf, words);
  3982. if ((io_32bit & 2) && !mmio)
  3983. - local_irq_restore(flags);
  3984. + local_irq_restore_nort(flags);
  3985. if (((len + 1) & 3) < 2)
  3986. return;
  3987. @@ -219,7 +219,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
  3988. unsigned long uninitialized_var(flags);
  3989. if ((io_32bit & 2) && !mmio) {
  3990. - local_irq_save(flags);
  3991. + local_irq_save_nort(flags);
  3992. ata_vlb_sync(io_ports->nsect_addr);
  3993. }
  3994. @@ -230,7 +230,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
  3995. outsl(data_addr, buf, words);
  3996. if ((io_32bit & 2) && !mmio)
  3997. - local_irq_restore(flags);
  3998. + local_irq_restore_nort(flags);
  3999. if (((len + 1) & 3) < 2)
  4000. return;
  4001. diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
  4002. index 669ea1e45795..e12e43e62245 100644
  4003. --- a/drivers/ide/ide-io.c
  4004. +++ b/drivers/ide/ide-io.c
  4005. @@ -659,7 +659,7 @@ void ide_timer_expiry (unsigned long data)
  4006. /* disable_irq_nosync ?? */
  4007. disable_irq(hwif->irq);
  4008. /* local CPU only, as if we were handling an interrupt */
  4009. - local_irq_disable();
  4010. + local_irq_disable_nort();
  4011. if (hwif->polling) {
  4012. startstop = handler(drive);
  4013. } else if (drive_is_ready(drive)) {
  4014. diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
  4015. index 376f2dc410c5..f014dd1b73dc 100644
  4016. --- a/drivers/ide/ide-iops.c
  4017. +++ b/drivers/ide/ide-iops.c
  4018. @@ -129,12 +129,12 @@ int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad,
  4019. if ((stat & ATA_BUSY) == 0)
  4020. break;
  4021. - local_irq_restore(flags);
  4022. + local_irq_restore_nort(flags);
  4023. *rstat = stat;
  4024. return -EBUSY;
  4025. }
  4026. }
  4027. - local_irq_restore(flags);
  4028. + local_irq_restore_nort(flags);
  4029. }
  4030. /*
  4031. * Allow status to settle, then read it again.
  4032. diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
  4033. index 0b63facd1d87..4ceba37afc0c 100644
  4034. --- a/drivers/ide/ide-probe.c
  4035. +++ b/drivers/ide/ide-probe.c
  4036. @@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id)
  4037. int bswap = 1;
  4038. /* local CPU only; some systems need this */
  4039. - local_irq_save(flags);
  4040. + local_irq_save_nort(flags);
  4041. /* read 512 bytes of id info */
  4042. hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE);
  4043. - local_irq_restore(flags);
  4044. + local_irq_restore_nort(flags);
  4045. drive->dev_flags |= IDE_DFLAG_ID_READ;
  4046. #ifdef DEBUG
  4047. diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
  4048. index a716693417a3..be0568c722d6 100644
  4049. --- a/drivers/ide/ide-taskfile.c
  4050. +++ b/drivers/ide/ide-taskfile.c
  4051. @@ -250,7 +250,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd,
  4052. page_is_high = PageHighMem(page);
  4053. if (page_is_high)
  4054. - local_irq_save(flags);
  4055. + local_irq_save_nort(flags);
  4056. buf = kmap_atomic(page) + offset;
  4057. @@ -271,7 +271,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd,
  4058. kunmap_atomic(buf);
  4059. if (page_is_high)
  4060. - local_irq_restore(flags);
  4061. + local_irq_restore_nort(flags);
  4062. len -= nr_bytes;
  4063. }
  4064. @@ -414,7 +414,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive,
  4065. }
  4066. if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
  4067. - local_irq_disable();
  4068. + local_irq_disable_nort();
  4069. ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
  4070. diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  4071. index fddff403d5d2..cca1bb4fbfe3 100644
  4072. --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  4073. +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  4074. @@ -902,7 +902,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
  4075. ipoib_dbg_mcast(priv, "restarting multicast task\n");
  4076. - local_irq_save(flags);
  4077. + local_irq_save_nort(flags);
  4078. netif_addr_lock(dev);
  4079. spin_lock(&priv->lock);
  4080. @@ -984,7 +984,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
  4081. spin_unlock(&priv->lock);
  4082. netif_addr_unlock(dev);
  4083. - local_irq_restore(flags);
  4084. + local_irq_restore_nort(flags);
  4085. /*
  4086. * make sure the in-flight joins have finished before we attempt
  4087. diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c
  4088. index 4a2a9e370be7..e970d9afd179 100644
  4089. --- a/drivers/input/gameport/gameport.c
  4090. +++ b/drivers/input/gameport/gameport.c
  4091. @@ -91,13 +91,13 @@ static int gameport_measure_speed(struct gameport *gameport)
  4092. tx = ~0;
  4093. for (i = 0; i < 50; i++) {
  4094. - local_irq_save(flags);
  4095. + local_irq_save_nort(flags);
  4096. t1 = ktime_get_ns();
  4097. for (t = 0; t < 50; t++)
  4098. gameport_read(gameport);
  4099. t2 = ktime_get_ns();
  4100. t3 = ktime_get_ns();
  4101. - local_irq_restore(flags);
  4102. + local_irq_restore_nort(flags);
  4103. udelay(i * 10);
  4104. t = (t2 - t1) - (t3 - t2);
  4105. if (t < tx)
  4106. @@ -124,12 +124,12 @@ static int old_gameport_measure_speed(struct gameport *gameport)
  4107. tx = 1 << 30;
  4108. for(i = 0; i < 50; i++) {
  4109. - local_irq_save(flags);
  4110. + local_irq_save_nort(flags);
  4111. GET_TIME(t1);
  4112. for (t = 0; t < 50; t++) gameport_read(gameport);
  4113. GET_TIME(t2);
  4114. GET_TIME(t3);
  4115. - local_irq_restore(flags);
  4116. + local_irq_restore_nort(flags);
  4117. udelay(i * 10);
  4118. if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t;
  4119. }
  4120. @@ -148,11 +148,11 @@ static int old_gameport_measure_speed(struct gameport *gameport)
  4121. tx = 1 << 30;
  4122. for(i = 0; i < 50; i++) {
  4123. - local_irq_save(flags);
  4124. + local_irq_save_nort(flags);
  4125. t1 = rdtsc();
  4126. for (t = 0; t < 50; t++) gameport_read(gameport);
  4127. t2 = rdtsc();
  4128. - local_irq_restore(flags);
  4129. + local_irq_restore_nort(flags);
  4130. udelay(i * 10);
  4131. if (t2 - t1 < tx) tx = t2 - t1;
  4132. }
  4133. diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
  4134. index 0c910a863581..3408e5dd1b93 100644
  4135. --- a/drivers/iommu/amd_iommu.c
  4136. +++ b/drivers/iommu/amd_iommu.c
  4137. @@ -1923,10 +1923,10 @@ static int __attach_device(struct iommu_dev_data *dev_data,
  4138. int ret;
  4139. /*
  4140. - * Must be called with IRQs disabled. Warn here to detect early
  4141. - * when its not.
  4142. + * Must be called with IRQs disabled on a non RT kernel. Warn here to
  4143. + * detect early when its not.
  4144. */
  4145. - WARN_ON(!irqs_disabled());
  4146. + WARN_ON_NONRT(!irqs_disabled());
  4147. /* lock domain */
  4148. spin_lock(&domain->lock);
  4149. @@ -2094,10 +2094,10 @@ static void __detach_device(struct iommu_dev_data *dev_data)
  4150. struct protection_domain *domain;
  4151. /*
  4152. - * Must be called with IRQs disabled. Warn here to detect early
  4153. - * when its not.
  4154. + * Must be called with IRQs disabled on a non RT kernel. Warn here to
  4155. + * detect early when its not.
  4156. */
  4157. - WARN_ON(!irqs_disabled());
  4158. + WARN_ON_NONRT(!irqs_disabled());
  4159. if (WARN_ON(!dev_data->domain))
  4160. return;
  4161. @@ -2283,7 +2283,7 @@ static void queue_add(struct dma_ops_domain *dma_dom,
  4162. pages = __roundup_pow_of_two(pages);
  4163. address >>= PAGE_SHIFT;
  4164. - queue = get_cpu_ptr(&flush_queue);
  4165. + queue = raw_cpu_ptr(&flush_queue);
  4166. spin_lock_irqsave(&queue->lock, flags);
  4167. if (queue->next == FLUSH_QUEUE_SIZE)
  4168. @@ -2300,8 +2300,6 @@ static void queue_add(struct dma_ops_domain *dma_dom,
  4169. if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0)
  4170. mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10));
  4171. -
  4172. - put_cpu_ptr(&flush_queue);
  4173. }
  4174. diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
  4175. index 88bbc8ccc5e3..8a1a8432a6bd 100644
  4176. --- a/drivers/iommu/intel-iommu.c
  4177. +++ b/drivers/iommu/intel-iommu.c
  4178. @@ -479,7 +479,7 @@ struct deferred_flush_data {
  4179. struct deferred_flush_table *tables;
  4180. };
  4181. -DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
  4182. +static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
  4183. /* bitmap for indexing intel_iommus */
  4184. static int g_num_of_iommus;
  4185. @@ -3721,10 +3721,8 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
  4186. struct intel_iommu *iommu;
  4187. struct deferred_flush_entry *entry;
  4188. struct deferred_flush_data *flush_data;
  4189. - unsigned int cpuid;
  4190. - cpuid = get_cpu();
  4191. - flush_data = per_cpu_ptr(&deferred_flush, cpuid);
  4192. + flush_data = raw_cpu_ptr(&deferred_flush);
  4193. /* Flush all CPUs' entries to avoid deferring too much. If
  4194. * this becomes a bottleneck, can just flush us, and rely on
  4195. @@ -3757,8 +3755,6 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
  4196. }
  4197. flush_data->size++;
  4198. spin_unlock_irqrestore(&flush_data->lock, flags);
  4199. -
  4200. - put_cpu();
  4201. }
  4202. static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
  4203. diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
  4204. index e23001bfcfee..359d5d169ec0 100644
  4205. --- a/drivers/iommu/iova.c
  4206. +++ b/drivers/iommu/iova.c
  4207. @@ -22,6 +22,7 @@
  4208. #include <linux/slab.h>
  4209. #include <linux/smp.h>
  4210. #include <linux/bitops.h>
  4211. +#include <linux/cpu.h>
  4212. static bool iova_rcache_insert(struct iova_domain *iovad,
  4213. unsigned long pfn,
  4214. @@ -420,10 +421,8 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
  4215. /* Try replenishing IOVAs by flushing rcache. */
  4216. flushed_rcache = true;
  4217. - preempt_disable();
  4218. for_each_online_cpu(cpu)
  4219. free_cpu_cached_iovas(cpu, iovad);
  4220. - preempt_enable();
  4221. goto retry;
  4222. }
  4223. @@ -751,7 +750,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
  4224. bool can_insert = false;
  4225. unsigned long flags;
  4226. - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
  4227. + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
  4228. spin_lock_irqsave(&cpu_rcache->lock, flags);
  4229. if (!iova_magazine_full(cpu_rcache->loaded)) {
  4230. @@ -781,7 +780,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
  4231. iova_magazine_push(cpu_rcache->loaded, iova_pfn);
  4232. spin_unlock_irqrestore(&cpu_rcache->lock, flags);
  4233. - put_cpu_ptr(rcache->cpu_rcaches);
  4234. if (mag_to_free) {
  4235. iova_magazine_free_pfns(mag_to_free, iovad);
  4236. @@ -815,7 +813,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
  4237. bool has_pfn = false;
  4238. unsigned long flags;
  4239. - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
  4240. + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
  4241. spin_lock_irqsave(&cpu_rcache->lock, flags);
  4242. if (!iova_magazine_empty(cpu_rcache->loaded)) {
  4243. @@ -837,7 +835,6 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
  4244. iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
  4245. spin_unlock_irqrestore(&cpu_rcache->lock, flags);
  4246. - put_cpu_ptr(rcache->cpu_rcaches);
  4247. return iova_pfn;
  4248. }
  4249. diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig
  4250. index 3f9ddb9fafa7..09da5b6b44a1 100644
  4251. --- a/drivers/leds/trigger/Kconfig
  4252. +++ b/drivers/leds/trigger/Kconfig
  4253. @@ -69,7 +69,7 @@ config LEDS_TRIGGER_BACKLIGHT
  4254. config LEDS_TRIGGER_CPU
  4255. bool "LED CPU Trigger"
  4256. - depends on LEDS_TRIGGERS
  4257. + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE
  4258. help
  4259. This allows LEDs to be controlled by active CPUs. This shows
  4260. the active CPUs across an array of LEDs so you can see which
  4261. diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
  4262. index 4d200883c505..98b64ed5cb81 100644
  4263. --- a/drivers/md/bcache/Kconfig
  4264. +++ b/drivers/md/bcache/Kconfig
  4265. @@ -1,6 +1,7 @@
  4266. config BCACHE
  4267. tristate "Block device as cache"
  4268. + depends on !PREEMPT_RT_FULL
  4269. ---help---
  4270. Allows a block device to be used as cache for other devices; uses
  4271. a btree for indexing and the layout is optimized for SSDs.
  4272. diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
  4273. index ba7c4c685db3..834ec328f217 100644
  4274. --- a/drivers/md/dm-rq.c
  4275. +++ b/drivers/md/dm-rq.c
  4276. @@ -842,7 +842,7 @@ static void dm_old_request_fn(struct request_queue *q)
  4277. /* Establish tio->ti before queuing work (map_tio_request) */
  4278. tio->ti = ti;
  4279. kthread_queue_work(&md->kworker, &tio->work);
  4280. - BUG_ON(!irqs_disabled());
  4281. + BUG_ON_NONRT(!irqs_disabled());
  4282. }
  4283. }
  4284. diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
  4285. index 475a7a1bcfe0..8d2c9d70042e 100644
  4286. --- a/drivers/md/raid5.c
  4287. +++ b/drivers/md/raid5.c
  4288. @@ -429,7 +429,7 @@ void raid5_release_stripe(struct stripe_head *sh)
  4289. md_wakeup_thread(conf->mddev->thread);
  4290. return;
  4291. slow_path:
  4292. - local_irq_save(flags);
  4293. + local_irq_save_nort(flags);
  4294. /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
  4295. if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
  4296. INIT_LIST_HEAD(&list);
  4297. @@ -438,7 +438,7 @@ void raid5_release_stripe(struct stripe_head *sh)
  4298. spin_unlock(&conf->device_lock);
  4299. release_inactive_stripe_list(conf, &list, hash);
  4300. }
  4301. - local_irq_restore(flags);
  4302. + local_irq_restore_nort(flags);
  4303. }
  4304. static inline void remove_hash(struct stripe_head *sh)
  4305. @@ -1937,8 +1937,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
  4306. struct raid5_percpu *percpu;
  4307. unsigned long cpu;
  4308. - cpu = get_cpu();
  4309. + cpu = get_cpu_light();
  4310. percpu = per_cpu_ptr(conf->percpu, cpu);
  4311. + spin_lock(&percpu->lock);
  4312. if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
  4313. ops_run_biofill(sh);
  4314. overlap_clear++;
  4315. @@ -1994,7 +1995,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
  4316. if (test_and_clear_bit(R5_Overlap, &dev->flags))
  4317. wake_up(&sh->raid_conf->wait_for_overlap);
  4318. }
  4319. - put_cpu();
  4320. + spin_unlock(&percpu->lock);
  4321. + put_cpu_light();
  4322. }
  4323. static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
  4324. @@ -6410,6 +6412,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
  4325. __func__, cpu);
  4326. return -ENOMEM;
  4327. }
  4328. + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
  4329. return 0;
  4330. }
  4331. @@ -6420,7 +6423,6 @@ static int raid5_alloc_percpu(struct r5conf *conf)
  4332. conf->percpu = alloc_percpu(struct raid5_percpu);
  4333. if (!conf->percpu)
  4334. return -ENOMEM;
  4335. -
  4336. err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node);
  4337. if (!err) {
  4338. conf->scribble_disks = max(conf->raid_disks,
  4339. diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
  4340. index 57ec49f0839e..0739604990b7 100644
  4341. --- a/drivers/md/raid5.h
  4342. +++ b/drivers/md/raid5.h
  4343. @@ -504,6 +504,7 @@ struct r5conf {
  4344. int recovery_disabled;
  4345. /* per cpu variables */
  4346. struct raid5_percpu {
  4347. + spinlock_t lock; /* Protection for -RT */
  4348. struct page *spare_page; /* Used when checking P/Q in raid6 */
  4349. struct flex_array *scribble; /* space for constructing buffer
  4350. * lists and performing address
  4351. diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
  4352. index 64971baf11fa..215e91e36198 100644
  4353. --- a/drivers/misc/Kconfig
  4354. +++ b/drivers/misc/Kconfig
  4355. @@ -54,6 +54,7 @@ config AD525X_DPOT_SPI
  4356. config ATMEL_TCLIB
  4357. bool "Atmel AT32/AT91 Timer/Counter Library"
  4358. depends on (AVR32 || ARCH_AT91)
  4359. + default y if PREEMPT_RT_FULL
  4360. help
  4361. Select this if you want a library to allocate the Timer/Counter
  4362. blocks found on many Atmel processors. This facilitates using
  4363. @@ -69,8 +70,7 @@ config ATMEL_TCB_CLKSRC
  4364. are combined to make a single 32-bit timer.
  4365. When GENERIC_CLOCKEVENTS is defined, the third timer channel
  4366. - may be used as a clock event device supporting oneshot mode
  4367. - (delays of up to two seconds) based on the 32 KiHz clock.
  4368. + may be used as a clock event device supporting oneshot mode.
  4369. config ATMEL_TCB_CLKSRC_BLOCK
  4370. int
  4371. @@ -84,6 +84,15 @@ config ATMEL_TCB_CLKSRC_BLOCK
  4372. TC can be used for other purposes, such as PWM generation and
  4373. interval timing.
  4374. +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  4375. + bool "TC Block use 32 KiHz clock"
  4376. + depends on ATMEL_TCB_CLKSRC
  4377. + default y if !PREEMPT_RT_FULL
  4378. + help
  4379. + Select this to use 32 KiHz base clock rate as TC block clock
  4380. + source for clock events.
  4381. +
  4382. +
  4383. config DUMMY_IRQ
  4384. tristate "Dummy IRQ handler"
  4385. default n
  4386. diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
  4387. index df990bb8c873..1a162709a85e 100644
  4388. --- a/drivers/mmc/host/mmci.c
  4389. +++ b/drivers/mmc/host/mmci.c
  4390. @@ -1147,15 +1147,12 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id)
  4391. struct sg_mapping_iter *sg_miter = &host->sg_miter;
  4392. struct variant_data *variant = host->variant;
  4393. void __iomem *base = host->base;
  4394. - unsigned long flags;
  4395. u32 status;
  4396. status = readl(base + MMCISTATUS);
  4397. dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status);
  4398. - local_irq_save(flags);
  4399. -
  4400. do {
  4401. unsigned int remain, len;
  4402. char *buffer;
  4403. @@ -1195,8 +1192,6 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id)
  4404. sg_miter_stop(sg_miter);
  4405. - local_irq_restore(flags);
  4406. -
  4407. /*
  4408. * If we have less than the fifo 'half-full' threshold to transfer,
  4409. * trigger a PIO interrupt as soon as any data is available.
  4410. diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
  4411. index 9133e7926da5..63afb921ed40 100644
  4412. --- a/drivers/net/ethernet/3com/3c59x.c
  4413. +++ b/drivers/net/ethernet/3com/3c59x.c
  4414. @@ -842,9 +842,9 @@ static void poll_vortex(struct net_device *dev)
  4415. {
  4416. struct vortex_private *vp = netdev_priv(dev);
  4417. unsigned long flags;
  4418. - local_irq_save(flags);
  4419. + local_irq_save_nort(flags);
  4420. (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
  4421. - local_irq_restore(flags);
  4422. + local_irq_restore_nort(flags);
  4423. }
  4424. #endif
  4425. @@ -1910,12 +1910,12 @@ static void vortex_tx_timeout(struct net_device *dev)
  4426. * Block interrupts because vortex_interrupt does a bare spin_lock()
  4427. */
  4428. unsigned long flags;
  4429. - local_irq_save(flags);
  4430. + local_irq_save_nort(flags);
  4431. if (vp->full_bus_master_tx)
  4432. boomerang_interrupt(dev->irq, dev);
  4433. else
  4434. vortex_interrupt(dev->irq, dev);
  4435. - local_irq_restore(flags);
  4436. + local_irq_restore_nort(flags);
  4437. }
  4438. }
  4439. diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
  4440. index da4c2d8a4173..1420dfb56bac 100644
  4441. --- a/drivers/net/ethernet/realtek/8139too.c
  4442. +++ b/drivers/net/ethernet/realtek/8139too.c
  4443. @@ -2233,7 +2233,7 @@ static void rtl8139_poll_controller(struct net_device *dev)
  4444. struct rtl8139_private *tp = netdev_priv(dev);
  4445. const int irq = tp->pci_dev->irq;
  4446. - disable_irq(irq);
  4447. + disable_irq_nosync(irq);
  4448. rtl8139_interrupt(irq, dev);
  4449. enable_irq(irq);
  4450. }
  4451. diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
  4452. index bca6935a94db..d7a35ee34d03 100644
  4453. --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
  4454. +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
  4455. @@ -697,7 +697,7 @@ static void ezusb_req_ctx_wait(struct ezusb_priv *upriv,
  4456. while (!ctx->done.done && msecs--)
  4457. udelay(1000);
  4458. } else {
  4459. - wait_event_interruptible(ctx->done.wait,
  4460. + swait_event_interruptible(ctx->done.wait,
  4461. ctx->done.done);
  4462. }
  4463. break;
  4464. diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
  4465. index bedce3453dd3..faf038978650 100644
  4466. --- a/drivers/pinctrl/qcom/pinctrl-msm.c
  4467. +++ b/drivers/pinctrl/qcom/pinctrl-msm.c
  4468. @@ -61,7 +61,7 @@ struct msm_pinctrl {
  4469. struct notifier_block restart_nb;
  4470. int irq;
  4471. - spinlock_t lock;
  4472. + raw_spinlock_t lock;
  4473. DECLARE_BITMAP(dual_edge_irqs, MAX_NR_GPIO);
  4474. DECLARE_BITMAP(enabled_irqs, MAX_NR_GPIO);
  4475. @@ -153,14 +153,14 @@ static int msm_pinmux_set_mux(struct pinctrl_dev *pctldev,
  4476. if (WARN_ON(i == g->nfuncs))
  4477. return -EINVAL;
  4478. - spin_lock_irqsave(&pctrl->lock, flags);
  4479. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  4480. val = readl(pctrl->regs + g->ctl_reg);
  4481. val &= ~mask;
  4482. val |= i << g->mux_bit;
  4483. writel(val, pctrl->regs + g->ctl_reg);
  4484. - spin_unlock_irqrestore(&pctrl->lock, flags);
  4485. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  4486. return 0;
  4487. }
  4488. @@ -323,14 +323,14 @@ static int msm_config_group_set(struct pinctrl_dev *pctldev,
  4489. break;
  4490. case PIN_CONFIG_OUTPUT:
  4491. /* set output value */
  4492. - spin_lock_irqsave(&pctrl->lock, flags);
  4493. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  4494. val = readl(pctrl->regs + g->io_reg);
  4495. if (arg)
  4496. val |= BIT(g->out_bit);
  4497. else
  4498. val &= ~BIT(g->out_bit);
  4499. writel(val, pctrl->regs + g->io_reg);
  4500. - spin_unlock_irqrestore(&pctrl->lock, flags);
  4501. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  4502. /* enable output */
  4503. arg = 1;
  4504. @@ -351,12 +351,12 @@ static int msm_config_group_set(struct pinctrl_dev *pctldev,
  4505. return -EINVAL;
  4506. }
  4507. - spin_lock_irqsave(&pctrl->lock, flags);
  4508. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  4509. val = readl(pctrl->regs + g->ctl_reg);
  4510. val &= ~(mask << bit);
  4511. val |= arg << bit;
  4512. writel(val, pctrl->regs + g->ctl_reg);
  4513. - spin_unlock_irqrestore(&pctrl->lock, flags);
  4514. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  4515. }
  4516. return 0;
  4517. @@ -384,13 +384,13 @@ static int msm_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
  4518. g = &pctrl->soc->groups[offset];
  4519. - spin_lock_irqsave(&pctrl->lock, flags);
  4520. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  4521. val = readl(pctrl->regs + g->ctl_reg);
  4522. val &= ~BIT(g->oe_bit);
  4523. writel(val, pctrl->regs + g->ctl_reg);
  4524. - spin_unlock_irqrestore(&pctrl->lock, flags);
  4525. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  4526. return 0;
  4527. }
  4528. @@ -404,7 +404,7 @@ static int msm_gpio_direction_output(struct gpio_chip *chip, unsigned offset, in
  4529. g = &pctrl->soc->groups[offset];
  4530. - spin_lock_irqsave(&pctrl->lock, flags);
  4531. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  4532. val = readl(pctrl->regs + g->io_reg);
  4533. if (value)
  4534. @@ -417,7 +417,7 @@ static int msm_gpio_direction_output(struct gpio_chip *chip, unsigned offset, in
  4535. val |= BIT(g->oe_bit);
  4536. writel(val, pctrl->regs + g->ctl_reg);
  4537. - spin_unlock_irqrestore(&pctrl->lock, flags);
  4538. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  4539. return 0;
  4540. }
  4541. @@ -443,7 +443,7 @@ static void msm_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
  4542. g = &pctrl->soc->groups[offset];
  4543. - spin_lock_irqsave(&pctrl->lock, flags);
  4544. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  4545. val = readl(pctrl->regs + g->io_reg);
  4546. if (value)
  4547. @@ -452,7 +452,7 @@ static void msm_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
  4548. val &= ~BIT(g->out_bit);
  4549. writel(val, pctrl->regs + g->io_reg);
  4550. - spin_unlock_irqrestore(&pctrl->lock, flags);
  4551. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  4552. }
  4553. #ifdef CONFIG_DEBUG_FS
  4554. @@ -571,7 +571,7 @@ static void msm_gpio_irq_mask(struct irq_data *d)
  4555. g = &pctrl->soc->groups[d->hwirq];
  4556. - spin_lock_irqsave(&pctrl->lock, flags);
  4557. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  4558. val = readl(pctrl->regs + g->intr_cfg_reg);
  4559. val &= ~BIT(g->intr_enable_bit);
  4560. @@ -579,7 +579,7 @@ static void msm_gpio_irq_mask(struct irq_data *d)
  4561. clear_bit(d->hwirq, pctrl->enabled_irqs);
  4562. - spin_unlock_irqrestore(&pctrl->lock, flags);
  4563. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  4564. }
  4565. static void msm_gpio_irq_unmask(struct irq_data *d)
  4566. @@ -592,7 +592,7 @@ static void msm_gpio_irq_unmask(struct irq_data *d)
  4567. g = &pctrl->soc->groups[d->hwirq];
  4568. - spin_lock_irqsave(&pctrl->lock, flags);
  4569. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  4570. val = readl(pctrl->regs + g->intr_cfg_reg);
  4571. val |= BIT(g->intr_enable_bit);
  4572. @@ -600,7 +600,7 @@ static void msm_gpio_irq_unmask(struct irq_data *d)
  4573. set_bit(d->hwirq, pctrl->enabled_irqs);
  4574. - spin_unlock_irqrestore(&pctrl->lock, flags);
  4575. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  4576. }
  4577. static void msm_gpio_irq_ack(struct irq_data *d)
  4578. @@ -613,7 +613,7 @@ static void msm_gpio_irq_ack(struct irq_data *d)
  4579. g = &pctrl->soc->groups[d->hwirq];
  4580. - spin_lock_irqsave(&pctrl->lock, flags);
  4581. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  4582. val = readl(pctrl->regs + g->intr_status_reg);
  4583. if (g->intr_ack_high)
  4584. @@ -625,7 +625,7 @@ static void msm_gpio_irq_ack(struct irq_data *d)
  4585. if (test_bit(d->hwirq, pctrl->dual_edge_irqs))
  4586. msm_gpio_update_dual_edge_pos(pctrl, g, d);
  4587. - spin_unlock_irqrestore(&pctrl->lock, flags);
  4588. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  4589. }
  4590. static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
  4591. @@ -638,7 +638,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
  4592. g = &pctrl->soc->groups[d->hwirq];
  4593. - spin_lock_irqsave(&pctrl->lock, flags);
  4594. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  4595. /*
  4596. * For hw without possibility of detecting both edges
  4597. @@ -712,7 +712,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
  4598. if (test_bit(d->hwirq, pctrl->dual_edge_irqs))
  4599. msm_gpio_update_dual_edge_pos(pctrl, g, d);
  4600. - spin_unlock_irqrestore(&pctrl->lock, flags);
  4601. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  4602. if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
  4603. irq_set_handler_locked(d, handle_level_irq);
  4604. @@ -728,11 +728,11 @@ static int msm_gpio_irq_set_wake(struct irq_data *d, unsigned int on)
  4605. struct msm_pinctrl *pctrl = gpiochip_get_data(gc);
  4606. unsigned long flags;
  4607. - spin_lock_irqsave(&pctrl->lock, flags);
  4608. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  4609. irq_set_irq_wake(pctrl->irq, on);
  4610. - spin_unlock_irqrestore(&pctrl->lock, flags);
  4611. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  4612. return 0;
  4613. }
  4614. @@ -878,7 +878,7 @@ int msm_pinctrl_probe(struct platform_device *pdev,
  4615. pctrl->soc = soc_data;
  4616. pctrl->chip = msm_gpio_template;
  4617. - spin_lock_init(&pctrl->lock);
  4618. + raw_spin_lock_init(&pctrl->lock);
  4619. res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
  4620. pctrl->regs = devm_ioremap_resource(&pdev->dev, res);
  4621. diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
  4622. index 9bd41a35a78a..8e2d436c2e3f 100644
  4623. --- a/drivers/scsi/fcoe/fcoe.c
  4624. +++ b/drivers/scsi/fcoe/fcoe.c
  4625. @@ -1455,11 +1455,11 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev,
  4626. static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
  4627. {
  4628. struct fcoe_percpu_s *fps;
  4629. - int rc;
  4630. + int rc, cpu = get_cpu_light();
  4631. - fps = &get_cpu_var(fcoe_percpu);
  4632. + fps = &per_cpu(fcoe_percpu, cpu);
  4633. rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
  4634. - put_cpu_var(fcoe_percpu);
  4635. + put_cpu_light();
  4636. return rc;
  4637. }
  4638. @@ -1646,11 +1646,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport,
  4639. return 0;
  4640. }
  4641. - stats = per_cpu_ptr(lport->stats, get_cpu());
  4642. + stats = per_cpu_ptr(lport->stats, get_cpu_light());
  4643. stats->InvalidCRCCount++;
  4644. if (stats->InvalidCRCCount < 5)
  4645. printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
  4646. - put_cpu();
  4647. + put_cpu_light();
  4648. return -EINVAL;
  4649. }
  4650. @@ -1693,7 +1693,7 @@ static void fcoe_recv_frame(struct sk_buff *skb)
  4651. */
  4652. hp = (struct fcoe_hdr *) skb_network_header(skb);
  4653. - stats = per_cpu_ptr(lport->stats, get_cpu());
  4654. + stats = per_cpu_ptr(lport->stats, get_cpu_light());
  4655. if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) {
  4656. if (stats->ErrorFrames < 5)
  4657. printk(KERN_WARNING "fcoe: FCoE version "
  4658. @@ -1725,13 +1725,13 @@ static void fcoe_recv_frame(struct sk_buff *skb)
  4659. goto drop;
  4660. if (!fcoe_filter_frames(lport, fp)) {
  4661. - put_cpu();
  4662. + put_cpu_light();
  4663. fc_exch_recv(lport, fp);
  4664. return;
  4665. }
  4666. drop:
  4667. stats->ErrorFrames++;
  4668. - put_cpu();
  4669. + put_cpu_light();
  4670. kfree_skb(skb);
  4671. }
  4672. diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
  4673. index dcf36537a767..1a1f2e46452c 100644
  4674. --- a/drivers/scsi/fcoe/fcoe_ctlr.c
  4675. +++ b/drivers/scsi/fcoe/fcoe_ctlr.c
  4676. @@ -834,7 +834,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
  4677. INIT_LIST_HEAD(&del_list);
  4678. - stats = per_cpu_ptr(fip->lp->stats, get_cpu());
  4679. + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
  4680. list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
  4681. deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
  4682. @@ -870,7 +870,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
  4683. sel_time = fcf->time;
  4684. }
  4685. }
  4686. - put_cpu();
  4687. + put_cpu_light();
  4688. list_for_each_entry_safe(fcf, next, &del_list, list) {
  4689. /* Removes fcf from current list */
  4690. diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
  4691. index 16ca31ad5ec0..c3987347e762 100644
  4692. --- a/drivers/scsi/libfc/fc_exch.c
  4693. +++ b/drivers/scsi/libfc/fc_exch.c
  4694. @@ -814,10 +814,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport,
  4695. }
  4696. memset(ep, 0, sizeof(*ep));
  4697. - cpu = get_cpu();
  4698. + cpu = get_cpu_light();
  4699. pool = per_cpu_ptr(mp->pool, cpu);
  4700. spin_lock_bh(&pool->lock);
  4701. - put_cpu();
  4702. + put_cpu_light();
  4703. /* peek cache of free slot */
  4704. if (pool->left != FC_XID_UNKNOWN) {
  4705. diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
  4706. index 87f5e694dbed..23c0a50fb6aa 100644
  4707. --- a/drivers/scsi/libsas/sas_ata.c
  4708. +++ b/drivers/scsi/libsas/sas_ata.c
  4709. @@ -190,7 +190,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
  4710. /* TODO: audit callers to ensure they are ready for qc_issue to
  4711. * unconditionally re-enable interrupts
  4712. */
  4713. - local_irq_save(flags);
  4714. + local_irq_save_nort(flags);
  4715. spin_unlock(ap->lock);
  4716. /* If the device fell off, no sense in issuing commands */
  4717. @@ -252,7 +252,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
  4718. out:
  4719. spin_lock(ap->lock);
  4720. - local_irq_restore(flags);
  4721. + local_irq_restore_nort(flags);
  4722. return ret;
  4723. }
  4724. diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
  4725. index edc48f3b8230..ee5c6f9dfb6f 100644
  4726. --- a/drivers/scsi/qla2xxx/qla_inline.h
  4727. +++ b/drivers/scsi/qla2xxx/qla_inline.h
  4728. @@ -59,12 +59,12 @@ qla2x00_poll(struct rsp_que *rsp)
  4729. {
  4730. unsigned long flags;
  4731. struct qla_hw_data *ha = rsp->hw;
  4732. - local_irq_save(flags);
  4733. + local_irq_save_nort(flags);
  4734. if (IS_P3P_TYPE(ha))
  4735. qla82xx_poll(0, rsp);
  4736. else
  4737. ha->isp_ops->intr_handler(0, rsp);
  4738. - local_irq_restore(flags);
  4739. + local_irq_restore_nort(flags);
  4740. }
  4741. static inline uint8_t *
  4742. diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
  4743. index bddaabb288d4..8de0ec4222fe 100644
  4744. --- a/drivers/scsi/qla2xxx/qla_isr.c
  4745. +++ b/drivers/scsi/qla2xxx/qla_isr.c
  4746. @@ -3129,7 +3129,11 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp)
  4747. * kref_put().
  4748. */
  4749. kref_get(&qentry->irq_notify.kref);
  4750. +#ifdef CONFIG_PREEMPT_RT_BASE
  4751. + swork_queue(&qentry->irq_notify.swork);
  4752. +#else
  4753. schedule_work(&qentry->irq_notify.work);
  4754. +#endif
  4755. }
  4756. /*
  4757. diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c
  4758. index 95f4c1bcdb4c..0be934799bff 100644
  4759. --- a/drivers/thermal/x86_pkg_temp_thermal.c
  4760. +++ b/drivers/thermal/x86_pkg_temp_thermal.c
  4761. @@ -29,6 +29,7 @@
  4762. #include <linux/pm.h>
  4763. #include <linux/thermal.h>
  4764. #include <linux/debugfs.h>
  4765. +#include <linux/swork.h>
  4766. #include <asm/cpu_device_id.h>
  4767. #include <asm/mce.h>
  4768. @@ -353,7 +354,7 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
  4769. }
  4770. }
  4771. -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  4772. +static void platform_thermal_notify_work(struct swork_event *event)
  4773. {
  4774. unsigned long flags;
  4775. int cpu = smp_processor_id();
  4776. @@ -370,7 +371,7 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  4777. pkg_work_scheduled[phy_id]) {
  4778. disable_pkg_thres_interrupt();
  4779. spin_unlock_irqrestore(&pkg_work_lock, flags);
  4780. - return -EINVAL;
  4781. + return;
  4782. }
  4783. pkg_work_scheduled[phy_id] = 1;
  4784. spin_unlock_irqrestore(&pkg_work_lock, flags);
  4785. @@ -379,9 +380,48 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  4786. schedule_delayed_work_on(cpu,
  4787. &per_cpu(pkg_temp_thermal_threshold_work, cpu),
  4788. msecs_to_jiffies(notify_delay_ms));
  4789. +}
  4790. +
  4791. +#ifdef CONFIG_PREEMPT_RT_FULL
  4792. +static struct swork_event notify_work;
  4793. +
  4794. +static int thermal_notify_work_init(void)
  4795. +{
  4796. + int err;
  4797. +
  4798. + err = swork_get();
  4799. + if (err)
  4800. + return err;
  4801. +
  4802. + INIT_SWORK(&notify_work, platform_thermal_notify_work);
  4803. return 0;
  4804. }
  4805. +static void thermal_notify_work_cleanup(void)
  4806. +{
  4807. + swork_put();
  4808. +}
  4809. +
  4810. +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  4811. +{
  4812. + swork_queue(&notify_work);
  4813. + return 0;
  4814. +}
  4815. +
  4816. +#else /* !CONFIG_PREEMPT_RT_FULL */
  4817. +
  4818. +static int thermal_notify_work_init(void) { return 0; }
  4819. +
  4820. +static void thermal_notify_work_cleanup(void) { }
  4821. +
  4822. +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  4823. +{
  4824. + platform_thermal_notify_work(NULL);
  4825. +
  4826. + return 0;
  4827. +}
  4828. +#endif /* CONFIG_PREEMPT_RT_FULL */
  4829. +
  4830. static int find_siblings_cpu(int cpu)
  4831. {
  4832. int i;
  4833. @@ -585,6 +625,9 @@ static int __init pkg_temp_thermal_init(void)
  4834. if (!x86_match_cpu(pkg_temp_thermal_ids))
  4835. return -ENODEV;
  4836. + if (!thermal_notify_work_init())
  4837. + return -ENODEV;
  4838. +
  4839. spin_lock_init(&pkg_work_lock);
  4840. platform_thermal_package_notify =
  4841. pkg_temp_thermal_platform_thermal_notify;
  4842. @@ -609,7 +652,7 @@ static int __init pkg_temp_thermal_init(void)
  4843. kfree(pkg_work_scheduled);
  4844. platform_thermal_package_notify = NULL;
  4845. platform_thermal_package_rate_control = NULL;
  4846. -
  4847. + thermal_notify_work_cleanup();
  4848. return -ENODEV;
  4849. }
  4850. @@ -634,6 +677,7 @@ static void __exit pkg_temp_thermal_exit(void)
  4851. mutex_unlock(&phy_dev_list_mutex);
  4852. platform_thermal_package_notify = NULL;
  4853. platform_thermal_package_rate_control = NULL;
  4854. + thermal_notify_work_cleanup();
  4855. for_each_online_cpu(i)
  4856. cancel_delayed_work_sync(
  4857. &per_cpu(pkg_temp_thermal_threshold_work, i));
  4858. diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
  4859. index e8819aa20415..dd7f9bf45d6c 100644
  4860. --- a/drivers/tty/serial/8250/8250_core.c
  4861. +++ b/drivers/tty/serial/8250/8250_core.c
  4862. @@ -58,7 +58,16 @@ static struct uart_driver serial8250_reg;
  4863. static unsigned int skip_txen_test; /* force skip of txen test at init time */
  4864. -#define PASS_LIMIT 512
  4865. +/*
  4866. + * On -rt we can have a more delays, and legitimately
  4867. + * so - so don't drop work spuriously and spam the
  4868. + * syslog:
  4869. + */
  4870. +#ifdef CONFIG_PREEMPT_RT_FULL
  4871. +# define PASS_LIMIT 1000000
  4872. +#else
  4873. +# define PASS_LIMIT 512
  4874. +#endif
  4875. #include <asm/serial.h>
  4876. /*
  4877. diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
  4878. index f6e4373a8850..4620b51b0e7c 100644
  4879. --- a/drivers/tty/serial/8250/8250_port.c
  4880. +++ b/drivers/tty/serial/8250/8250_port.c
  4881. @@ -35,6 +35,7 @@
  4882. #include <linux/nmi.h>
  4883. #include <linux/mutex.h>
  4884. #include <linux/slab.h>
  4885. +#include <linux/kdb.h>
  4886. #include <linux/uaccess.h>
  4887. #include <linux/pm_runtime.h>
  4888. #include <linux/timer.h>
  4889. @@ -3143,9 +3144,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
  4890. serial8250_rpm_get(up);
  4891. - if (port->sysrq)
  4892. + if (port->sysrq || oops_in_progress)
  4893. locked = 0;
  4894. - else if (oops_in_progress)
  4895. + else if (in_kdb_printk())
  4896. locked = spin_trylock_irqsave(&port->lock, flags);
  4897. else
  4898. spin_lock_irqsave(&port->lock, flags);
  4899. diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
  4900. index e2c33b9528d8..53af53c43e8c 100644
  4901. --- a/drivers/tty/serial/amba-pl011.c
  4902. +++ b/drivers/tty/serial/amba-pl011.c
  4903. @@ -2194,13 +2194,19 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
  4904. clk_enable(uap->clk);
  4905. - local_irq_save(flags);
  4906. + /*
  4907. + * local_irq_save(flags);
  4908. + *
  4909. + * This local_irq_save() is nonsense. If we come in via sysrq
  4910. + * handling then interrupts are already disabled. Aside of
  4911. + * that the port.sysrq check is racy on SMP regardless.
  4912. + */
  4913. if (uap->port.sysrq)
  4914. locked = 0;
  4915. else if (oops_in_progress)
  4916. - locked = spin_trylock(&uap->port.lock);
  4917. + locked = spin_trylock_irqsave(&uap->port.lock, flags);
  4918. else
  4919. - spin_lock(&uap->port.lock);
  4920. + spin_lock_irqsave(&uap->port.lock, flags);
  4921. /*
  4922. * First save the CR then disable the interrupts
  4923. @@ -2224,8 +2230,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
  4924. pl011_write(old_cr, uap, REG_CR);
  4925. if (locked)
  4926. - spin_unlock(&uap->port.lock);
  4927. - local_irq_restore(flags);
  4928. + spin_unlock_irqrestore(&uap->port.lock, flags);
  4929. clk_disable(uap->clk);
  4930. }
  4931. diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
  4932. index 472ba3c813c1..e654cb421fb7 100644
  4933. --- a/drivers/tty/serial/omap-serial.c
  4934. +++ b/drivers/tty/serial/omap-serial.c
  4935. @@ -1257,13 +1257,10 @@ serial_omap_console_write(struct console *co, const char *s,
  4936. pm_runtime_get_sync(up->dev);
  4937. - local_irq_save(flags);
  4938. - if (up->port.sysrq)
  4939. - locked = 0;
  4940. - else if (oops_in_progress)
  4941. - locked = spin_trylock(&up->port.lock);
  4942. + if (up->port.sysrq || oops_in_progress)
  4943. + locked = spin_trylock_irqsave(&up->port.lock, flags);
  4944. else
  4945. - spin_lock(&up->port.lock);
  4946. + spin_lock_irqsave(&up->port.lock, flags);
  4947. /*
  4948. * First save the IER then disable the interrupts
  4949. @@ -1292,8 +1289,7 @@ serial_omap_console_write(struct console *co, const char *s,
  4950. pm_runtime_mark_last_busy(up->dev);
  4951. pm_runtime_put_autosuspend(up->dev);
  4952. if (locked)
  4953. - spin_unlock(&up->port.lock);
  4954. - local_irq_restore(flags);
  4955. + spin_unlock_irqrestore(&up->port.lock, flags);
  4956. }
  4957. static int __init
  4958. diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
  4959. index fcc7aa248ce7..fb2c38d875f9 100644
  4960. --- a/drivers/usb/core/hcd.c
  4961. +++ b/drivers/usb/core/hcd.c
  4962. @@ -1764,9 +1764,9 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
  4963. * and no one may trigger the above deadlock situation when
  4964. * running complete() in tasklet.
  4965. */
  4966. - local_irq_save(flags);
  4967. + local_irq_save_nort(flags);
  4968. urb->complete(urb);
  4969. - local_irq_restore(flags);
  4970. + local_irq_restore_nort(flags);
  4971. usb_anchor_resume_wakeups(anchor);
  4972. atomic_dec(&urb->use_count);
  4973. diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
  4974. index 7b107e43b1c4..f1e8534a1748 100644
  4975. --- a/drivers/usb/gadget/function/f_fs.c
  4976. +++ b/drivers/usb/gadget/function/f_fs.c
  4977. @@ -1593,7 +1593,7 @@ static void ffs_data_put(struct ffs_data *ffs)
  4978. pr_info("%s(): freeing\n", __func__);
  4979. ffs_data_clear(ffs);
  4980. BUG_ON(waitqueue_active(&ffs->ev.waitq) ||
  4981. - waitqueue_active(&ffs->ep0req_completion.wait));
  4982. + swait_active(&ffs->ep0req_completion.wait));
  4983. kfree(ffs->dev_name);
  4984. kfree(ffs);
  4985. }
  4986. diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
  4987. index b8534d3f8bb0..8fcaf02e21b0 100644
  4988. --- a/drivers/usb/gadget/legacy/inode.c
  4989. +++ b/drivers/usb/gadget/legacy/inode.c
  4990. @@ -347,7 +347,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
  4991. spin_unlock_irq (&epdata->dev->lock);
  4992. if (likely (value == 0)) {
  4993. - value = wait_event_interruptible (done.wait, done.done);
  4994. + value = swait_event_interruptible (done.wait, done.done);
  4995. if (value != 0) {
  4996. spin_lock_irq (&epdata->dev->lock);
  4997. if (likely (epdata->ep != NULL)) {
  4998. @@ -356,7 +356,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
  4999. usb_ep_dequeue (epdata->ep, epdata->req);
  5000. spin_unlock_irq (&epdata->dev->lock);
  5001. - wait_event (done.wait, done.done);
  5002. + swait_event (done.wait, done.done);
  5003. if (epdata->status == -ECONNRESET)
  5004. epdata->status = -EINTR;
  5005. } else {
  5006. diff --git a/fs/aio.c b/fs/aio.c
  5007. index 0fcb49ad67d4..211ebc21e4db 100644
  5008. --- a/fs/aio.c
  5009. +++ b/fs/aio.c
  5010. @@ -40,6 +40,7 @@
  5011. #include <linux/ramfs.h>
  5012. #include <linux/percpu-refcount.h>
  5013. #include <linux/mount.h>
  5014. +#include <linux/swork.h>
  5015. #include <asm/kmap_types.h>
  5016. #include <asm/uaccess.h>
  5017. @@ -115,7 +116,7 @@ struct kioctx {
  5018. struct page **ring_pages;
  5019. long nr_pages;
  5020. - struct work_struct free_work;
  5021. + struct swork_event free_work;
  5022. /*
  5023. * signals when all in-flight requests are done
  5024. @@ -258,6 +259,7 @@ static int __init aio_setup(void)
  5025. .mount = aio_mount,
  5026. .kill_sb = kill_anon_super,
  5027. };
  5028. + BUG_ON(swork_get());
  5029. aio_mnt = kern_mount(&aio_fs);
  5030. if (IS_ERR(aio_mnt))
  5031. panic("Failed to create aio fs mount.");
  5032. @@ -581,9 +583,9 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
  5033. return cancel(&kiocb->common);
  5034. }
  5035. -static void free_ioctx(struct work_struct *work)
  5036. +static void free_ioctx(struct swork_event *sev)
  5037. {
  5038. - struct kioctx *ctx = container_of(work, struct kioctx, free_work);
  5039. + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
  5040. pr_debug("freeing %p\n", ctx);
  5041. @@ -602,8 +604,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
  5042. if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
  5043. complete(&ctx->rq_wait->comp);
  5044. - INIT_WORK(&ctx->free_work, free_ioctx);
  5045. - schedule_work(&ctx->free_work);
  5046. + INIT_SWORK(&ctx->free_work, free_ioctx);
  5047. + swork_queue(&ctx->free_work);
  5048. }
  5049. /*
  5050. @@ -611,9 +613,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
  5051. * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
  5052. * now it's safe to cancel any that need to be.
  5053. */
  5054. -static void free_ioctx_users(struct percpu_ref *ref)
  5055. +static void free_ioctx_users_work(struct swork_event *sev)
  5056. {
  5057. - struct kioctx *ctx = container_of(ref, struct kioctx, users);
  5058. + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
  5059. struct aio_kiocb *req;
  5060. spin_lock_irq(&ctx->ctx_lock);
  5061. @@ -632,6 +634,14 @@ static void free_ioctx_users(struct percpu_ref *ref)
  5062. percpu_ref_put(&ctx->reqs);
  5063. }
  5064. +static void free_ioctx_users(struct percpu_ref *ref)
  5065. +{
  5066. + struct kioctx *ctx = container_of(ref, struct kioctx, users);
  5067. +
  5068. + INIT_SWORK(&ctx->free_work, free_ioctx_users_work);
  5069. + swork_queue(&ctx->free_work);
  5070. +}
  5071. +
  5072. static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
  5073. {
  5074. unsigned i, new_nr;
  5075. diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
  5076. index a1fba4285277..3796769b4cd1 100644
  5077. --- a/fs/autofs4/autofs_i.h
  5078. +++ b/fs/autofs4/autofs_i.h
  5079. @@ -31,6 +31,7 @@
  5080. #include <linux/sched.h>
  5081. #include <linux/mount.h>
  5082. #include <linux/namei.h>
  5083. +#include <linux/delay.h>
  5084. #include <asm/current.h>
  5085. #include <linux/uaccess.h>
  5086. diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
  5087. index d8e6d421c27f..2e689ab1306b 100644
  5088. --- a/fs/autofs4/expire.c
  5089. +++ b/fs/autofs4/expire.c
  5090. @@ -148,7 +148,7 @@ static struct dentry *get_next_positive_dentry(struct dentry *prev,
  5091. parent = p->d_parent;
  5092. if (!spin_trylock(&parent->d_lock)) {
  5093. spin_unlock(&p->d_lock);
  5094. - cpu_relax();
  5095. + cpu_chill();
  5096. goto relock;
  5097. }
  5098. spin_unlock(&p->d_lock);
  5099. diff --git a/fs/buffer.c b/fs/buffer.c
  5100. index 5d8f496d624e..48074bd91ea3 100644
  5101. --- a/fs/buffer.c
  5102. +++ b/fs/buffer.c
  5103. @@ -301,8 +301,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
  5104. * decide that the page is now completely done.
  5105. */
  5106. first = page_buffers(page);
  5107. - local_irq_save(flags);
  5108. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  5109. + flags = bh_uptodate_lock_irqsave(first);
  5110. clear_buffer_async_read(bh);
  5111. unlock_buffer(bh);
  5112. tmp = bh;
  5113. @@ -315,8 +314,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
  5114. }
  5115. tmp = tmp->b_this_page;
  5116. } while (tmp != bh);
  5117. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  5118. - local_irq_restore(flags);
  5119. + bh_uptodate_unlock_irqrestore(first, flags);
  5120. /*
  5121. * If none of the buffers had errors and they are all
  5122. @@ -328,9 +326,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
  5123. return;
  5124. still_busy:
  5125. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  5126. - local_irq_restore(flags);
  5127. - return;
  5128. + bh_uptodate_unlock_irqrestore(first, flags);
  5129. }
  5130. /*
  5131. @@ -358,8 +354,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
  5132. }
  5133. first = page_buffers(page);
  5134. - local_irq_save(flags);
  5135. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  5136. + flags = bh_uptodate_lock_irqsave(first);
  5137. clear_buffer_async_write(bh);
  5138. unlock_buffer(bh);
  5139. @@ -371,15 +366,12 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
  5140. }
  5141. tmp = tmp->b_this_page;
  5142. }
  5143. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  5144. - local_irq_restore(flags);
  5145. + bh_uptodate_unlock_irqrestore(first, flags);
  5146. end_page_writeback(page);
  5147. return;
  5148. still_busy:
  5149. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  5150. - local_irq_restore(flags);
  5151. - return;
  5152. + bh_uptodate_unlock_irqrestore(first, flags);
  5153. }
  5154. EXPORT_SYMBOL(end_buffer_async_write);
  5155. @@ -3383,6 +3375,7 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
  5156. struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
  5157. if (ret) {
  5158. INIT_LIST_HEAD(&ret->b_assoc_buffers);
  5159. + buffer_head_init_locks(ret);
  5160. preempt_disable();
  5161. __this_cpu_inc(bh_accounting.nr);
  5162. recalc_bh_state();
  5163. diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
  5164. index a27fc8791551..791aecb7c1ac 100644
  5165. --- a/fs/cifs/readdir.c
  5166. +++ b/fs/cifs/readdir.c
  5167. @@ -80,7 +80,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
  5168. struct inode *inode;
  5169. struct super_block *sb = parent->d_sb;
  5170. struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
  5171. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  5172. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  5173. cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
  5174. diff --git a/fs/dcache.c b/fs/dcache.c
  5175. index 67957f5b325c..f0719b2f1be5 100644
  5176. --- a/fs/dcache.c
  5177. +++ b/fs/dcache.c
  5178. @@ -19,6 +19,7 @@
  5179. #include <linux/mm.h>
  5180. #include <linux/fs.h>
  5181. #include <linux/fsnotify.h>
  5182. +#include <linux/delay.h>
  5183. #include <linux/slab.h>
  5184. #include <linux/init.h>
  5185. #include <linux/hash.h>
  5186. @@ -777,6 +778,8 @@ static inline bool fast_dput(struct dentry *dentry)
  5187. */
  5188. void dput(struct dentry *dentry)
  5189. {
  5190. + struct dentry *parent;
  5191. +
  5192. if (unlikely(!dentry))
  5193. return;
  5194. @@ -815,9 +818,18 @@ void dput(struct dentry *dentry)
  5195. return;
  5196. kill_it:
  5197. - dentry = dentry_kill(dentry);
  5198. - if (dentry) {
  5199. - cond_resched();
  5200. + parent = dentry_kill(dentry);
  5201. + if (parent) {
  5202. + int r;
  5203. +
  5204. + if (parent == dentry) {
  5205. + /* the task with the highest priority won't schedule */
  5206. + r = cond_resched();
  5207. + if (!r)
  5208. + cpu_chill();
  5209. + } else {
  5210. + dentry = parent;
  5211. + }
  5212. goto repeat;
  5213. }
  5214. }
  5215. @@ -2352,7 +2364,7 @@ void d_delete(struct dentry * dentry)
  5216. if (dentry->d_lockref.count == 1) {
  5217. if (!spin_trylock(&inode->i_lock)) {
  5218. spin_unlock(&dentry->d_lock);
  5219. - cpu_relax();
  5220. + cpu_chill();
  5221. goto again;
  5222. }
  5223. dentry->d_flags &= ~DCACHE_CANT_MOUNT;
  5224. @@ -2397,9 +2409,10 @@ EXPORT_SYMBOL(d_rehash);
  5225. static inline unsigned start_dir_add(struct inode *dir)
  5226. {
  5227. + preempt_disable_rt();
  5228. for (;;) {
  5229. - unsigned n = dir->i_dir_seq;
  5230. - if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
  5231. + unsigned n = dir->__i_dir_seq;
  5232. + if (!(n & 1) && cmpxchg(&dir->__i_dir_seq, n, n + 1) == n)
  5233. return n;
  5234. cpu_relax();
  5235. }
  5236. @@ -2407,26 +2420,30 @@ static inline unsigned start_dir_add(struct inode *dir)
  5237. static inline void end_dir_add(struct inode *dir, unsigned n)
  5238. {
  5239. - smp_store_release(&dir->i_dir_seq, n + 2);
  5240. + smp_store_release(&dir->__i_dir_seq, n + 2);
  5241. + preempt_enable_rt();
  5242. }
  5243. static void d_wait_lookup(struct dentry *dentry)
  5244. {
  5245. - if (d_in_lookup(dentry)) {
  5246. - DECLARE_WAITQUEUE(wait, current);
  5247. - add_wait_queue(dentry->d_wait, &wait);
  5248. - do {
  5249. - set_current_state(TASK_UNINTERRUPTIBLE);
  5250. - spin_unlock(&dentry->d_lock);
  5251. - schedule();
  5252. - spin_lock(&dentry->d_lock);
  5253. - } while (d_in_lookup(dentry));
  5254. - }
  5255. + struct swait_queue __wait;
  5256. +
  5257. + if (!d_in_lookup(dentry))
  5258. + return;
  5259. +
  5260. + INIT_LIST_HEAD(&__wait.task_list);
  5261. + do {
  5262. + prepare_to_swait(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE);
  5263. + spin_unlock(&dentry->d_lock);
  5264. + schedule();
  5265. + spin_lock(&dentry->d_lock);
  5266. + } while (d_in_lookup(dentry));
  5267. + finish_swait(dentry->d_wait, &__wait);
  5268. }
  5269. struct dentry *d_alloc_parallel(struct dentry *parent,
  5270. const struct qstr *name,
  5271. - wait_queue_head_t *wq)
  5272. + struct swait_queue_head *wq)
  5273. {
  5274. unsigned int hash = name->hash;
  5275. struct hlist_bl_head *b = in_lookup_hash(parent, hash);
  5276. @@ -2440,7 +2457,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
  5277. retry:
  5278. rcu_read_lock();
  5279. - seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1;
  5280. + seq = smp_load_acquire(&parent->d_inode->__i_dir_seq) & ~1;
  5281. r_seq = read_seqbegin(&rename_lock);
  5282. dentry = __d_lookup_rcu(parent, name, &d_seq);
  5283. if (unlikely(dentry)) {
  5284. @@ -2462,7 +2479,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
  5285. goto retry;
  5286. }
  5287. hlist_bl_lock(b);
  5288. - if (unlikely(parent->d_inode->i_dir_seq != seq)) {
  5289. + if (unlikely(parent->d_inode->__i_dir_seq != seq)) {
  5290. hlist_bl_unlock(b);
  5291. rcu_read_unlock();
  5292. goto retry;
  5293. @@ -2535,7 +2552,7 @@ void __d_lookup_done(struct dentry *dentry)
  5294. hlist_bl_lock(b);
  5295. dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
  5296. __hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
  5297. - wake_up_all(dentry->d_wait);
  5298. + swake_up_all(dentry->d_wait);
  5299. dentry->d_wait = NULL;
  5300. hlist_bl_unlock(b);
  5301. INIT_HLIST_NODE(&dentry->d_u.d_alias);
  5302. @@ -3632,6 +3649,11 @@ EXPORT_SYMBOL(d_genocide);
  5303. void __init vfs_caches_init_early(void)
  5304. {
  5305. + int i;
  5306. +
  5307. + for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++)
  5308. + INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]);
  5309. +
  5310. dcache_init_early();
  5311. inode_init_early();
  5312. }
  5313. diff --git a/fs/eventpoll.c b/fs/eventpoll.c
  5314. index 3cbc30413add..41a94f552aab 100644
  5315. --- a/fs/eventpoll.c
  5316. +++ b/fs/eventpoll.c
  5317. @@ -510,12 +510,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
  5318. */
  5319. static void ep_poll_safewake(wait_queue_head_t *wq)
  5320. {
  5321. - int this_cpu = get_cpu();
  5322. + int this_cpu = get_cpu_light();
  5323. ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
  5324. ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
  5325. - put_cpu();
  5326. + put_cpu_light();
  5327. }
  5328. static void ep_remove_wait_queue(struct eppoll_entry *pwq)
  5329. diff --git a/fs/exec.c b/fs/exec.c
  5330. index b8c43be24751..71f4c6ec2bb8 100644
  5331. --- a/fs/exec.c
  5332. +++ b/fs/exec.c
  5333. @@ -1038,12 +1038,14 @@ static int exec_mmap(struct mm_struct *mm)
  5334. }
  5335. }
  5336. task_lock(tsk);
  5337. + preempt_disable_rt();
  5338. active_mm = tsk->active_mm;
  5339. tsk->mm = mm;
  5340. tsk->active_mm = mm;
  5341. activate_mm(active_mm, mm);
  5342. tsk->mm->vmacache_seqnum = 0;
  5343. vmacache_flush(tsk);
  5344. + preempt_enable_rt();
  5345. task_unlock(tsk);
  5346. if (old_mm) {
  5347. up_read(&old_mm->mmap_sem);
  5348. diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
  5349. index 0094923e5ebf..37fa06ef5417 100644
  5350. --- a/fs/ext4/page-io.c
  5351. +++ b/fs/ext4/page-io.c
  5352. @@ -95,8 +95,7 @@ static void ext4_finish_bio(struct bio *bio)
  5353. * We check all buffers in the page under BH_Uptodate_Lock
  5354. * to avoid races with other end io clearing async_write flags
  5355. */
  5356. - local_irq_save(flags);
  5357. - bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
  5358. + flags = bh_uptodate_lock_irqsave(head);
  5359. do {
  5360. if (bh_offset(bh) < bio_start ||
  5361. bh_offset(bh) + bh->b_size > bio_end) {
  5362. @@ -108,8 +107,7 @@ static void ext4_finish_bio(struct bio *bio)
  5363. if (bio->bi_error)
  5364. buffer_io_error(bh);
  5365. } while ((bh = bh->b_this_page) != head);
  5366. - bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
  5367. - local_irq_restore(flags);
  5368. + bh_uptodate_unlock_irqrestore(head, flags);
  5369. if (!under_io) {
  5370. #ifdef CONFIG_EXT4_FS_ENCRYPTION
  5371. if (data_page)
  5372. diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
  5373. index 4bbad745415a..5f91ca248ab0 100644
  5374. --- a/fs/fuse/dir.c
  5375. +++ b/fs/fuse/dir.c
  5376. @@ -1191,7 +1191,7 @@ static int fuse_direntplus_link(struct file *file,
  5377. struct inode *dir = d_inode(parent);
  5378. struct fuse_conn *fc;
  5379. struct inode *inode;
  5380. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  5381. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  5382. if (!o->nodeid) {
  5383. /*
  5384. diff --git a/fs/inode.c b/fs/inode.c
  5385. index 920aa0b1c6b0..3d6b5fd1bf06 100644
  5386. --- a/fs/inode.c
  5387. +++ b/fs/inode.c
  5388. @@ -153,7 +153,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
  5389. inode->i_bdev = NULL;
  5390. inode->i_cdev = NULL;
  5391. inode->i_link = NULL;
  5392. - inode->i_dir_seq = 0;
  5393. + inode->__i_dir_seq = 0;
  5394. inode->i_rdev = 0;
  5395. inode->dirtied_when = 0;
  5396. diff --git a/fs/libfs.c b/fs/libfs.c
  5397. index 9588780ad43e..9b37abd354c9 100644
  5398. --- a/fs/libfs.c
  5399. +++ b/fs/libfs.c
  5400. @@ -89,7 +89,7 @@ static struct dentry *next_positive(struct dentry *parent,
  5401. struct list_head *from,
  5402. int count)
  5403. {
  5404. - unsigned *seq = &parent->d_inode->i_dir_seq, n;
  5405. + unsigned *seq = &parent->d_inode->__i_dir_seq, n;
  5406. struct dentry *res;
  5407. struct list_head *p;
  5408. bool skipped;
  5409. @@ -122,8 +122,9 @@ static struct dentry *next_positive(struct dentry *parent,
  5410. static void move_cursor(struct dentry *cursor, struct list_head *after)
  5411. {
  5412. struct dentry *parent = cursor->d_parent;
  5413. - unsigned n, *seq = &parent->d_inode->i_dir_seq;
  5414. + unsigned n, *seq = &parent->d_inode->__i_dir_seq;
  5415. spin_lock(&parent->d_lock);
  5416. + preempt_disable_rt();
  5417. for (;;) {
  5418. n = *seq;
  5419. if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
  5420. @@ -136,6 +137,7 @@ static void move_cursor(struct dentry *cursor, struct list_head *after)
  5421. else
  5422. list_add_tail(&cursor->d_child, &parent->d_subdirs);
  5423. smp_store_release(seq, n + 2);
  5424. + preempt_enable_rt();
  5425. spin_unlock(&parent->d_lock);
  5426. }
  5427. diff --git a/fs/locks.c b/fs/locks.c
  5428. index 22c5b4aa4961..269c6a44449a 100644
  5429. --- a/fs/locks.c
  5430. +++ b/fs/locks.c
  5431. @@ -935,7 +935,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request)
  5432. return -ENOMEM;
  5433. }
  5434. - percpu_down_read_preempt_disable(&file_rwsem);
  5435. + percpu_down_read(&file_rwsem);
  5436. spin_lock(&ctx->flc_lock);
  5437. if (request->fl_flags & FL_ACCESS)
  5438. goto find_conflict;
  5439. @@ -976,7 +976,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request)
  5440. out:
  5441. spin_unlock(&ctx->flc_lock);
  5442. - percpu_up_read_preempt_enable(&file_rwsem);
  5443. + percpu_up_read(&file_rwsem);
  5444. if (new_fl)
  5445. locks_free_lock(new_fl);
  5446. locks_dispose_list(&dispose);
  5447. @@ -1013,7 +1013,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
  5448. new_fl2 = locks_alloc_lock();
  5449. }
  5450. - percpu_down_read_preempt_disable(&file_rwsem);
  5451. + percpu_down_read(&file_rwsem);
  5452. spin_lock(&ctx->flc_lock);
  5453. /*
  5454. * New lock request. Walk all POSIX locks and look for conflicts. If
  5455. @@ -1185,7 +1185,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
  5456. }
  5457. out:
  5458. spin_unlock(&ctx->flc_lock);
  5459. - percpu_up_read_preempt_enable(&file_rwsem);
  5460. + percpu_up_read(&file_rwsem);
  5461. /*
  5462. * Free any unused locks.
  5463. */
  5464. @@ -1460,7 +1460,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
  5465. return error;
  5466. }
  5467. - percpu_down_read_preempt_disable(&file_rwsem);
  5468. + percpu_down_read(&file_rwsem);
  5469. spin_lock(&ctx->flc_lock);
  5470. time_out_leases(inode, &dispose);
  5471. @@ -1512,13 +1512,13 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
  5472. locks_insert_block(fl, new_fl);
  5473. trace_break_lease_block(inode, new_fl);
  5474. spin_unlock(&ctx->flc_lock);
  5475. - percpu_up_read_preempt_enable(&file_rwsem);
  5476. + percpu_up_read(&file_rwsem);
  5477. locks_dispose_list(&dispose);
  5478. error = wait_event_interruptible_timeout(new_fl->fl_wait,
  5479. !new_fl->fl_next, break_time);
  5480. - percpu_down_read_preempt_disable(&file_rwsem);
  5481. + percpu_down_read(&file_rwsem);
  5482. spin_lock(&ctx->flc_lock);
  5483. trace_break_lease_unblock(inode, new_fl);
  5484. locks_delete_block(new_fl);
  5485. @@ -1535,7 +1535,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
  5486. }
  5487. out:
  5488. spin_unlock(&ctx->flc_lock);
  5489. - percpu_up_read_preempt_enable(&file_rwsem);
  5490. + percpu_up_read(&file_rwsem);
  5491. locks_dispose_list(&dispose);
  5492. locks_free_lock(new_fl);
  5493. return error;
  5494. @@ -1609,7 +1609,7 @@ int fcntl_getlease(struct file *filp)
  5495. ctx = smp_load_acquire(&inode->i_flctx);
  5496. if (ctx && !list_empty_careful(&ctx->flc_lease)) {
  5497. - percpu_down_read_preempt_disable(&file_rwsem);
  5498. + percpu_down_read(&file_rwsem);
  5499. spin_lock(&ctx->flc_lock);
  5500. time_out_leases(inode, &dispose);
  5501. list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
  5502. @@ -1619,7 +1619,7 @@ int fcntl_getlease(struct file *filp)
  5503. break;
  5504. }
  5505. spin_unlock(&ctx->flc_lock);
  5506. - percpu_up_read_preempt_enable(&file_rwsem);
  5507. + percpu_up_read(&file_rwsem);
  5508. locks_dispose_list(&dispose);
  5509. }
  5510. @@ -1694,7 +1694,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
  5511. return -EINVAL;
  5512. }
  5513. - percpu_down_read_preempt_disable(&file_rwsem);
  5514. + percpu_down_read(&file_rwsem);
  5515. spin_lock(&ctx->flc_lock);
  5516. time_out_leases(inode, &dispose);
  5517. error = check_conflicting_open(dentry, arg, lease->fl_flags);
  5518. @@ -1765,7 +1765,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
  5519. lease->fl_lmops->lm_setup(lease, priv);
  5520. out:
  5521. spin_unlock(&ctx->flc_lock);
  5522. - percpu_up_read_preempt_enable(&file_rwsem);
  5523. + percpu_up_read(&file_rwsem);
  5524. locks_dispose_list(&dispose);
  5525. if (is_deleg)
  5526. inode_unlock(inode);
  5527. @@ -1788,7 +1788,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
  5528. return error;
  5529. }
  5530. - percpu_down_read_preempt_disable(&file_rwsem);
  5531. + percpu_down_read(&file_rwsem);
  5532. spin_lock(&ctx->flc_lock);
  5533. list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
  5534. if (fl->fl_file == filp &&
  5535. @@ -1801,7 +1801,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
  5536. if (victim)
  5537. error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
  5538. spin_unlock(&ctx->flc_lock);
  5539. - percpu_up_read_preempt_enable(&file_rwsem);
  5540. + percpu_up_read(&file_rwsem);
  5541. locks_dispose_list(&dispose);
  5542. return error;
  5543. }
  5544. @@ -2532,13 +2532,13 @@ locks_remove_lease(struct file *filp, struct file_lock_context *ctx)
  5545. if (list_empty(&ctx->flc_lease))
  5546. return;
  5547. - percpu_down_read_preempt_disable(&file_rwsem);
  5548. + percpu_down_read(&file_rwsem);
  5549. spin_lock(&ctx->flc_lock);
  5550. list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list)
  5551. if (filp == fl->fl_file)
  5552. lease_modify(fl, F_UNLCK, &dispose);
  5553. spin_unlock(&ctx->flc_lock);
  5554. - percpu_up_read_preempt_enable(&file_rwsem);
  5555. + percpu_up_read(&file_rwsem);
  5556. locks_dispose_list(&dispose);
  5557. }
  5558. diff --git a/fs/namei.c b/fs/namei.c
  5559. index e7d125c23aa6..072a2f724437 100644
  5560. --- a/fs/namei.c
  5561. +++ b/fs/namei.c
  5562. @@ -1626,7 +1626,7 @@ static struct dentry *lookup_slow(const struct qstr *name,
  5563. {
  5564. struct dentry *dentry = ERR_PTR(-ENOENT), *old;
  5565. struct inode *inode = dir->d_inode;
  5566. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  5567. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  5568. inode_lock_shared(inode);
  5569. /* Don't go there if it's already dead */
  5570. @@ -3089,7 +3089,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
  5571. struct dentry *dentry;
  5572. int error, create_error = 0;
  5573. umode_t mode = op->mode;
  5574. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  5575. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  5576. if (unlikely(IS_DEADDIR(dir_inode)))
  5577. return -ENOENT;
  5578. diff --git a/fs/namespace.c b/fs/namespace.c
  5579. index d7360f9897b4..da188c6966a3 100644
  5580. --- a/fs/namespace.c
  5581. +++ b/fs/namespace.c
  5582. @@ -14,6 +14,7 @@
  5583. #include <linux/mnt_namespace.h>
  5584. #include <linux/user_namespace.h>
  5585. #include <linux/namei.h>
  5586. +#include <linux/delay.h>
  5587. #include <linux/security.h>
  5588. #include <linux/idr.h>
  5589. #include <linux/init.h> /* init_rootfs */
  5590. @@ -357,8 +358,11 @@ int __mnt_want_write(struct vfsmount *m)
  5591. * incremented count after it has set MNT_WRITE_HOLD.
  5592. */
  5593. smp_mb();
  5594. - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
  5595. - cpu_relax();
  5596. + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
  5597. + preempt_enable();
  5598. + cpu_chill();
  5599. + preempt_disable();
  5600. + }
  5601. /*
  5602. * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
  5603. * be set to match its requirements. So we must not load that until
  5604. diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
  5605. index dff600ae0d74..d726d2e09353 100644
  5606. --- a/fs/nfs/delegation.c
  5607. +++ b/fs/nfs/delegation.c
  5608. @@ -150,11 +150,11 @@ static int nfs_delegation_claim_opens(struct inode *inode,
  5609. sp = state->owner;
  5610. /* Block nfs4_proc_unlck */
  5611. mutex_lock(&sp->so_delegreturn_mutex);
  5612. - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
  5613. + seq = read_seqbegin(&sp->so_reclaim_seqlock);
  5614. err = nfs4_open_delegation_recall(ctx, state, stateid, type);
  5615. if (!err)
  5616. err = nfs_delegation_claim_locks(ctx, state, stateid);
  5617. - if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
  5618. + if (!err && read_seqretry(&sp->so_reclaim_seqlock, seq))
  5619. err = -EAGAIN;
  5620. mutex_unlock(&sp->so_delegreturn_mutex);
  5621. put_nfs_open_context(ctx);
  5622. diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
  5623. index 1e5321d1ed22..2510f2be8557 100644
  5624. --- a/fs/nfs/dir.c
  5625. +++ b/fs/nfs/dir.c
  5626. @@ -485,7 +485,7 @@ static
  5627. void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
  5628. {
  5629. struct qstr filename = QSTR_INIT(entry->name, entry->len);
  5630. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  5631. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  5632. struct dentry *dentry;
  5633. struct dentry *alias;
  5634. struct inode *dir = d_inode(parent);
  5635. @@ -1492,7 +1492,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
  5636. struct file *file, unsigned open_flags,
  5637. umode_t mode, int *opened)
  5638. {
  5639. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  5640. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  5641. struct nfs_open_context *ctx;
  5642. struct dentry *res;
  5643. struct iattr attr = { .ia_valid = ATTR_OPEN };
  5644. @@ -1807,7 +1807,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
  5645. trace_nfs_rmdir_enter(dir, dentry);
  5646. if (d_really_is_positive(dentry)) {
  5647. +#ifdef CONFIG_PREEMPT_RT_BASE
  5648. + down(&NFS_I(d_inode(dentry))->rmdir_sem);
  5649. +#else
  5650. down_write(&NFS_I(d_inode(dentry))->rmdir_sem);
  5651. +#endif
  5652. error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
  5653. /* Ensure the VFS deletes this inode */
  5654. switch (error) {
  5655. @@ -1817,7 +1821,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
  5656. case -ENOENT:
  5657. nfs_dentry_handle_enoent(dentry);
  5658. }
  5659. +#ifdef CONFIG_PREEMPT_RT_BASE
  5660. + up(&NFS_I(d_inode(dentry))->rmdir_sem);
  5661. +#else
  5662. up_write(&NFS_I(d_inode(dentry))->rmdir_sem);
  5663. +#endif
  5664. } else
  5665. error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
  5666. trace_nfs_rmdir_exit(dir, dentry, error);
  5667. diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
  5668. index 76ae25661d3f..89159d298278 100644
  5669. --- a/fs/nfs/inode.c
  5670. +++ b/fs/nfs/inode.c
  5671. @@ -1957,7 +1957,11 @@ static void init_once(void *foo)
  5672. nfsi->nrequests = 0;
  5673. nfsi->commit_info.ncommit = 0;
  5674. atomic_set(&nfsi->commit_info.rpcs_out, 0);
  5675. +#ifdef CONFIG_PREEMPT_RT_BASE
  5676. + sema_init(&nfsi->rmdir_sem, 1);
  5677. +#else
  5678. init_rwsem(&nfsi->rmdir_sem);
  5679. +#endif
  5680. nfs4_init_once(nfsi);
  5681. }
  5682. diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
  5683. index 1452177c822d..f43b01d54c59 100644
  5684. --- a/fs/nfs/nfs4_fs.h
  5685. +++ b/fs/nfs/nfs4_fs.h
  5686. @@ -111,7 +111,7 @@ struct nfs4_state_owner {
  5687. unsigned long so_flags;
  5688. struct list_head so_states;
  5689. struct nfs_seqid_counter so_seqid;
  5690. - seqcount_t so_reclaim_seqcount;
  5691. + seqlock_t so_reclaim_seqlock;
  5692. struct mutex so_delegreturn_mutex;
  5693. };
  5694. diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
  5695. index 4638654e26f3..5dd6fd555c72 100644
  5696. --- a/fs/nfs/nfs4proc.c
  5697. +++ b/fs/nfs/nfs4proc.c
  5698. @@ -2691,7 +2691,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
  5699. unsigned int seq;
  5700. int ret;
  5701. - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
  5702. + seq = raw_seqcount_begin(&sp->so_reclaim_seqlock.seqcount);
  5703. ret = _nfs4_proc_open(opendata);
  5704. if (ret != 0)
  5705. @@ -2729,7 +2729,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
  5706. if (d_inode(dentry) == state->inode) {
  5707. nfs_inode_attach_open_context(ctx);
  5708. - if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
  5709. + if (read_seqretry(&sp->so_reclaim_seqlock, seq))
  5710. nfs4_schedule_stateid_recovery(server, state);
  5711. }
  5712. out:
  5713. diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
  5714. index 71deeae6eefd..4be6999299dc 100644
  5715. --- a/fs/nfs/nfs4state.c
  5716. +++ b/fs/nfs/nfs4state.c
  5717. @@ -488,7 +488,7 @@ nfs4_alloc_state_owner(struct nfs_server *server,
  5718. nfs4_init_seqid_counter(&sp->so_seqid);
  5719. atomic_set(&sp->so_count, 1);
  5720. INIT_LIST_HEAD(&sp->so_lru);
  5721. - seqcount_init(&sp->so_reclaim_seqcount);
  5722. + seqlock_init(&sp->so_reclaim_seqlock);
  5723. mutex_init(&sp->so_delegreturn_mutex);
  5724. return sp;
  5725. }
  5726. @@ -1498,8 +1498,12 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
  5727. * recovering after a network partition or a reboot from a
  5728. * server that doesn't support a grace period.
  5729. */
  5730. +#ifdef CONFIG_PREEMPT_RT_FULL
  5731. + write_seqlock(&sp->so_reclaim_seqlock);
  5732. +#else
  5733. + write_seqcount_begin(&sp->so_reclaim_seqlock.seqcount);
  5734. +#endif
  5735. spin_lock(&sp->so_lock);
  5736. - raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
  5737. restart:
  5738. list_for_each_entry(state, &sp->so_states, open_states) {
  5739. if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
  5740. @@ -1568,14 +1572,20 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
  5741. spin_lock(&sp->so_lock);
  5742. goto restart;
  5743. }
  5744. - raw_write_seqcount_end(&sp->so_reclaim_seqcount);
  5745. spin_unlock(&sp->so_lock);
  5746. +#ifdef CONFIG_PREEMPT_RT_FULL
  5747. + write_sequnlock(&sp->so_reclaim_seqlock);
  5748. +#else
  5749. + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount);
  5750. +#endif
  5751. return 0;
  5752. out_err:
  5753. nfs4_put_open_state(state);
  5754. - spin_lock(&sp->so_lock);
  5755. - raw_write_seqcount_end(&sp->so_reclaim_seqcount);
  5756. - spin_unlock(&sp->so_lock);
  5757. +#ifdef CONFIG_PREEMPT_RT_FULL
  5758. + write_sequnlock(&sp->so_reclaim_seqlock);
  5759. +#else
  5760. + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount);
  5761. +#endif
  5762. return status;
  5763. }
  5764. diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
  5765. index 191aa577dd1f..58990c8f52e0 100644
  5766. --- a/fs/nfs/unlink.c
  5767. +++ b/fs/nfs/unlink.c
  5768. @@ -12,7 +12,7 @@
  5769. #include <linux/sunrpc/clnt.h>
  5770. #include <linux/nfs_fs.h>
  5771. #include <linux/sched.h>
  5772. -#include <linux/wait.h>
  5773. +#include <linux/swait.h>
  5774. #include <linux/namei.h>
  5775. #include <linux/fsnotify.h>
  5776. @@ -51,6 +51,29 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
  5777. rpc_restart_call_prepare(task);
  5778. }
  5779. +#ifdef CONFIG_PREEMPT_RT_BASE
  5780. +static void nfs_down_anon(struct semaphore *sema)
  5781. +{
  5782. + down(sema);
  5783. +}
  5784. +
  5785. +static void nfs_up_anon(struct semaphore *sema)
  5786. +{
  5787. + up(sema);
  5788. +}
  5789. +
  5790. +#else
  5791. +static void nfs_down_anon(struct rw_semaphore *rwsem)
  5792. +{
  5793. + down_read_non_owner(rwsem);
  5794. +}
  5795. +
  5796. +static void nfs_up_anon(struct rw_semaphore *rwsem)
  5797. +{
  5798. + up_read_non_owner(rwsem);
  5799. +}
  5800. +#endif
  5801. +
  5802. /**
  5803. * nfs_async_unlink_release - Release the sillydelete data.
  5804. * @task: rpc_task of the sillydelete
  5805. @@ -64,7 +87,7 @@ static void nfs_async_unlink_release(void *calldata)
  5806. struct dentry *dentry = data->dentry;
  5807. struct super_block *sb = dentry->d_sb;
  5808. - up_read_non_owner(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem);
  5809. + nfs_up_anon(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem);
  5810. d_lookup_done(dentry);
  5811. nfs_free_unlinkdata(data);
  5812. dput(dentry);
  5813. @@ -117,10 +140,10 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
  5814. struct inode *dir = d_inode(dentry->d_parent);
  5815. struct dentry *alias;
  5816. - down_read_non_owner(&NFS_I(dir)->rmdir_sem);
  5817. + nfs_down_anon(&NFS_I(dir)->rmdir_sem);
  5818. alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq);
  5819. if (IS_ERR(alias)) {
  5820. - up_read_non_owner(&NFS_I(dir)->rmdir_sem);
  5821. + nfs_up_anon(&NFS_I(dir)->rmdir_sem);
  5822. return 0;
  5823. }
  5824. if (!d_in_lookup(alias)) {
  5825. @@ -142,7 +165,7 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
  5826. ret = 0;
  5827. spin_unlock(&alias->d_lock);
  5828. dput(alias);
  5829. - up_read_non_owner(&NFS_I(dir)->rmdir_sem);
  5830. + nfs_up_anon(&NFS_I(dir)->rmdir_sem);
  5831. /*
  5832. * If we'd displaced old cached devname, free it. At that
  5833. * point dentry is definitely not a root, so we won't need
  5834. @@ -182,7 +205,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name)
  5835. goto out_free_name;
  5836. }
  5837. data->res.dir_attr = &data->dir_attr;
  5838. - init_waitqueue_head(&data->wq);
  5839. + init_swait_queue_head(&data->wq);
  5840. status = -EBUSY;
  5841. spin_lock(&dentry->d_lock);
  5842. diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
  5843. index fe251f187ff8..e89da4fb14c2 100644
  5844. --- a/fs/ntfs/aops.c
  5845. +++ b/fs/ntfs/aops.c
  5846. @@ -92,13 +92,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  5847. ofs = 0;
  5848. if (file_ofs < init_size)
  5849. ofs = init_size - file_ofs;
  5850. - local_irq_save(flags);
  5851. + local_irq_save_nort(flags);
  5852. kaddr = kmap_atomic(page);
  5853. memset(kaddr + bh_offset(bh) + ofs, 0,
  5854. bh->b_size - ofs);
  5855. flush_dcache_page(page);
  5856. kunmap_atomic(kaddr);
  5857. - local_irq_restore(flags);
  5858. + local_irq_restore_nort(flags);
  5859. }
  5860. } else {
  5861. clear_buffer_uptodate(bh);
  5862. @@ -107,8 +107,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  5863. "0x%llx.", (unsigned long long)bh->b_blocknr);
  5864. }
  5865. first = page_buffers(page);
  5866. - local_irq_save(flags);
  5867. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  5868. + flags = bh_uptodate_lock_irqsave(first);
  5869. clear_buffer_async_read(bh);
  5870. unlock_buffer(bh);
  5871. tmp = bh;
  5872. @@ -123,8 +122,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  5873. }
  5874. tmp = tmp->b_this_page;
  5875. } while (tmp != bh);
  5876. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  5877. - local_irq_restore(flags);
  5878. + bh_uptodate_unlock_irqrestore(first, flags);
  5879. /*
  5880. * If none of the buffers had errors then we can set the page uptodate,
  5881. * but we first have to perform the post read mst fixups, if the
  5882. @@ -145,13 +143,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  5883. recs = PAGE_SIZE / rec_size;
  5884. /* Should have been verified before we got here... */
  5885. BUG_ON(!recs);
  5886. - local_irq_save(flags);
  5887. + local_irq_save_nort(flags);
  5888. kaddr = kmap_atomic(page);
  5889. for (i = 0; i < recs; i++)
  5890. post_read_mst_fixup((NTFS_RECORD*)(kaddr +
  5891. i * rec_size), rec_size);
  5892. kunmap_atomic(kaddr);
  5893. - local_irq_restore(flags);
  5894. + local_irq_restore_nort(flags);
  5895. flush_dcache_page(page);
  5896. if (likely(page_uptodate && !PageError(page)))
  5897. SetPageUptodate(page);
  5898. @@ -159,9 +157,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  5899. unlock_page(page);
  5900. return;
  5901. still_busy:
  5902. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  5903. - local_irq_restore(flags);
  5904. - return;
  5905. + bh_uptodate_unlock_irqrestore(first, flags);
  5906. }
  5907. /**
  5908. diff --git a/fs/proc/base.c b/fs/proc/base.c
  5909. index e67fec3c9856..0edc16f95596 100644
  5910. --- a/fs/proc/base.c
  5911. +++ b/fs/proc/base.c
  5912. @@ -1834,7 +1834,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
  5913. child = d_hash_and_lookup(dir, &qname);
  5914. if (!child) {
  5915. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  5916. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  5917. child = d_alloc_parallel(dir, &qname, &wq);
  5918. if (IS_ERR(child))
  5919. goto end_instantiate;
  5920. diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
  5921. index d4e37acd4821..000cea46434a 100644
  5922. --- a/fs/proc/proc_sysctl.c
  5923. +++ b/fs/proc/proc_sysctl.c
  5924. @@ -632,7 +632,7 @@ static bool proc_sys_fill_cache(struct file *file,
  5925. child = d_lookup(dir, &qname);
  5926. if (!child) {
  5927. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  5928. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  5929. child = d_alloc_parallel(dir, &qname, &wq);
  5930. if (IS_ERR(child))
  5931. return false;
  5932. diff --git a/fs/timerfd.c b/fs/timerfd.c
  5933. index ab8dd1538381..5580853f57dd 100644
  5934. --- a/fs/timerfd.c
  5935. +++ b/fs/timerfd.c
  5936. @@ -471,7 +471,10 @@ static int do_timerfd_settime(int ufd, int flags,
  5937. break;
  5938. }
  5939. spin_unlock_irq(&ctx->wqh.lock);
  5940. - cpu_relax();
  5941. + if (isalarm(ctx))
  5942. + hrtimer_wait_for_timer(&ctx->t.alarm.timer);
  5943. + else
  5944. + hrtimer_wait_for_timer(&ctx->t.tmr);
  5945. }
  5946. /*
  5947. diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
  5948. index d31cd1ebd8e9..5ea3f933a52a 100644
  5949. --- a/fs/xfs/xfs_aops.c
  5950. +++ b/fs/xfs/xfs_aops.c
  5951. @@ -112,8 +112,7 @@ xfs_finish_page_writeback(
  5952. ASSERT(bvec->bv_offset + bvec->bv_len <= PAGE_SIZE);
  5953. ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);
  5954. - local_irq_save(flags);
  5955. - bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
  5956. + flags = bh_uptodate_lock_irqsave(head);
  5957. do {
  5958. if (off >= bvec->bv_offset &&
  5959. off < bvec->bv_offset + bvec->bv_len) {
  5960. @@ -136,8 +135,7 @@ xfs_finish_page_writeback(
  5961. }
  5962. off += bh->b_size;
  5963. } while ((bh = bh->b_this_page) != head);
  5964. - bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
  5965. - local_irq_restore(flags);
  5966. + bh_uptodate_unlock_irqrestore(head, flags);
  5967. if (!busy)
  5968. end_page_writeback(bvec->bv_page);
  5969. diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
  5970. index e861a24f06f2..b5c97d3059c7 100644
  5971. --- a/include/acpi/platform/aclinux.h
  5972. +++ b/include/acpi/platform/aclinux.h
  5973. @@ -133,6 +133,7 @@
  5974. #define acpi_cache_t struct kmem_cache
  5975. #define acpi_spinlock spinlock_t *
  5976. +#define acpi_raw_spinlock raw_spinlock_t *
  5977. #define acpi_cpu_flags unsigned long
  5978. /* Use native linux version of acpi_os_allocate_zeroed */
  5979. @@ -151,6 +152,20 @@
  5980. #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id
  5981. #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock
  5982. +#define acpi_os_create_raw_lock(__handle) \
  5983. +({ \
  5984. + raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \
  5985. + \
  5986. + if (lock) { \
  5987. + *(__handle) = lock; \
  5988. + raw_spin_lock_init(*(__handle)); \
  5989. + } \
  5990. + lock ? AE_OK : AE_NO_MEMORY; \
  5991. + })
  5992. +
  5993. +#define acpi_os_delete_raw_lock(__handle) kfree(__handle)
  5994. +
  5995. +
  5996. /*
  5997. * OSL interfaces used by debugger/disassembler
  5998. */
  5999. diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
  6000. index 6f96247226a4..fa53a21263c2 100644
  6001. --- a/include/asm-generic/bug.h
  6002. +++ b/include/asm-generic/bug.h
  6003. @@ -215,6 +215,20 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
  6004. # define WARN_ON_SMP(x) ({0;})
  6005. #endif
  6006. +#ifdef CONFIG_PREEMPT_RT_BASE
  6007. +# define BUG_ON_RT(c) BUG_ON(c)
  6008. +# define BUG_ON_NONRT(c) do { } while (0)
  6009. +# define WARN_ON_RT(condition) WARN_ON(condition)
  6010. +# define WARN_ON_NONRT(condition) do { } while (0)
  6011. +# define WARN_ON_ONCE_NONRT(condition) do { } while (0)
  6012. +#else
  6013. +# define BUG_ON_RT(c) do { } while (0)
  6014. +# define BUG_ON_NONRT(c) BUG_ON(c)
  6015. +# define WARN_ON_RT(condition) do { } while (0)
  6016. +# define WARN_ON_NONRT(condition) WARN_ON(condition)
  6017. +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition)
  6018. +#endif
  6019. +
  6020. #endif /* __ASSEMBLY__ */
  6021. #endif
  6022. diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
  6023. index 535ab2e13d2e..cfc246899473 100644
  6024. --- a/include/linux/blk-mq.h
  6025. +++ b/include/linux/blk-mq.h
  6026. @@ -209,7 +209,7 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
  6027. return unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
  6028. }
  6029. -
  6030. +void __blk_mq_complete_request_remote_work(struct work_struct *work);
  6031. int blk_mq_request_started(struct request *rq);
  6032. void blk_mq_start_request(struct request *rq);
  6033. void blk_mq_end_request(struct request *rq, int error);
  6034. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
  6035. index f6a816129856..ec7a4676f8a8 100644
  6036. --- a/include/linux/blkdev.h
  6037. +++ b/include/linux/blkdev.h
  6038. @@ -89,6 +89,7 @@ struct request {
  6039. struct list_head queuelist;
  6040. union {
  6041. struct call_single_data csd;
  6042. + struct work_struct work;
  6043. u64 fifo_time;
  6044. };
  6045. @@ -467,7 +468,7 @@ struct request_queue {
  6046. struct throtl_data *td;
  6047. #endif
  6048. struct rcu_head rcu_head;
  6049. - wait_queue_head_t mq_freeze_wq;
  6050. + struct swait_queue_head mq_freeze_wq;
  6051. struct percpu_ref q_usage_counter;
  6052. struct list_head all_q_node;
  6053. diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h
  6054. index 8fdcb783197d..d07dbeec7bc1 100644
  6055. --- a/include/linux/bottom_half.h
  6056. +++ b/include/linux/bottom_half.h
  6057. @@ -3,6 +3,39 @@
  6058. #include <linux/preempt.h>
  6059. +#ifdef CONFIG_PREEMPT_RT_FULL
  6060. +
  6061. +extern void __local_bh_disable(void);
  6062. +extern void _local_bh_enable(void);
  6063. +extern void __local_bh_enable(void);
  6064. +
  6065. +static inline void local_bh_disable(void)
  6066. +{
  6067. + __local_bh_disable();
  6068. +}
  6069. +
  6070. +static inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
  6071. +{
  6072. + __local_bh_disable();
  6073. +}
  6074. +
  6075. +static inline void local_bh_enable(void)
  6076. +{
  6077. + __local_bh_enable();
  6078. +}
  6079. +
  6080. +static inline void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
  6081. +{
  6082. + __local_bh_enable();
  6083. +}
  6084. +
  6085. +static inline void local_bh_enable_ip(unsigned long ip)
  6086. +{
  6087. + __local_bh_enable();
  6088. +}
  6089. +
  6090. +#else
  6091. +
  6092. #ifdef CONFIG_TRACE_IRQFLAGS
  6093. extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
  6094. #else
  6095. @@ -30,5 +63,6 @@ static inline void local_bh_enable(void)
  6096. {
  6097. __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
  6098. }
  6099. +#endif
  6100. #endif /* _LINUX_BH_H */
  6101. diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
  6102. index 4431ea2c8802..0744157a97ca 100644
  6103. --- a/include/linux/buffer_head.h
  6104. +++ b/include/linux/buffer_head.h
  6105. @@ -75,8 +75,50 @@ struct buffer_head {
  6106. struct address_space *b_assoc_map; /* mapping this buffer is
  6107. associated with */
  6108. atomic_t b_count; /* users using this buffer_head */
  6109. +#ifdef CONFIG_PREEMPT_RT_BASE
  6110. + spinlock_t b_uptodate_lock;
  6111. +#if IS_ENABLED(CONFIG_JBD2)
  6112. + spinlock_t b_state_lock;
  6113. + spinlock_t b_journal_head_lock;
  6114. +#endif
  6115. +#endif
  6116. };
  6117. +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
  6118. +{
  6119. + unsigned long flags;
  6120. +
  6121. +#ifndef CONFIG_PREEMPT_RT_BASE
  6122. + local_irq_save(flags);
  6123. + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
  6124. +#else
  6125. + spin_lock_irqsave(&bh->b_uptodate_lock, flags);
  6126. +#endif
  6127. + return flags;
  6128. +}
  6129. +
  6130. +static inline void
  6131. +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
  6132. +{
  6133. +#ifndef CONFIG_PREEMPT_RT_BASE
  6134. + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
  6135. + local_irq_restore(flags);
  6136. +#else
  6137. + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
  6138. +#endif
  6139. +}
  6140. +
  6141. +static inline void buffer_head_init_locks(struct buffer_head *bh)
  6142. +{
  6143. +#ifdef CONFIG_PREEMPT_RT_BASE
  6144. + spin_lock_init(&bh->b_uptodate_lock);
  6145. +#if IS_ENABLED(CONFIG_JBD2)
  6146. + spin_lock_init(&bh->b_state_lock);
  6147. + spin_lock_init(&bh->b_journal_head_lock);
  6148. +#endif
  6149. +#endif
  6150. +}
  6151. +
  6152. /*
  6153. * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
  6154. * and buffer_foo() functions.
  6155. diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
  6156. index 6fb1c34cf805..ccd2a5addb56 100644
  6157. --- a/include/linux/cgroup-defs.h
  6158. +++ b/include/linux/cgroup-defs.h
  6159. @@ -16,6 +16,7 @@
  6160. #include <linux/percpu-refcount.h>
  6161. #include <linux/percpu-rwsem.h>
  6162. #include <linux/workqueue.h>
  6163. +#include <linux/swork.h>
  6164. #ifdef CONFIG_CGROUPS
  6165. @@ -138,6 +139,7 @@ struct cgroup_subsys_state {
  6166. /* percpu_ref killing and RCU release */
  6167. struct rcu_head rcu_head;
  6168. struct work_struct destroy_work;
  6169. + struct swork_event destroy_swork;
  6170. };
  6171. /*
  6172. diff --git a/include/linux/completion.h b/include/linux/completion.h
  6173. index 5d5aaae3af43..3bca1590e29f 100644
  6174. --- a/include/linux/completion.h
  6175. +++ b/include/linux/completion.h
  6176. @@ -7,8 +7,7 @@
  6177. * Atomic wait-for-completion handler data structures.
  6178. * See kernel/sched/completion.c for details.
  6179. */
  6180. -
  6181. -#include <linux/wait.h>
  6182. +#include <linux/swait.h>
  6183. /*
  6184. * struct completion - structure used to maintain state for a "completion"
  6185. @@ -24,11 +23,11 @@
  6186. */
  6187. struct completion {
  6188. unsigned int done;
  6189. - wait_queue_head_t wait;
  6190. + struct swait_queue_head wait;
  6191. };
  6192. #define COMPLETION_INITIALIZER(work) \
  6193. - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
  6194. + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
  6195. #define COMPLETION_INITIALIZER_ONSTACK(work) \
  6196. ({ init_completion(&work); work; })
  6197. @@ -73,7 +72,7 @@ struct completion {
  6198. static inline void init_completion(struct completion *x)
  6199. {
  6200. x->done = 0;
  6201. - init_waitqueue_head(&x->wait);
  6202. + init_swait_queue_head(&x->wait);
  6203. }
  6204. /**
  6205. diff --git a/include/linux/cpu.h b/include/linux/cpu.h
  6206. index e571128ad99a..5e52d28c20c1 100644
  6207. --- a/include/linux/cpu.h
  6208. +++ b/include/linux/cpu.h
  6209. @@ -182,6 +182,8 @@ extern void get_online_cpus(void);
  6210. extern void put_online_cpus(void);
  6211. extern void cpu_hotplug_disable(void);
  6212. extern void cpu_hotplug_enable(void);
  6213. +extern void pin_current_cpu(void);
  6214. +extern void unpin_current_cpu(void);
  6215. #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
  6216. #define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri)
  6217. #define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
  6218. @@ -199,6 +201,8 @@ static inline void cpu_hotplug_done(void) {}
  6219. #define put_online_cpus() do { } while (0)
  6220. #define cpu_hotplug_disable() do { } while (0)
  6221. #define cpu_hotplug_enable() do { } while (0)
  6222. +static inline void pin_current_cpu(void) { }
  6223. +static inline void unpin_current_cpu(void) { }
  6224. #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
  6225. #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
  6226. /* These aren't inline functions due to a GCC bug. */
  6227. diff --git a/include/linux/dcache.h b/include/linux/dcache.h
  6228. index ff295e166b2c..d532c60f3fb5 100644
  6229. --- a/include/linux/dcache.h
  6230. +++ b/include/linux/dcache.h
  6231. @@ -11,6 +11,7 @@
  6232. #include <linux/rcupdate.h>
  6233. #include <linux/lockref.h>
  6234. #include <linux/stringhash.h>
  6235. +#include <linux/wait.h>
  6236. struct path;
  6237. struct vfsmount;
  6238. @@ -100,7 +101,7 @@ struct dentry {
  6239. union {
  6240. struct list_head d_lru; /* LRU list */
  6241. - wait_queue_head_t *d_wait; /* in-lookup ones only */
  6242. + struct swait_queue_head *d_wait; /* in-lookup ones only */
  6243. };
  6244. struct list_head d_child; /* child of parent list */
  6245. struct list_head d_subdirs; /* our children */
  6246. @@ -230,7 +231,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op
  6247. extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
  6248. extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
  6249. extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
  6250. - wait_queue_head_t *);
  6251. + struct swait_queue_head *);
  6252. extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
  6253. extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
  6254. extern struct dentry * d_exact_alias(struct dentry *, struct inode *);
  6255. diff --git a/include/linux/delay.h b/include/linux/delay.h
  6256. index a6ecb34cf547..37caab306336 100644
  6257. --- a/include/linux/delay.h
  6258. +++ b/include/linux/delay.h
  6259. @@ -52,4 +52,10 @@ static inline void ssleep(unsigned int seconds)
  6260. msleep(seconds * 1000);
  6261. }
  6262. +#ifdef CONFIG_PREEMPT_RT_FULL
  6263. +extern void cpu_chill(void);
  6264. +#else
  6265. +# define cpu_chill() cpu_relax()
  6266. +#endif
  6267. +
  6268. #endif /* defined(_LINUX_DELAY_H) */
  6269. diff --git a/include/linux/fs.h b/include/linux/fs.h
  6270. index d705ae084edd..ab1946f4a729 100644
  6271. --- a/include/linux/fs.h
  6272. +++ b/include/linux/fs.h
  6273. @@ -688,7 +688,7 @@ struct inode {
  6274. struct block_device *i_bdev;
  6275. struct cdev *i_cdev;
  6276. char *i_link;
  6277. - unsigned i_dir_seq;
  6278. + unsigned __i_dir_seq;
  6279. };
  6280. __u32 i_generation;
  6281. diff --git a/include/linux/highmem.h b/include/linux/highmem.h
  6282. index bb3f3297062a..a117a33ef72c 100644
  6283. --- a/include/linux/highmem.h
  6284. +++ b/include/linux/highmem.h
  6285. @@ -7,6 +7,7 @@
  6286. #include <linux/mm.h>
  6287. #include <linux/uaccess.h>
  6288. #include <linux/hardirq.h>
  6289. +#include <linux/sched.h>
  6290. #include <asm/cacheflush.h>
  6291. @@ -65,7 +66,7 @@ static inline void kunmap(struct page *page)
  6292. static inline void *kmap_atomic(struct page *page)
  6293. {
  6294. - preempt_disable();
  6295. + preempt_disable_nort();
  6296. pagefault_disable();
  6297. return page_address(page);
  6298. }
  6299. @@ -74,7 +75,7 @@ static inline void *kmap_atomic(struct page *page)
  6300. static inline void __kunmap_atomic(void *addr)
  6301. {
  6302. pagefault_enable();
  6303. - preempt_enable();
  6304. + preempt_enable_nort();
  6305. }
  6306. #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn))
  6307. @@ -86,32 +87,51 @@ static inline void __kunmap_atomic(void *addr)
  6308. #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
  6309. +#ifndef CONFIG_PREEMPT_RT_FULL
  6310. DECLARE_PER_CPU(int, __kmap_atomic_idx);
  6311. +#endif
  6312. static inline int kmap_atomic_idx_push(void)
  6313. {
  6314. +#ifndef CONFIG_PREEMPT_RT_FULL
  6315. int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1;
  6316. -#ifdef CONFIG_DEBUG_HIGHMEM
  6317. +# ifdef CONFIG_DEBUG_HIGHMEM
  6318. WARN_ON_ONCE(in_irq() && !irqs_disabled());
  6319. BUG_ON(idx >= KM_TYPE_NR);
  6320. -#endif
  6321. +# endif
  6322. return idx;
  6323. +#else
  6324. + current->kmap_idx++;
  6325. + BUG_ON(current->kmap_idx > KM_TYPE_NR);
  6326. + return current->kmap_idx - 1;
  6327. +#endif
  6328. }
  6329. static inline int kmap_atomic_idx(void)
  6330. {
  6331. +#ifndef CONFIG_PREEMPT_RT_FULL
  6332. return __this_cpu_read(__kmap_atomic_idx) - 1;
  6333. +#else
  6334. + return current->kmap_idx - 1;
  6335. +#endif
  6336. }
  6337. static inline void kmap_atomic_idx_pop(void)
  6338. {
  6339. -#ifdef CONFIG_DEBUG_HIGHMEM
  6340. +#ifndef CONFIG_PREEMPT_RT_FULL
  6341. +# ifdef CONFIG_DEBUG_HIGHMEM
  6342. int idx = __this_cpu_dec_return(__kmap_atomic_idx);
  6343. BUG_ON(idx < 0);
  6344. -#else
  6345. +# else
  6346. __this_cpu_dec(__kmap_atomic_idx);
  6347. +# endif
  6348. +#else
  6349. + current->kmap_idx--;
  6350. +# ifdef CONFIG_DEBUG_HIGHMEM
  6351. + BUG_ON(current->kmap_idx < 0);
  6352. +# endif
  6353. #endif
  6354. }
  6355. diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
  6356. index 5e00f80b1535..a34e10b55cde 100644
  6357. --- a/include/linux/hrtimer.h
  6358. +++ b/include/linux/hrtimer.h
  6359. @@ -87,6 +87,9 @@ enum hrtimer_restart {
  6360. * @function: timer expiry callback function
  6361. * @base: pointer to the timer base (per cpu and per clock)
  6362. * @state: state information (See bit values above)
  6363. + * @cb_entry: list entry to defer timers from hardirq context
  6364. + * @irqsafe: timer can run in hardirq context
  6365. + * @praecox: timer expiry time if expired at the time of programming
  6366. * @is_rel: Set if the timer was armed relative
  6367. * @start_pid: timer statistics field to store the pid of the task which
  6368. * started the timer
  6369. @@ -103,6 +106,11 @@ struct hrtimer {
  6370. enum hrtimer_restart (*function)(struct hrtimer *);
  6371. struct hrtimer_clock_base *base;
  6372. u8 state;
  6373. + struct list_head cb_entry;
  6374. + int irqsafe;
  6375. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  6376. + ktime_t praecox;
  6377. +#endif
  6378. u8 is_rel;
  6379. #ifdef CONFIG_TIMER_STATS
  6380. int start_pid;
  6381. @@ -123,11 +131,7 @@ struct hrtimer_sleeper {
  6382. struct task_struct *task;
  6383. };
  6384. -#ifdef CONFIG_64BIT
  6385. # define HRTIMER_CLOCK_BASE_ALIGN 64
  6386. -#else
  6387. -# define HRTIMER_CLOCK_BASE_ALIGN 32
  6388. -#endif
  6389. /**
  6390. * struct hrtimer_clock_base - the timer base for a specific clock
  6391. @@ -136,6 +140,7 @@ struct hrtimer_sleeper {
  6392. * timer to a base on another cpu.
  6393. * @clockid: clock id for per_cpu support
  6394. * @active: red black tree root node for the active timers
  6395. + * @expired: list head for deferred timers.
  6396. * @get_time: function to retrieve the current time of the clock
  6397. * @offset: offset of this clock to the monotonic base
  6398. */
  6399. @@ -144,6 +149,7 @@ struct hrtimer_clock_base {
  6400. int index;
  6401. clockid_t clockid;
  6402. struct timerqueue_head active;
  6403. + struct list_head expired;
  6404. ktime_t (*get_time)(void);
  6405. ktime_t offset;
  6406. } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN)));
  6407. @@ -187,6 +193,7 @@ struct hrtimer_cpu_base {
  6408. raw_spinlock_t lock;
  6409. seqcount_t seq;
  6410. struct hrtimer *running;
  6411. + struct hrtimer *running_soft;
  6412. unsigned int cpu;
  6413. unsigned int active_bases;
  6414. unsigned int clock_was_set_seq;
  6415. @@ -202,6 +209,9 @@ struct hrtimer_cpu_base {
  6416. unsigned int nr_retries;
  6417. unsigned int nr_hangs;
  6418. unsigned int max_hang_time;
  6419. +#endif
  6420. +#ifdef CONFIG_PREEMPT_RT_BASE
  6421. + wait_queue_head_t wait;
  6422. #endif
  6423. struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
  6424. } ____cacheline_aligned;
  6425. @@ -412,6 +422,13 @@ static inline void hrtimer_restart(struct hrtimer *timer)
  6426. hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
  6427. }
  6428. +/* Softirq preemption could deadlock timer removal */
  6429. +#ifdef CONFIG_PREEMPT_RT_BASE
  6430. + extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
  6431. +#else
  6432. +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
  6433. +#endif
  6434. +
  6435. /* Query timers: */
  6436. extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
  6437. @@ -436,9 +453,15 @@ static inline int hrtimer_is_queued(struct hrtimer *timer)
  6438. * Helper function to check, whether the timer is running the callback
  6439. * function
  6440. */
  6441. -static inline int hrtimer_callback_running(struct hrtimer *timer)
  6442. +static inline int hrtimer_callback_running(const struct hrtimer *timer)
  6443. {
  6444. - return timer->base->cpu_base->running == timer;
  6445. + if (timer->base->cpu_base->running == timer)
  6446. + return 1;
  6447. +#ifdef CONFIG_PREEMPT_RT_BASE
  6448. + if (timer->base->cpu_base->running_soft == timer)
  6449. + return 1;
  6450. +#endif
  6451. + return 0;
  6452. }
  6453. /* Forward a hrtimer so it expires after now: */
  6454. diff --git a/include/linux/idr.h b/include/linux/idr.h
  6455. index 083d61e92706..5899796f50cb 100644
  6456. --- a/include/linux/idr.h
  6457. +++ b/include/linux/idr.h
  6458. @@ -95,10 +95,14 @@ bool idr_is_empty(struct idr *idp);
  6459. * Each idr_preload() should be matched with an invocation of this
  6460. * function. See idr_preload() for details.
  6461. */
  6462. +#ifdef CONFIG_PREEMPT_RT_FULL
  6463. +void idr_preload_end(void);
  6464. +#else
  6465. static inline void idr_preload_end(void)
  6466. {
  6467. preempt_enable();
  6468. }
  6469. +#endif
  6470. /**
  6471. * idr_find - return pointer for given id
  6472. diff --git a/include/linux/init_task.h b/include/linux/init_task.h
  6473. index 325f649d77ff..a56e263f5005 100644
  6474. --- a/include/linux/init_task.h
  6475. +++ b/include/linux/init_task.h
  6476. @@ -150,6 +150,12 @@ extern struct task_group root_task_group;
  6477. # define INIT_PERF_EVENTS(tsk)
  6478. #endif
  6479. +#ifdef CONFIG_PREEMPT_RT_BASE
  6480. +# define INIT_TIMER_LIST .posix_timer_list = NULL,
  6481. +#else
  6482. +# define INIT_TIMER_LIST
  6483. +#endif
  6484. +
  6485. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  6486. # define INIT_VTIME(tsk) \
  6487. .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \
  6488. @@ -164,6 +170,7 @@ extern struct task_group root_task_group;
  6489. #ifdef CONFIG_RT_MUTEXES
  6490. # define INIT_RT_MUTEXES(tsk) \
  6491. .pi_waiters = RB_ROOT, \
  6492. + .pi_top_task = NULL, \
  6493. .pi_waiters_leftmost = NULL,
  6494. #else
  6495. # define INIT_RT_MUTEXES(tsk)
  6496. @@ -250,6 +257,7 @@ extern struct task_group root_task_group;
  6497. .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
  6498. .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
  6499. .timer_slack_ns = 50000, /* 50 usec default slack */ \
  6500. + INIT_TIMER_LIST \
  6501. .pids = { \
  6502. [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
  6503. [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
  6504. diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
  6505. index 72f0721f75e7..480972ae47d3 100644
  6506. --- a/include/linux/interrupt.h
  6507. +++ b/include/linux/interrupt.h
  6508. @@ -14,6 +14,7 @@
  6509. #include <linux/hrtimer.h>
  6510. #include <linux/kref.h>
  6511. #include <linux/workqueue.h>
  6512. +#include <linux/swork.h>
  6513. #include <linux/atomic.h>
  6514. #include <asm/ptrace.h>
  6515. @@ -61,6 +62,7 @@
  6516. * interrupt handler after suspending interrupts. For system
  6517. * wakeup devices users need to implement wakeup detection in
  6518. * their interrupt handlers.
  6519. + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT)
  6520. */
  6521. #define IRQF_SHARED 0x00000080
  6522. #define IRQF_PROBE_SHARED 0x00000100
  6523. @@ -74,6 +76,7 @@
  6524. #define IRQF_NO_THREAD 0x00010000
  6525. #define IRQF_EARLY_RESUME 0x00020000
  6526. #define IRQF_COND_SUSPEND 0x00040000
  6527. +#define IRQF_NO_SOFTIRQ_CALL 0x00080000
  6528. #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
  6529. @@ -196,7 +199,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id);
  6530. #ifdef CONFIG_LOCKDEP
  6531. # define local_irq_enable_in_hardirq() do { } while (0)
  6532. #else
  6533. -# define local_irq_enable_in_hardirq() local_irq_enable()
  6534. +# define local_irq_enable_in_hardirq() local_irq_enable_nort()
  6535. #endif
  6536. extern void disable_irq_nosync(unsigned int irq);
  6537. @@ -216,6 +219,7 @@ extern void resume_device_irqs(void);
  6538. * struct irq_affinity_notify - context for notification of IRQ affinity changes
  6539. * @irq: Interrupt to which notification applies
  6540. * @kref: Reference count, for internal use
  6541. + * @swork: Swork item, for internal use
  6542. * @work: Work item, for internal use
  6543. * @notify: Function to be called on change. This will be
  6544. * called in process context.
  6545. @@ -227,7 +231,11 @@ extern void resume_device_irqs(void);
  6546. struct irq_affinity_notify {
  6547. unsigned int irq;
  6548. struct kref kref;
  6549. +#ifdef CONFIG_PREEMPT_RT_BASE
  6550. + struct swork_event swork;
  6551. +#else
  6552. struct work_struct work;
  6553. +#endif
  6554. void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
  6555. void (*release)(struct kref *ref);
  6556. };
  6557. @@ -406,9 +414,13 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
  6558. bool state);
  6559. #ifdef CONFIG_IRQ_FORCED_THREADING
  6560. +# ifndef CONFIG_PREEMPT_RT_BASE
  6561. extern bool force_irqthreads;
  6562. +# else
  6563. +# define force_irqthreads (true)
  6564. +# endif
  6565. #else
  6566. -#define force_irqthreads (0)
  6567. +#define force_irqthreads (false)
  6568. #endif
  6569. #ifndef __ARCH_SET_SOFTIRQ_PENDING
  6570. @@ -465,9 +477,10 @@ struct softirq_action
  6571. void (*action)(struct softirq_action *);
  6572. };
  6573. +#ifndef CONFIG_PREEMPT_RT_FULL
  6574. asmlinkage void do_softirq(void);
  6575. asmlinkage void __do_softirq(void);
  6576. -
  6577. +static inline void thread_do_softirq(void) { do_softirq(); }
  6578. #ifdef __ARCH_HAS_DO_SOFTIRQ
  6579. void do_softirq_own_stack(void);
  6580. #else
  6581. @@ -476,13 +489,25 @@ static inline void do_softirq_own_stack(void)
  6582. __do_softirq();
  6583. }
  6584. #endif
  6585. +#else
  6586. +extern void thread_do_softirq(void);
  6587. +#endif
  6588. extern void open_softirq(int nr, void (*action)(struct softirq_action *));
  6589. extern void softirq_init(void);
  6590. extern void __raise_softirq_irqoff(unsigned int nr);
  6591. +#ifdef CONFIG_PREEMPT_RT_FULL
  6592. +extern void __raise_softirq_irqoff_ksoft(unsigned int nr);
  6593. +#else
  6594. +static inline void __raise_softirq_irqoff_ksoft(unsigned int nr)
  6595. +{
  6596. + __raise_softirq_irqoff(nr);
  6597. +}
  6598. +#endif
  6599. extern void raise_softirq_irqoff(unsigned int nr);
  6600. extern void raise_softirq(unsigned int nr);
  6601. +extern void softirq_check_pending_idle(void);
  6602. DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
  6603. @@ -504,8 +529,9 @@ static inline struct task_struct *this_cpu_ksoftirqd(void)
  6604. to be executed on some cpu at least once after this.
  6605. * If the tasklet is already scheduled, but its execution is still not
  6606. started, it will be executed only once.
  6607. - * If this tasklet is already running on another CPU (or schedule is called
  6608. - from tasklet itself), it is rescheduled for later.
  6609. + * If this tasklet is already running on another CPU, it is rescheduled
  6610. + for later.
  6611. + * Schedule must not be called from the tasklet itself (a lockup occurs)
  6612. * Tasklet is strictly serialized wrt itself, but not
  6613. wrt another tasklets. If client needs some intertask synchronization,
  6614. he makes it with spinlocks.
  6615. @@ -530,27 +556,36 @@ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data }
  6616. enum
  6617. {
  6618. TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */
  6619. - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */
  6620. + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */
  6621. + TASKLET_STATE_PENDING /* Tasklet is pending */
  6622. };
  6623. -#ifdef CONFIG_SMP
  6624. +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED)
  6625. +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN)
  6626. +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING)
  6627. +
  6628. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  6629. static inline int tasklet_trylock(struct tasklet_struct *t)
  6630. {
  6631. return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
  6632. }
  6633. +static inline int tasklet_tryunlock(struct tasklet_struct *t)
  6634. +{
  6635. + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN;
  6636. +}
  6637. +
  6638. static inline void tasklet_unlock(struct tasklet_struct *t)
  6639. {
  6640. smp_mb__before_atomic();
  6641. clear_bit(TASKLET_STATE_RUN, &(t)->state);
  6642. }
  6643. -static inline void tasklet_unlock_wait(struct tasklet_struct *t)
  6644. -{
  6645. - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
  6646. -}
  6647. +extern void tasklet_unlock_wait(struct tasklet_struct *t);
  6648. +
  6649. #else
  6650. #define tasklet_trylock(t) 1
  6651. +#define tasklet_tryunlock(t) 1
  6652. #define tasklet_unlock_wait(t) do { } while (0)
  6653. #define tasklet_unlock(t) do { } while (0)
  6654. #endif
  6655. @@ -599,12 +634,7 @@ static inline void tasklet_disable(struct tasklet_struct *t)
  6656. smp_mb();
  6657. }
  6658. -static inline void tasklet_enable(struct tasklet_struct *t)
  6659. -{
  6660. - smp_mb__before_atomic();
  6661. - atomic_dec(&t->count);
  6662. -}
  6663. -
  6664. +extern void tasklet_enable(struct tasklet_struct *t);
  6665. extern void tasklet_kill(struct tasklet_struct *t);
  6666. extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
  6667. extern void tasklet_init(struct tasklet_struct *t,
  6668. @@ -635,6 +665,12 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer)
  6669. tasklet_kill(&ttimer->tasklet);
  6670. }
  6671. +#ifdef CONFIG_PREEMPT_RT_FULL
  6672. +extern void softirq_early_init(void);
  6673. +#else
  6674. +static inline void softirq_early_init(void) { }
  6675. +#endif
  6676. +
  6677. /*
  6678. * Autoprobing for irqs:
  6679. *
  6680. diff --git a/include/linux/irq.h b/include/linux/irq.h
  6681. index 39e3254e5769..8ebac94fbb9f 100644
  6682. --- a/include/linux/irq.h
  6683. +++ b/include/linux/irq.h
  6684. @@ -72,6 +72,7 @@ enum irqchip_irq_state;
  6685. * IRQ_IS_POLLED - Always polled by another interrupt. Exclude
  6686. * it from the spurious interrupt detection
  6687. * mechanism and from core side polling.
  6688. + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT)
  6689. * IRQ_DISABLE_UNLAZY - Disable lazy irq disable
  6690. */
  6691. enum {
  6692. @@ -99,13 +100,14 @@ enum {
  6693. IRQ_PER_CPU_DEVID = (1 << 17),
  6694. IRQ_IS_POLLED = (1 << 18),
  6695. IRQ_DISABLE_UNLAZY = (1 << 19),
  6696. + IRQ_NO_SOFTIRQ_CALL = (1 << 20),
  6697. };
  6698. #define IRQF_MODIFY_MASK \
  6699. (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
  6700. IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
  6701. IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \
  6702. - IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY)
  6703. + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_NO_SOFTIRQ_CALL)
  6704. #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
  6705. diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
  6706. index 47b9ebd4a74f..2543aab05daa 100644
  6707. --- a/include/linux/irq_work.h
  6708. +++ b/include/linux/irq_work.h
  6709. @@ -16,6 +16,7 @@
  6710. #define IRQ_WORK_BUSY 2UL
  6711. #define IRQ_WORK_FLAGS 3UL
  6712. #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */
  6713. +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */
  6714. struct irq_work {
  6715. unsigned long flags;
  6716. @@ -51,4 +52,10 @@ static inline bool irq_work_needs_cpu(void) { return false; }
  6717. static inline void irq_work_run(void) { }
  6718. #endif
  6719. +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
  6720. +void irq_work_tick_soft(void);
  6721. +#else
  6722. +static inline void irq_work_tick_soft(void) { }
  6723. +#endif
  6724. +
  6725. #endif /* _LINUX_IRQ_WORK_H */
  6726. diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
  6727. index c9be57931b58..eeeb540971ae 100644
  6728. --- a/include/linux/irqdesc.h
  6729. +++ b/include/linux/irqdesc.h
  6730. @@ -66,6 +66,7 @@ struct irq_desc {
  6731. unsigned int irqs_unhandled;
  6732. atomic_t threads_handled;
  6733. int threads_handled_last;
  6734. + u64 random_ip;
  6735. raw_spinlock_t lock;
  6736. struct cpumask *percpu_enabled;
  6737. const struct cpumask *percpu_affinity;
  6738. diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
  6739. index 5dd1272d1ab2..9b77034f7c5e 100644
  6740. --- a/include/linux/irqflags.h
  6741. +++ b/include/linux/irqflags.h
  6742. @@ -25,8 +25,6 @@
  6743. # define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
  6744. # define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
  6745. # define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
  6746. -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
  6747. -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
  6748. # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
  6749. #else
  6750. # define trace_hardirqs_on() do { } while (0)
  6751. @@ -39,9 +37,15 @@
  6752. # define trace_softirqs_enabled(p) 0
  6753. # define trace_hardirq_enter() do { } while (0)
  6754. # define trace_hardirq_exit() do { } while (0)
  6755. +# define INIT_TRACE_IRQFLAGS
  6756. +#endif
  6757. +
  6758. +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL)
  6759. +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
  6760. +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
  6761. +#else
  6762. # define lockdep_softirq_enter() do { } while (0)
  6763. # define lockdep_softirq_exit() do { } while (0)
  6764. -# define INIT_TRACE_IRQFLAGS
  6765. #endif
  6766. #if defined(CONFIG_IRQSOFF_TRACER) || \
  6767. @@ -148,4 +152,23 @@
  6768. #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
  6769. +/*
  6770. + * local_irq* variants depending on RT/!RT
  6771. + */
  6772. +#ifdef CONFIG_PREEMPT_RT_FULL
  6773. +# define local_irq_disable_nort() do { } while (0)
  6774. +# define local_irq_enable_nort() do { } while (0)
  6775. +# define local_irq_save_nort(flags) local_save_flags(flags)
  6776. +# define local_irq_restore_nort(flags) (void)(flags)
  6777. +# define local_irq_disable_rt() local_irq_disable()
  6778. +# define local_irq_enable_rt() local_irq_enable()
  6779. +#else
  6780. +# define local_irq_disable_nort() local_irq_disable()
  6781. +# define local_irq_enable_nort() local_irq_enable()
  6782. +# define local_irq_save_nort(flags) local_irq_save(flags)
  6783. +# define local_irq_restore_nort(flags) local_irq_restore(flags)
  6784. +# define local_irq_disable_rt() do { } while (0)
  6785. +# define local_irq_enable_rt() do { } while (0)
  6786. +#endif
  6787. +
  6788. #endif
  6789. diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
  6790. index dfaa1f4dcb0c..d57dd06544a1 100644
  6791. --- a/include/linux/jbd2.h
  6792. +++ b/include/linux/jbd2.h
  6793. @@ -347,32 +347,56 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh)
  6794. static inline void jbd_lock_bh_state(struct buffer_head *bh)
  6795. {
  6796. +#ifndef CONFIG_PREEMPT_RT_BASE
  6797. bit_spin_lock(BH_State, &bh->b_state);
  6798. +#else
  6799. + spin_lock(&bh->b_state_lock);
  6800. +#endif
  6801. }
  6802. static inline int jbd_trylock_bh_state(struct buffer_head *bh)
  6803. {
  6804. +#ifndef CONFIG_PREEMPT_RT_BASE
  6805. return bit_spin_trylock(BH_State, &bh->b_state);
  6806. +#else
  6807. + return spin_trylock(&bh->b_state_lock);
  6808. +#endif
  6809. }
  6810. static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
  6811. {
  6812. +#ifndef CONFIG_PREEMPT_RT_BASE
  6813. return bit_spin_is_locked(BH_State, &bh->b_state);
  6814. +#else
  6815. + return spin_is_locked(&bh->b_state_lock);
  6816. +#endif
  6817. }
  6818. static inline void jbd_unlock_bh_state(struct buffer_head *bh)
  6819. {
  6820. +#ifndef CONFIG_PREEMPT_RT_BASE
  6821. bit_spin_unlock(BH_State, &bh->b_state);
  6822. +#else
  6823. + spin_unlock(&bh->b_state_lock);
  6824. +#endif
  6825. }
  6826. static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
  6827. {
  6828. +#ifndef CONFIG_PREEMPT_RT_BASE
  6829. bit_spin_lock(BH_JournalHead, &bh->b_state);
  6830. +#else
  6831. + spin_lock(&bh->b_journal_head_lock);
  6832. +#endif
  6833. }
  6834. static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
  6835. {
  6836. +#ifndef CONFIG_PREEMPT_RT_BASE
  6837. bit_spin_unlock(BH_JournalHead, &bh->b_state);
  6838. +#else
  6839. + spin_unlock(&bh->b_journal_head_lock);
  6840. +#endif
  6841. }
  6842. #define J_ASSERT(assert) BUG_ON(!(assert))
  6843. diff --git a/include/linux/kdb.h b/include/linux/kdb.h
  6844. index 410decacff8f..0861bebfc188 100644
  6845. --- a/include/linux/kdb.h
  6846. +++ b/include/linux/kdb.h
  6847. @@ -167,6 +167,7 @@ extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt,
  6848. extern __printf(1, 2) int kdb_printf(const char *, ...);
  6849. typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
  6850. +#define in_kdb_printk() (kdb_trap_printk)
  6851. extern void kdb_init(int level);
  6852. /* Access to kdb specific polling devices */
  6853. @@ -201,6 +202,7 @@ extern int kdb_register_flags(char *, kdb_func_t, char *, char *,
  6854. extern int kdb_unregister(char *);
  6855. #else /* ! CONFIG_KGDB_KDB */
  6856. static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; }
  6857. +#define in_kdb_printk() (0)
  6858. static inline void kdb_init(int level) {}
  6859. static inline int kdb_register(char *cmd, kdb_func_t func, char *usage,
  6860. char *help, short minlen) { return 0; }
  6861. diff --git a/include/linux/kernel.h b/include/linux/kernel.h
  6862. index bc6ed52a39b9..7894d55e4998 100644
  6863. --- a/include/linux/kernel.h
  6864. +++ b/include/linux/kernel.h
  6865. @@ -194,6 +194,9 @@ extern int _cond_resched(void);
  6866. */
  6867. # define might_sleep() \
  6868. do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
  6869. +
  6870. +# define might_sleep_no_state_check() \
  6871. + do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
  6872. # define sched_annotate_sleep() (current->task_state_change = 0)
  6873. #else
  6874. static inline void ___might_sleep(const char *file, int line,
  6875. @@ -201,6 +204,7 @@ extern int _cond_resched(void);
  6876. static inline void __might_sleep(const char *file, int line,
  6877. int preempt_offset) { }
  6878. # define might_sleep() do { might_resched(); } while (0)
  6879. +# define might_sleep_no_state_check() do { might_resched(); } while (0)
  6880. # define sched_annotate_sleep() do { } while (0)
  6881. #endif
  6882. @@ -488,6 +492,7 @@ extern enum system_states {
  6883. SYSTEM_HALT,
  6884. SYSTEM_POWER_OFF,
  6885. SYSTEM_RESTART,
  6886. + SYSTEM_SUSPEND,
  6887. } system_state;
  6888. #define TAINT_PROPRIETARY_MODULE 0
  6889. diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
  6890. index cb483305e1f5..4e5062316bb6 100644
  6891. --- a/include/linux/list_bl.h
  6892. +++ b/include/linux/list_bl.h
  6893. @@ -2,6 +2,7 @@
  6894. #define _LINUX_LIST_BL_H
  6895. #include <linux/list.h>
  6896. +#include <linux/spinlock.h>
  6897. #include <linux/bit_spinlock.h>
  6898. /*
  6899. @@ -32,13 +33,24 @@
  6900. struct hlist_bl_head {
  6901. struct hlist_bl_node *first;
  6902. +#ifdef CONFIG_PREEMPT_RT_BASE
  6903. + raw_spinlock_t lock;
  6904. +#endif
  6905. };
  6906. struct hlist_bl_node {
  6907. struct hlist_bl_node *next, **pprev;
  6908. };
  6909. -#define INIT_HLIST_BL_HEAD(ptr) \
  6910. - ((ptr)->first = NULL)
  6911. +
  6912. +#ifdef CONFIG_PREEMPT_RT_BASE
  6913. +#define INIT_HLIST_BL_HEAD(h) \
  6914. +do { \
  6915. + (h)->first = NULL; \
  6916. + raw_spin_lock_init(&(h)->lock); \
  6917. +} while (0)
  6918. +#else
  6919. +#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL
  6920. +#endif
  6921. static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
  6922. {
  6923. @@ -118,12 +130,26 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n)
  6924. static inline void hlist_bl_lock(struct hlist_bl_head *b)
  6925. {
  6926. +#ifndef CONFIG_PREEMPT_RT_BASE
  6927. bit_spin_lock(0, (unsigned long *)b);
  6928. +#else
  6929. + raw_spin_lock(&b->lock);
  6930. +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  6931. + __set_bit(0, (unsigned long *)b);
  6932. +#endif
  6933. +#endif
  6934. }
  6935. static inline void hlist_bl_unlock(struct hlist_bl_head *b)
  6936. {
  6937. +#ifndef CONFIG_PREEMPT_RT_BASE
  6938. __bit_spin_unlock(0, (unsigned long *)b);
  6939. +#else
  6940. +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  6941. + __clear_bit(0, (unsigned long *)b);
  6942. +#endif
  6943. + raw_spin_unlock(&b->lock);
  6944. +#endif
  6945. }
  6946. static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
  6947. diff --git a/include/linux/locallock.h b/include/linux/locallock.h
  6948. new file mode 100644
  6949. index 000000000000..280f884a05a3
  6950. --- /dev/null
  6951. +++ b/include/linux/locallock.h
  6952. @@ -0,0 +1,287 @@
  6953. +#ifndef _LINUX_LOCALLOCK_H
  6954. +#define _LINUX_LOCALLOCK_H
  6955. +
  6956. +#include <linux/percpu.h>
  6957. +#include <linux/spinlock.h>
  6958. +
  6959. +#ifdef CONFIG_PREEMPT_RT_BASE
  6960. +
  6961. +#ifdef CONFIG_DEBUG_SPINLOCK
  6962. +# define LL_WARN(cond) WARN_ON(cond)
  6963. +#else
  6964. +# define LL_WARN(cond) do { } while (0)
  6965. +#endif
  6966. +
  6967. +/*
  6968. + * per cpu lock based substitute for local_irq_*()
  6969. + */
  6970. +struct local_irq_lock {
  6971. + spinlock_t lock;
  6972. + struct task_struct *owner;
  6973. + int nestcnt;
  6974. + unsigned long flags;
  6975. +};
  6976. +
  6977. +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \
  6978. + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \
  6979. + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
  6980. +
  6981. +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \
  6982. + DECLARE_PER_CPU(struct local_irq_lock, lvar)
  6983. +
  6984. +#define local_irq_lock_init(lvar) \
  6985. + do { \
  6986. + int __cpu; \
  6987. + for_each_possible_cpu(__cpu) \
  6988. + spin_lock_init(&per_cpu(lvar, __cpu).lock); \
  6989. + } while (0)
  6990. +
  6991. +/*
  6992. + * spin_lock|trylock|unlock_local flavour that does not migrate disable
  6993. + * used for __local_lock|trylock|unlock where get_local_var/put_local_var
  6994. + * already takes care of the migrate_disable/enable
  6995. + * for CONFIG_PREEMPT_BASE map to the normal spin_* calls.
  6996. + */
  6997. +#ifdef CONFIG_PREEMPT_RT_FULL
  6998. +# define spin_lock_local(lock) rt_spin_lock__no_mg(lock)
  6999. +# define spin_trylock_local(lock) rt_spin_trylock__no_mg(lock)
  7000. +# define spin_unlock_local(lock) rt_spin_unlock__no_mg(lock)
  7001. +#else
  7002. +# define spin_lock_local(lock) spin_lock(lock)
  7003. +# define spin_trylock_local(lock) spin_trylock(lock)
  7004. +# define spin_unlock_local(lock) spin_unlock(lock)
  7005. +#endif
  7006. +
  7007. +static inline void __local_lock(struct local_irq_lock *lv)
  7008. +{
  7009. + if (lv->owner != current) {
  7010. + spin_lock_local(&lv->lock);
  7011. + LL_WARN(lv->owner);
  7012. + LL_WARN(lv->nestcnt);
  7013. + lv->owner = current;
  7014. + }
  7015. + lv->nestcnt++;
  7016. +}
  7017. +
  7018. +#define local_lock(lvar) \
  7019. + do { __local_lock(&get_local_var(lvar)); } while (0)
  7020. +
  7021. +#define local_lock_on(lvar, cpu) \
  7022. + do { __local_lock(&per_cpu(lvar, cpu)); } while (0)
  7023. +
  7024. +static inline int __local_trylock(struct local_irq_lock *lv)
  7025. +{
  7026. + if (lv->owner != current && spin_trylock_local(&lv->lock)) {
  7027. + LL_WARN(lv->owner);
  7028. + LL_WARN(lv->nestcnt);
  7029. + lv->owner = current;
  7030. + lv->nestcnt = 1;
  7031. + return 1;
  7032. + } else if (lv->owner == current) {
  7033. + lv->nestcnt++;
  7034. + return 1;
  7035. + }
  7036. + return 0;
  7037. +}
  7038. +
  7039. +#define local_trylock(lvar) \
  7040. + ({ \
  7041. + int __locked; \
  7042. + __locked = __local_trylock(&get_local_var(lvar)); \
  7043. + if (!__locked) \
  7044. + put_local_var(lvar); \
  7045. + __locked; \
  7046. + })
  7047. +
  7048. +static inline void __local_unlock(struct local_irq_lock *lv)
  7049. +{
  7050. + LL_WARN(lv->nestcnt == 0);
  7051. + LL_WARN(lv->owner != current);
  7052. + if (--lv->nestcnt)
  7053. + return;
  7054. +
  7055. + lv->owner = NULL;
  7056. + spin_unlock_local(&lv->lock);
  7057. +}
  7058. +
  7059. +#define local_unlock(lvar) \
  7060. + do { \
  7061. + __local_unlock(this_cpu_ptr(&lvar)); \
  7062. + put_local_var(lvar); \
  7063. + } while (0)
  7064. +
  7065. +#define local_unlock_on(lvar, cpu) \
  7066. + do { __local_unlock(&per_cpu(lvar, cpu)); } while (0)
  7067. +
  7068. +static inline void __local_lock_irq(struct local_irq_lock *lv)
  7069. +{
  7070. + spin_lock_irqsave(&lv->lock, lv->flags);
  7071. + LL_WARN(lv->owner);
  7072. + LL_WARN(lv->nestcnt);
  7073. + lv->owner = current;
  7074. + lv->nestcnt = 1;
  7075. +}
  7076. +
  7077. +#define local_lock_irq(lvar) \
  7078. + do { __local_lock_irq(&get_local_var(lvar)); } while (0)
  7079. +
  7080. +#define local_lock_irq_on(lvar, cpu) \
  7081. + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0)
  7082. +
  7083. +static inline void __local_unlock_irq(struct local_irq_lock *lv)
  7084. +{
  7085. + LL_WARN(!lv->nestcnt);
  7086. + LL_WARN(lv->owner != current);
  7087. + lv->owner = NULL;
  7088. + lv->nestcnt = 0;
  7089. + spin_unlock_irq(&lv->lock);
  7090. +}
  7091. +
  7092. +#define local_unlock_irq(lvar) \
  7093. + do { \
  7094. + __local_unlock_irq(this_cpu_ptr(&lvar)); \
  7095. + put_local_var(lvar); \
  7096. + } while (0)
  7097. +
  7098. +#define local_unlock_irq_on(lvar, cpu) \
  7099. + do { \
  7100. + __local_unlock_irq(&per_cpu(lvar, cpu)); \
  7101. + } while (0)
  7102. +
  7103. +static inline int __local_lock_irqsave(struct local_irq_lock *lv)
  7104. +{
  7105. + if (lv->owner != current) {
  7106. + __local_lock_irq(lv);
  7107. + return 0;
  7108. + } else {
  7109. + lv->nestcnt++;
  7110. + return 1;
  7111. + }
  7112. +}
  7113. +
  7114. +#define local_lock_irqsave(lvar, _flags) \
  7115. + do { \
  7116. + if (__local_lock_irqsave(&get_local_var(lvar))) \
  7117. + put_local_var(lvar); \
  7118. + _flags = __this_cpu_read(lvar.flags); \
  7119. + } while (0)
  7120. +
  7121. +#define local_lock_irqsave_on(lvar, _flags, cpu) \
  7122. + do { \
  7123. + __local_lock_irqsave(&per_cpu(lvar, cpu)); \
  7124. + _flags = per_cpu(lvar, cpu).flags; \
  7125. + } while (0)
  7126. +
  7127. +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
  7128. + unsigned long flags)
  7129. +{
  7130. + LL_WARN(!lv->nestcnt);
  7131. + LL_WARN(lv->owner != current);
  7132. + if (--lv->nestcnt)
  7133. + return 0;
  7134. +
  7135. + lv->owner = NULL;
  7136. + spin_unlock_irqrestore(&lv->lock, lv->flags);
  7137. + return 1;
  7138. +}
  7139. +
  7140. +#define local_unlock_irqrestore(lvar, flags) \
  7141. + do { \
  7142. + if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \
  7143. + put_local_var(lvar); \
  7144. + } while (0)
  7145. +
  7146. +#define local_unlock_irqrestore_on(lvar, flags, cpu) \
  7147. + do { \
  7148. + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \
  7149. + } while (0)
  7150. +
  7151. +#define local_spin_trylock_irq(lvar, lock) \
  7152. + ({ \
  7153. + int __locked; \
  7154. + local_lock_irq(lvar); \
  7155. + __locked = spin_trylock(lock); \
  7156. + if (!__locked) \
  7157. + local_unlock_irq(lvar); \
  7158. + __locked; \
  7159. + })
  7160. +
  7161. +#define local_spin_lock_irq(lvar, lock) \
  7162. + do { \
  7163. + local_lock_irq(lvar); \
  7164. + spin_lock(lock); \
  7165. + } while (0)
  7166. +
  7167. +#define local_spin_unlock_irq(lvar, lock) \
  7168. + do { \
  7169. + spin_unlock(lock); \
  7170. + local_unlock_irq(lvar); \
  7171. + } while (0)
  7172. +
  7173. +#define local_spin_lock_irqsave(lvar, lock, flags) \
  7174. + do { \
  7175. + local_lock_irqsave(lvar, flags); \
  7176. + spin_lock(lock); \
  7177. + } while (0)
  7178. +
  7179. +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
  7180. + do { \
  7181. + spin_unlock(lock); \
  7182. + local_unlock_irqrestore(lvar, flags); \
  7183. + } while (0)
  7184. +
  7185. +#define get_locked_var(lvar, var) \
  7186. + (*({ \
  7187. + local_lock(lvar); \
  7188. + this_cpu_ptr(&var); \
  7189. + }))
  7190. +
  7191. +#define put_locked_var(lvar, var) local_unlock(lvar);
  7192. +
  7193. +#define local_lock_cpu(lvar) \
  7194. + ({ \
  7195. + local_lock(lvar); \
  7196. + smp_processor_id(); \
  7197. + })
  7198. +
  7199. +#define local_unlock_cpu(lvar) local_unlock(lvar)
  7200. +
  7201. +#else /* PREEMPT_RT_BASE */
  7202. +
  7203. +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar
  7204. +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar
  7205. +
  7206. +static inline void local_irq_lock_init(int lvar) { }
  7207. +
  7208. +#define local_trylock(lvar) \
  7209. + ({ \
  7210. + preempt_disable(); \
  7211. + 1; \
  7212. + })
  7213. +
  7214. +#define local_lock(lvar) preempt_disable()
  7215. +#define local_unlock(lvar) preempt_enable()
  7216. +#define local_lock_irq(lvar) local_irq_disable()
  7217. +#define local_lock_irq_on(lvar, cpu) local_irq_disable()
  7218. +#define local_unlock_irq(lvar) local_irq_enable()
  7219. +#define local_unlock_irq_on(lvar, cpu) local_irq_enable()
  7220. +#define local_lock_irqsave(lvar, flags) local_irq_save(flags)
  7221. +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags)
  7222. +
  7223. +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock)
  7224. +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock)
  7225. +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock)
  7226. +#define local_spin_lock_irqsave(lvar, lock, flags) \
  7227. + spin_lock_irqsave(lock, flags)
  7228. +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
  7229. + spin_unlock_irqrestore(lock, flags)
  7230. +
  7231. +#define get_locked_var(lvar, var) get_cpu_var(var)
  7232. +#define put_locked_var(lvar, var) put_cpu_var(var)
  7233. +
  7234. +#define local_lock_cpu(lvar) get_cpu()
  7235. +#define local_unlock_cpu(lvar) put_cpu()
  7236. +
  7237. +#endif
  7238. +
  7239. +#endif
  7240. diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
  7241. index e8471c2ca83a..08bde1a7a987 100644
  7242. --- a/include/linux/mm_types.h
  7243. +++ b/include/linux/mm_types.h
  7244. @@ -11,6 +11,7 @@
  7245. #include <linux/completion.h>
  7246. #include <linux/cpumask.h>
  7247. #include <linux/uprobes.h>
  7248. +#include <linux/rcupdate.h>
  7249. #include <linux/page-flags-layout.h>
  7250. #include <linux/workqueue.h>
  7251. #include <asm/page.h>
  7252. @@ -513,6 +514,9 @@ struct mm_struct {
  7253. bool tlb_flush_batched;
  7254. #endif
  7255. struct uprobes_state uprobes_state;
  7256. +#ifdef CONFIG_PREEMPT_RT_BASE
  7257. + struct rcu_head delayed_drop;
  7258. +#endif
  7259. #ifdef CONFIG_X86_INTEL_MPX
  7260. /* address of the bounds directory */
  7261. void __user *bd_addr;
  7262. diff --git a/include/linux/module.h b/include/linux/module.h
  7263. index 0c3207d26ac0..5944baaa3f28 100644
  7264. --- a/include/linux/module.h
  7265. +++ b/include/linux/module.h
  7266. @@ -496,6 +496,7 @@ static inline int module_is_live(struct module *mod)
  7267. struct module *__module_text_address(unsigned long addr);
  7268. struct module *__module_address(unsigned long addr);
  7269. bool is_module_address(unsigned long addr);
  7270. +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr);
  7271. bool is_module_percpu_address(unsigned long addr);
  7272. bool is_module_text_address(unsigned long addr);
  7273. @@ -663,6 +664,11 @@ static inline bool is_module_percpu_address(unsigned long addr)
  7274. return false;
  7275. }
  7276. +static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
  7277. +{
  7278. + return false;
  7279. +}
  7280. +
  7281. static inline bool is_module_text_address(unsigned long addr)
  7282. {
  7283. return false;
  7284. diff --git a/include/linux/mutex.h b/include/linux/mutex.h
  7285. index 2cb7531e7d7a..b3fdfc820216 100644
  7286. --- a/include/linux/mutex.h
  7287. +++ b/include/linux/mutex.h
  7288. @@ -19,6 +19,17 @@
  7289. #include <asm/processor.h>
  7290. #include <linux/osq_lock.h>
  7291. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  7292. +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
  7293. + , .dep_map = { .name = #lockname }
  7294. +#else
  7295. +# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
  7296. +#endif
  7297. +
  7298. +#ifdef CONFIG_PREEMPT_RT_FULL
  7299. +# include <linux/mutex_rt.h>
  7300. +#else
  7301. +
  7302. /*
  7303. * Simple, straightforward mutexes with strict semantics:
  7304. *
  7305. @@ -99,13 +110,6 @@ do { \
  7306. static inline void mutex_destroy(struct mutex *lock) {}
  7307. #endif
  7308. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  7309. -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
  7310. - , .dep_map = { .name = #lockname }
  7311. -#else
  7312. -# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
  7313. -#endif
  7314. -
  7315. #define __MUTEX_INITIALIZER(lockname) \
  7316. { .count = ATOMIC_INIT(1) \
  7317. , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
  7318. @@ -173,6 +177,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
  7319. extern int mutex_trylock(struct mutex *lock);
  7320. extern void mutex_unlock(struct mutex *lock);
  7321. +#endif /* !PREEMPT_RT_FULL */
  7322. +
  7323. extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
  7324. #endif /* __LINUX_MUTEX_H */
  7325. diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h
  7326. new file mode 100644
  7327. index 000000000000..e0284edec655
  7328. --- /dev/null
  7329. +++ b/include/linux/mutex_rt.h
  7330. @@ -0,0 +1,89 @@
  7331. +#ifndef __LINUX_MUTEX_RT_H
  7332. +#define __LINUX_MUTEX_RT_H
  7333. +
  7334. +#ifndef __LINUX_MUTEX_H
  7335. +#error "Please include mutex.h"
  7336. +#endif
  7337. +
  7338. +#include <linux/rtmutex.h>
  7339. +
  7340. +/* FIXME: Just for __lockfunc */
  7341. +#include <linux/spinlock.h>
  7342. +
  7343. +struct mutex {
  7344. + struct rt_mutex lock;
  7345. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  7346. + struct lockdep_map dep_map;
  7347. +#endif
  7348. +};
  7349. +
  7350. +#define __MUTEX_INITIALIZER(mutexname) \
  7351. + { \
  7352. + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \
  7353. + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \
  7354. + }
  7355. +
  7356. +#define DEFINE_MUTEX(mutexname) \
  7357. + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
  7358. +
  7359. +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key);
  7360. +extern void __lockfunc _mutex_lock(struct mutex *lock);
  7361. +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock);
  7362. +extern int __lockfunc _mutex_lock_killable(struct mutex *lock);
  7363. +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass);
  7364. +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
  7365. +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass);
  7366. +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass);
  7367. +extern int __lockfunc _mutex_trylock(struct mutex *lock);
  7368. +extern void __lockfunc _mutex_unlock(struct mutex *lock);
  7369. +
  7370. +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock)
  7371. +#define mutex_lock(l) _mutex_lock(l)
  7372. +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l)
  7373. +#define mutex_lock_killable(l) _mutex_lock_killable(l)
  7374. +#define mutex_trylock(l) _mutex_trylock(l)
  7375. +#define mutex_unlock(l) _mutex_unlock(l)
  7376. +
  7377. +#ifdef CONFIG_DEBUG_MUTEXES
  7378. +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock)
  7379. +#else
  7380. +static inline void mutex_destroy(struct mutex *lock) {}
  7381. +#endif
  7382. +
  7383. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  7384. +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s)
  7385. +# define mutex_lock_interruptible_nested(l, s) \
  7386. + _mutex_lock_interruptible_nested(l, s)
  7387. +# define mutex_lock_killable_nested(l, s) \
  7388. + _mutex_lock_killable_nested(l, s)
  7389. +
  7390. +# define mutex_lock_nest_lock(lock, nest_lock) \
  7391. +do { \
  7392. + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
  7393. + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
  7394. +} while (0)
  7395. +
  7396. +#else
  7397. +# define mutex_lock_nested(l, s) _mutex_lock(l)
  7398. +# define mutex_lock_interruptible_nested(l, s) \
  7399. + _mutex_lock_interruptible(l)
  7400. +# define mutex_lock_killable_nested(l, s) \
  7401. + _mutex_lock_killable(l)
  7402. +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
  7403. +#endif
  7404. +
  7405. +# define mutex_init(mutex) \
  7406. +do { \
  7407. + static struct lock_class_key __key; \
  7408. + \
  7409. + rt_mutex_init(&(mutex)->lock); \
  7410. + __mutex_do_init((mutex), #mutex, &__key); \
  7411. +} while (0)
  7412. +
  7413. +# define __mutex_init(mutex, name, key) \
  7414. +do { \
  7415. + rt_mutex_init(&(mutex)->lock); \
  7416. + __mutex_do_init((mutex), name, key); \
  7417. +} while (0)
  7418. +
  7419. +#endif
  7420. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
  7421. index 47c7f5b8f675..85fc72b8a92b 100644
  7422. --- a/include/linux/netdevice.h
  7423. +++ b/include/linux/netdevice.h
  7424. @@ -396,7 +396,19 @@ typedef enum rx_handler_result rx_handler_result_t;
  7425. typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
  7426. void __napi_schedule(struct napi_struct *n);
  7427. +
  7428. +/*
  7429. + * When PREEMPT_RT_FULL is defined, all device interrupt handlers
  7430. + * run as threads, and they can also be preempted (without PREEMPT_RT
  7431. + * interrupt threads can not be preempted). Which means that calling
  7432. + * __napi_schedule_irqoff() from an interrupt handler can be preempted
  7433. + * and can corrupt the napi->poll_list.
  7434. + */
  7435. +#ifdef CONFIG_PREEMPT_RT_FULL
  7436. +#define __napi_schedule_irqoff(n) __napi_schedule(n)
  7437. +#else
  7438. void __napi_schedule_irqoff(struct napi_struct *n);
  7439. +#endif
  7440. static inline bool napi_disable_pending(struct napi_struct *n)
  7441. {
  7442. @@ -2464,14 +2476,53 @@ void netdev_freemem(struct net_device *dev);
  7443. void synchronize_net(void);
  7444. int init_dummy_netdev(struct net_device *dev);
  7445. -DECLARE_PER_CPU(int, xmit_recursion);
  7446. #define XMIT_RECURSION_LIMIT 10
  7447. +#ifdef CONFIG_PREEMPT_RT_FULL
  7448. +static inline int dev_recursion_level(void)
  7449. +{
  7450. + return current->xmit_recursion;
  7451. +}
  7452. +
  7453. +static inline int xmit_rec_read(void)
  7454. +{
  7455. + return current->xmit_recursion;
  7456. +}
  7457. +
  7458. +static inline void xmit_rec_inc(void)
  7459. +{
  7460. + current->xmit_recursion++;
  7461. +}
  7462. +
  7463. +static inline void xmit_rec_dec(void)
  7464. +{
  7465. + current->xmit_recursion--;
  7466. +}
  7467. +
  7468. +#else
  7469. +
  7470. +DECLARE_PER_CPU(int, xmit_recursion);
  7471. static inline int dev_recursion_level(void)
  7472. {
  7473. return this_cpu_read(xmit_recursion);
  7474. }
  7475. +static inline int xmit_rec_read(void)
  7476. +{
  7477. + return __this_cpu_read(xmit_recursion);
  7478. +}
  7479. +
  7480. +static inline void xmit_rec_inc(void)
  7481. +{
  7482. + __this_cpu_inc(xmit_recursion);
  7483. +}
  7484. +
  7485. +static inline void xmit_rec_dec(void)
  7486. +{
  7487. + __this_cpu_dec(xmit_recursion);
  7488. +}
  7489. +#endif
  7490. +
  7491. struct net_device *dev_get_by_index(struct net *net, int ifindex);
  7492. struct net_device *__dev_get_by_index(struct net *net, int ifindex);
  7493. struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
  7494. @@ -2856,6 +2907,7 @@ struct softnet_data {
  7495. unsigned int dropped;
  7496. struct sk_buff_head input_pkt_queue;
  7497. struct napi_struct backlog;
  7498. + struct sk_buff_head tofree_queue;
  7499. };
  7500. diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
  7501. index 2ad1a2b289b5..b4d10155af54 100644
  7502. --- a/include/linux/netfilter/x_tables.h
  7503. +++ b/include/linux/netfilter/x_tables.h
  7504. @@ -4,6 +4,7 @@
  7505. #include <linux/netdevice.h>
  7506. #include <linux/static_key.h>
  7507. +#include <linux/locallock.h>
  7508. #include <uapi/linux/netfilter/x_tables.h>
  7509. /* Test a struct->invflags and a boolean for inequality */
  7510. @@ -300,6 +301,8 @@ void xt_free_table_info(struct xt_table_info *info);
  7511. */
  7512. DECLARE_PER_CPU(seqcount_t, xt_recseq);
  7513. +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock);
  7514. +
  7515. /* xt_tee_enabled - true if x_tables needs to handle reentrancy
  7516. *
  7517. * Enabled if current ip(6)tables ruleset has at least one -j TEE rule.
  7518. @@ -320,6 +323,9 @@ static inline unsigned int xt_write_recseq_begin(void)
  7519. {
  7520. unsigned int addend;
  7521. + /* RT protection */
  7522. + local_lock(xt_write_lock);
  7523. +
  7524. /*
  7525. * Low order bit of sequence is set if we already
  7526. * called xt_write_recseq_begin().
  7527. @@ -350,6 +356,7 @@ static inline void xt_write_recseq_end(unsigned int addend)
  7528. /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */
  7529. smp_wmb();
  7530. __this_cpu_add(xt_recseq.sequence, addend);
  7531. + local_unlock(xt_write_lock);
  7532. }
  7533. /*
  7534. diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
  7535. index 810124b33327..d54ca43d571f 100644
  7536. --- a/include/linux/nfs_fs.h
  7537. +++ b/include/linux/nfs_fs.h
  7538. @@ -165,7 +165,11 @@ struct nfs_inode {
  7539. /* Readers: in-flight sillydelete RPC calls */
  7540. /* Writers: rmdir */
  7541. +#ifdef CONFIG_PREEMPT_RT_BASE
  7542. + struct semaphore rmdir_sem;
  7543. +#else
  7544. struct rw_semaphore rmdir_sem;
  7545. +#endif
  7546. #if IS_ENABLED(CONFIG_NFS_V4)
  7547. struct nfs4_cached_acl *nfs4_acl;
  7548. diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
  7549. index 3bf867a0c3b3..71c6bdd14c8a 100644
  7550. --- a/include/linux/nfs_xdr.h
  7551. +++ b/include/linux/nfs_xdr.h
  7552. @@ -1490,7 +1490,7 @@ struct nfs_unlinkdata {
  7553. struct nfs_removeargs args;
  7554. struct nfs_removeres res;
  7555. struct dentry *dentry;
  7556. - wait_queue_head_t wq;
  7557. + struct swait_queue_head wq;
  7558. struct rpc_cred *cred;
  7559. struct nfs_fattr dir_attr;
  7560. long timeout;
  7561. diff --git a/include/linux/notifier.h b/include/linux/notifier.h
  7562. index 4149868de4e6..babe5b9bcb91 100644
  7563. --- a/include/linux/notifier.h
  7564. +++ b/include/linux/notifier.h
  7565. @@ -6,7 +6,7 @@
  7566. *
  7567. * Alan Cox <Alan.Cox@linux.org>
  7568. */
  7569. -
  7570. +
  7571. #ifndef _LINUX_NOTIFIER_H
  7572. #define _LINUX_NOTIFIER_H
  7573. #include <linux/errno.h>
  7574. @@ -42,9 +42,7 @@
  7575. * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
  7576. * As compensation, srcu_notifier_chain_unregister() is rather expensive.
  7577. * SRCU notifier chains should be used when the chain will be called very
  7578. - * often but notifier_blocks will seldom be removed. Also, SRCU notifier
  7579. - * chains are slightly more difficult to use because they require special
  7580. - * runtime initialization.
  7581. + * often but notifier_blocks will seldom be removed.
  7582. */
  7583. struct notifier_block;
  7584. @@ -90,7 +88,7 @@ struct srcu_notifier_head {
  7585. (name)->head = NULL; \
  7586. } while (0)
  7587. -/* srcu_notifier_heads must be initialized and cleaned up dynamically */
  7588. +/* srcu_notifier_heads must be cleaned up dynamically */
  7589. extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
  7590. #define srcu_cleanup_notifier_head(name) \
  7591. cleanup_srcu_struct(&(name)->srcu);
  7592. @@ -103,7 +101,13 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
  7593. .head = NULL }
  7594. #define RAW_NOTIFIER_INIT(name) { \
  7595. .head = NULL }
  7596. -/* srcu_notifier_heads cannot be initialized statically */
  7597. +
  7598. +#define SRCU_NOTIFIER_INIT(name, pcpu) \
  7599. + { \
  7600. + .mutex = __MUTEX_INITIALIZER(name.mutex), \
  7601. + .head = NULL, \
  7602. + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \
  7603. + }
  7604. #define ATOMIC_NOTIFIER_HEAD(name) \
  7605. struct atomic_notifier_head name = \
  7606. @@ -115,6 +119,18 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
  7607. struct raw_notifier_head name = \
  7608. RAW_NOTIFIER_INIT(name)
  7609. +#define _SRCU_NOTIFIER_HEAD(name, mod) \
  7610. + static DEFINE_PER_CPU(struct srcu_struct_array, \
  7611. + name##_head_srcu_array); \
  7612. + mod struct srcu_notifier_head name = \
  7613. + SRCU_NOTIFIER_INIT(name, name##_head_srcu_array)
  7614. +
  7615. +#define SRCU_NOTIFIER_HEAD(name) \
  7616. + _SRCU_NOTIFIER_HEAD(name, )
  7617. +
  7618. +#define SRCU_NOTIFIER_HEAD_STATIC(name) \
  7619. + _SRCU_NOTIFIER_HEAD(name, static)
  7620. +
  7621. #ifdef __KERNEL__
  7622. extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
  7623. @@ -184,12 +200,12 @@ static inline int notifier_to_errno(int ret)
  7624. /*
  7625. * Declared notifiers so far. I can imagine quite a few more chains
  7626. - * over time (eg laptop power reset chains, reboot chain (to clean
  7627. + * over time (eg laptop power reset chains, reboot chain (to clean
  7628. * device units up), device [un]mount chain, module load/unload chain,
  7629. - * low memory chain, screenblank chain (for plug in modular screenblankers)
  7630. + * low memory chain, screenblank chain (for plug in modular screenblankers)
  7631. * VC switch chains (for loadable kernel svgalib VC switch helpers) etc...
  7632. */
  7633. -
  7634. +
  7635. /* CPU notfiers are defined in include/linux/cpu.h. */
  7636. /* netdevice notifiers are defined in include/linux/netdevice.h */
  7637. diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
  7638. index 5b2e6159b744..ea940f451606 100644
  7639. --- a/include/linux/percpu-rwsem.h
  7640. +++ b/include/linux/percpu-rwsem.h
  7641. @@ -4,7 +4,7 @@
  7642. #include <linux/atomic.h>
  7643. #include <linux/rwsem.h>
  7644. #include <linux/percpu.h>
  7645. -#include <linux/wait.h>
  7646. +#include <linux/swait.h>
  7647. #include <linux/rcu_sync.h>
  7648. #include <linux/lockdep.h>
  7649. @@ -12,7 +12,7 @@ struct percpu_rw_semaphore {
  7650. struct rcu_sync rss;
  7651. unsigned int __percpu *read_count;
  7652. struct rw_semaphore rw_sem;
  7653. - wait_queue_head_t writer;
  7654. + struct swait_queue_head writer;
  7655. int readers_block;
  7656. };
  7657. @@ -22,13 +22,13 @@ static struct percpu_rw_semaphore name = { \
  7658. .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \
  7659. .read_count = &__percpu_rwsem_rc_##name, \
  7660. .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
  7661. - .writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer), \
  7662. + .writer = __SWAIT_QUEUE_HEAD_INITIALIZER(name.writer), \
  7663. }
  7664. extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
  7665. extern void __percpu_up_read(struct percpu_rw_semaphore *);
  7666. -static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem)
  7667. +static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
  7668. {
  7669. might_sleep();
  7670. @@ -46,16 +46,10 @@ static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *
  7671. __this_cpu_inc(*sem->read_count);
  7672. if (unlikely(!rcu_sync_is_idle(&sem->rss)))
  7673. __percpu_down_read(sem, false); /* Unconditional memory barrier */
  7674. - barrier();
  7675. /*
  7676. - * The barrier() prevents the compiler from
  7677. + * The preempt_enable() prevents the compiler from
  7678. * bleeding the critical section out.
  7679. */
  7680. -}
  7681. -
  7682. -static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
  7683. -{
  7684. - percpu_down_read_preempt_disable(sem);
  7685. preempt_enable();
  7686. }
  7687. @@ -82,13 +76,9 @@ static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
  7688. return ret;
  7689. }
  7690. -static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem)
  7691. +static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
  7692. {
  7693. - /*
  7694. - * The barrier() prevents the compiler from
  7695. - * bleeding the critical section out.
  7696. - */
  7697. - barrier();
  7698. + preempt_disable();
  7699. /*
  7700. * Same as in percpu_down_read().
  7701. */
  7702. @@ -101,12 +91,6 @@ static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem
  7703. rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_);
  7704. }
  7705. -static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
  7706. -{
  7707. - preempt_disable();
  7708. - percpu_up_read_preempt_enable(sem);
  7709. -}
  7710. -
  7711. extern void percpu_down_write(struct percpu_rw_semaphore *);
  7712. extern void percpu_up_write(struct percpu_rw_semaphore *);
  7713. diff --git a/include/linux/percpu.h b/include/linux/percpu.h
  7714. index 56939d3f6e53..b988bf40ad3e 100644
  7715. --- a/include/linux/percpu.h
  7716. +++ b/include/linux/percpu.h
  7717. @@ -18,6 +18,35 @@
  7718. #define PERCPU_MODULE_RESERVE 0
  7719. #endif
  7720. +#ifdef CONFIG_PREEMPT_RT_FULL
  7721. +
  7722. +#define get_local_var(var) (*({ \
  7723. + migrate_disable(); \
  7724. + this_cpu_ptr(&var); }))
  7725. +
  7726. +#define put_local_var(var) do { \
  7727. + (void)&(var); \
  7728. + migrate_enable(); \
  7729. +} while (0)
  7730. +
  7731. +# define get_local_ptr(var) ({ \
  7732. + migrate_disable(); \
  7733. + this_cpu_ptr(var); })
  7734. +
  7735. +# define put_local_ptr(var) do { \
  7736. + (void)(var); \
  7737. + migrate_enable(); \
  7738. +} while (0)
  7739. +
  7740. +#else
  7741. +
  7742. +#define get_local_var(var) get_cpu_var(var)
  7743. +#define put_local_var(var) put_cpu_var(var)
  7744. +#define get_local_ptr(var) get_cpu_ptr(var)
  7745. +#define put_local_ptr(var) put_cpu_ptr(var)
  7746. +
  7747. +#endif
  7748. +
  7749. /* minimum unit size, also is the maximum supported allocation size */
  7750. #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
  7751. @@ -110,6 +139,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
  7752. #endif
  7753. extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
  7754. +extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr);
  7755. extern bool is_kernel_percpu_address(unsigned long addr);
  7756. #if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
  7757. diff --git a/include/linux/pid.h b/include/linux/pid.h
  7758. index 97b745ddece5..01a5460a0c85 100644
  7759. --- a/include/linux/pid.h
  7760. +++ b/include/linux/pid.h
  7761. @@ -2,6 +2,7 @@
  7762. #define _LINUX_PID_H
  7763. #include <linux/rcupdate.h>
  7764. +#include <linux/atomic.h>
  7765. enum pid_type
  7766. {
  7767. diff --git a/include/linux/preempt.h b/include/linux/preempt.h
  7768. index 7eeceac52dea..f97c54265904 100644
  7769. --- a/include/linux/preempt.h
  7770. +++ b/include/linux/preempt.h
  7771. @@ -50,7 +50,11 @@
  7772. #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
  7773. #define NMI_OFFSET (1UL << NMI_SHIFT)
  7774. -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
  7775. +#ifndef CONFIG_PREEMPT_RT_FULL
  7776. +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
  7777. +#else
  7778. +# define SOFTIRQ_DISABLE_OFFSET (0)
  7779. +#endif
  7780. /* We use the MSB mostly because its available */
  7781. #define PREEMPT_NEED_RESCHED 0x80000000
  7782. @@ -59,9 +63,15 @@
  7783. #include <asm/preempt.h>
  7784. #define hardirq_count() (preempt_count() & HARDIRQ_MASK)
  7785. -#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
  7786. #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
  7787. | NMI_MASK))
  7788. +#ifndef CONFIG_PREEMPT_RT_FULL
  7789. +# define softirq_count() (preempt_count() & SOFTIRQ_MASK)
  7790. +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
  7791. +#else
  7792. +# define softirq_count() (0UL)
  7793. +extern int in_serving_softirq(void);
  7794. +#endif
  7795. /*
  7796. * Are we doing bottom half or hardware interrupt processing?
  7797. @@ -79,7 +89,6 @@
  7798. #define in_irq() (hardirq_count())
  7799. #define in_softirq() (softirq_count())
  7800. #define in_interrupt() (irq_count())
  7801. -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
  7802. #define in_nmi() (preempt_count() & NMI_MASK)
  7803. #define in_task() (!(preempt_count() & \
  7804. (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
  7805. @@ -96,7 +105,11 @@
  7806. /*
  7807. * The preempt_count offset after spin_lock()
  7808. */
  7809. +#if !defined(CONFIG_PREEMPT_RT_FULL)
  7810. #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
  7811. +#else
  7812. +#define PREEMPT_LOCK_OFFSET 0
  7813. +#endif
  7814. /*
  7815. * The preempt_count offset needed for things like:
  7816. @@ -145,6 +158,20 @@ extern void preempt_count_sub(int val);
  7817. #define preempt_count_inc() preempt_count_add(1)
  7818. #define preempt_count_dec() preempt_count_sub(1)
  7819. +#ifdef CONFIG_PREEMPT_LAZY
  7820. +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
  7821. +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
  7822. +#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
  7823. +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
  7824. +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
  7825. +#else
  7826. +#define add_preempt_lazy_count(val) do { } while (0)
  7827. +#define sub_preempt_lazy_count(val) do { } while (0)
  7828. +#define inc_preempt_lazy_count() do { } while (0)
  7829. +#define dec_preempt_lazy_count() do { } while (0)
  7830. +#define preempt_lazy_count() (0)
  7831. +#endif
  7832. +
  7833. #ifdef CONFIG_PREEMPT_COUNT
  7834. #define preempt_disable() \
  7835. @@ -153,13 +180,25 @@ do { \
  7836. barrier(); \
  7837. } while (0)
  7838. +#define preempt_lazy_disable() \
  7839. +do { \
  7840. + inc_preempt_lazy_count(); \
  7841. + barrier(); \
  7842. +} while (0)
  7843. +
  7844. #define sched_preempt_enable_no_resched() \
  7845. do { \
  7846. barrier(); \
  7847. preempt_count_dec(); \
  7848. } while (0)
  7849. -#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
  7850. +#ifdef CONFIG_PREEMPT_RT_BASE
  7851. +# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
  7852. +# define preempt_check_resched_rt() preempt_check_resched()
  7853. +#else
  7854. +# define preempt_enable_no_resched() preempt_enable()
  7855. +# define preempt_check_resched_rt() barrier();
  7856. +#endif
  7857. #define preemptible() (preempt_count() == 0 && !irqs_disabled())
  7858. @@ -184,6 +223,13 @@ do { \
  7859. __preempt_schedule(); \
  7860. } while (0)
  7861. +#define preempt_lazy_enable() \
  7862. +do { \
  7863. + dec_preempt_lazy_count(); \
  7864. + barrier(); \
  7865. + preempt_check_resched(); \
  7866. +} while (0)
  7867. +
  7868. #else /* !CONFIG_PREEMPT */
  7869. #define preempt_enable() \
  7870. do { \
  7871. @@ -229,6 +275,7 @@ do { \
  7872. #define preempt_disable_notrace() barrier()
  7873. #define preempt_enable_no_resched_notrace() barrier()
  7874. #define preempt_enable_notrace() barrier()
  7875. +#define preempt_check_resched_rt() barrier()
  7876. #define preemptible() 0
  7877. #endif /* CONFIG_PREEMPT_COUNT */
  7878. @@ -249,10 +296,31 @@ do { \
  7879. } while (0)
  7880. #define preempt_fold_need_resched() \
  7881. do { \
  7882. - if (tif_need_resched()) \
  7883. + if (tif_need_resched_now()) \
  7884. set_preempt_need_resched(); \
  7885. } while (0)
  7886. +#ifdef CONFIG_PREEMPT_RT_FULL
  7887. +# define preempt_disable_rt() preempt_disable()
  7888. +# define preempt_enable_rt() preempt_enable()
  7889. +# define preempt_disable_nort() barrier()
  7890. +# define preempt_enable_nort() barrier()
  7891. +# ifdef CONFIG_SMP
  7892. + extern void migrate_disable(void);
  7893. + extern void migrate_enable(void);
  7894. +# else /* CONFIG_SMP */
  7895. +# define migrate_disable() barrier()
  7896. +# define migrate_enable() barrier()
  7897. +# endif /* CONFIG_SMP */
  7898. +#else
  7899. +# define preempt_disable_rt() barrier()
  7900. +# define preempt_enable_rt() barrier()
  7901. +# define preempt_disable_nort() preempt_disable()
  7902. +# define preempt_enable_nort() preempt_enable()
  7903. +# define migrate_disable() preempt_disable()
  7904. +# define migrate_enable() preempt_enable()
  7905. +#endif
  7906. +
  7907. #ifdef CONFIG_PREEMPT_NOTIFIERS
  7908. struct preempt_notifier;
  7909. diff --git a/include/linux/printk.h b/include/linux/printk.h
  7910. index eac1af8502bb..37e647af0b0b 100644
  7911. --- a/include/linux/printk.h
  7912. +++ b/include/linux/printk.h
  7913. @@ -126,9 +126,11 @@ struct va_format {
  7914. #ifdef CONFIG_EARLY_PRINTK
  7915. extern asmlinkage __printf(1, 2)
  7916. void early_printk(const char *fmt, ...);
  7917. +extern void printk_kill(void);
  7918. #else
  7919. static inline __printf(1, 2) __cold
  7920. void early_printk(const char *s, ...) { }
  7921. +static inline void printk_kill(void) { }
  7922. #endif
  7923. #ifdef CONFIG_PRINTK_NMI
  7924. diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
  7925. index af3581b8a451..277295039c8f 100644
  7926. --- a/include/linux/radix-tree.h
  7927. +++ b/include/linux/radix-tree.h
  7928. @@ -292,6 +292,8 @@ unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
  7929. int radix_tree_preload(gfp_t gfp_mask);
  7930. int radix_tree_maybe_preload(gfp_t gfp_mask);
  7931. int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order);
  7932. +void radix_tree_preload_end(void);
  7933. +
  7934. void radix_tree_init(void);
  7935. void *radix_tree_tag_set(struct radix_tree_root *root,
  7936. unsigned long index, unsigned int tag);
  7937. @@ -314,11 +316,6 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
  7938. int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
  7939. unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item);
  7940. -static inline void radix_tree_preload_end(void)
  7941. -{
  7942. - preempt_enable();
  7943. -}
  7944. -
  7945. /**
  7946. * struct radix_tree_iter - radix tree iterator state
  7947. *
  7948. diff --git a/include/linux/random.h b/include/linux/random.h
  7949. index 16ab429735a7..9d0fecb5b6c2 100644
  7950. --- a/include/linux/random.h
  7951. +++ b/include/linux/random.h
  7952. @@ -31,7 +31,7 @@ static inline void add_latent_entropy(void) {}
  7953. extern void add_input_randomness(unsigned int type, unsigned int code,
  7954. unsigned int value) __latent_entropy;
  7955. -extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy;
  7956. +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) __latent_entropy;
  7957. extern void get_random_bytes(void *buf, int nbytes);
  7958. extern int add_random_ready_callback(struct random_ready_callback *rdy);
  7959. diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
  7960. index e585018498d5..25c64474fc27 100644
  7961. --- a/include/linux/rbtree.h
  7962. +++ b/include/linux/rbtree.h
  7963. @@ -31,7 +31,7 @@
  7964. #include <linux/kernel.h>
  7965. #include <linux/stddef.h>
  7966. -#include <linux/rcupdate.h>
  7967. +#include <linux/rcu_assign_pointer.h>
  7968. struct rb_node {
  7969. unsigned long __rb_parent_color;
  7970. diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
  7971. index d076183e49be..36bfb4dd57ae 100644
  7972. --- a/include/linux/rbtree_augmented.h
  7973. +++ b/include/linux/rbtree_augmented.h
  7974. @@ -26,6 +26,7 @@
  7975. #include <linux/compiler.h>
  7976. #include <linux/rbtree.h>
  7977. +#include <linux/rcupdate.h>
  7978. /*
  7979. * Please note - only struct rb_augment_callbacks and the prototypes for
  7980. diff --git a/include/linux/rcu_assign_pointer.h b/include/linux/rcu_assign_pointer.h
  7981. new file mode 100644
  7982. index 000000000000..7066962a4379
  7983. --- /dev/null
  7984. +++ b/include/linux/rcu_assign_pointer.h
  7985. @@ -0,0 +1,54 @@
  7986. +#ifndef __LINUX_RCU_ASSIGN_POINTER_H__
  7987. +#define __LINUX_RCU_ASSIGN_POINTER_H__
  7988. +#include <linux/compiler.h>
  7989. +#include <asm/barrier.h>
  7990. +
  7991. +/**
  7992. + * RCU_INITIALIZER() - statically initialize an RCU-protected global variable
  7993. + * @v: The value to statically initialize with.
  7994. + */
  7995. +#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v)
  7996. +
  7997. +/**
  7998. + * rcu_assign_pointer() - assign to RCU-protected pointer
  7999. + * @p: pointer to assign to
  8000. + * @v: value to assign (publish)
  8001. + *
  8002. + * Assigns the specified value to the specified RCU-protected
  8003. + * pointer, ensuring that any concurrent RCU readers will see
  8004. + * any prior initialization.
  8005. + *
  8006. + * Inserts memory barriers on architectures that require them
  8007. + * (which is most of them), and also prevents the compiler from
  8008. + * reordering the code that initializes the structure after the pointer
  8009. + * assignment. More importantly, this call documents which pointers
  8010. + * will be dereferenced by RCU read-side code.
  8011. + *
  8012. + * In some special cases, you may use RCU_INIT_POINTER() instead
  8013. + * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due
  8014. + * to the fact that it does not constrain either the CPU or the compiler.
  8015. + * That said, using RCU_INIT_POINTER() when you should have used
  8016. + * rcu_assign_pointer() is a very bad thing that results in
  8017. + * impossible-to-diagnose memory corruption. So please be careful.
  8018. + * See the RCU_INIT_POINTER() comment header for details.
  8019. + *
  8020. + * Note that rcu_assign_pointer() evaluates each of its arguments only
  8021. + * once, appearances notwithstanding. One of the "extra" evaluations
  8022. + * is in typeof() and the other visible only to sparse (__CHECKER__),
  8023. + * neither of which actually execute the argument. As with most cpp
  8024. + * macros, this execute-arguments-only-once property is important, so
  8025. + * please be careful when making changes to rcu_assign_pointer() and the
  8026. + * other macros that it invokes.
  8027. + */
  8028. +#define rcu_assign_pointer(p, v) \
  8029. +({ \
  8030. + uintptr_t _r_a_p__v = (uintptr_t)(v); \
  8031. + \
  8032. + if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \
  8033. + WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \
  8034. + else \
  8035. + smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \
  8036. + _r_a_p__v; \
  8037. +})
  8038. +
  8039. +#endif
  8040. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
  8041. index 01f71e1d2e94..30cc001d0d5a 100644
  8042. --- a/include/linux/rcupdate.h
  8043. +++ b/include/linux/rcupdate.h
  8044. @@ -46,6 +46,7 @@
  8045. #include <linux/compiler.h>
  8046. #include <linux/ktime.h>
  8047. #include <linux/irqflags.h>
  8048. +#include <linux/rcu_assign_pointer.h>
  8049. #include <asm/barrier.h>
  8050. @@ -178,6 +179,9 @@ void call_rcu(struct rcu_head *head,
  8051. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  8052. +#ifdef CONFIG_PREEMPT_RT_FULL
  8053. +#define call_rcu_bh call_rcu
  8054. +#else
  8055. /**
  8056. * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
  8057. * @head: structure to be used for queueing the RCU updates.
  8058. @@ -201,6 +205,7 @@ void call_rcu(struct rcu_head *head,
  8059. */
  8060. void call_rcu_bh(struct rcu_head *head,
  8061. rcu_callback_t func);
  8062. +#endif
  8063. /**
  8064. * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
  8065. @@ -301,6 +306,11 @@ void synchronize_rcu(void);
  8066. * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
  8067. */
  8068. #define rcu_preempt_depth() (current->rcu_read_lock_nesting)
  8069. +#ifndef CONFIG_PREEMPT_RT_FULL
  8070. +#define sched_rcu_preempt_depth() rcu_preempt_depth()
  8071. +#else
  8072. +static inline int sched_rcu_preempt_depth(void) { return 0; }
  8073. +#endif
  8074. #else /* #ifdef CONFIG_PREEMPT_RCU */
  8075. @@ -326,6 +336,8 @@ static inline int rcu_preempt_depth(void)
  8076. return 0;
  8077. }
  8078. +#define sched_rcu_preempt_depth() rcu_preempt_depth()
  8079. +
  8080. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  8081. /* Internal to kernel */
  8082. @@ -505,7 +517,14 @@ extern struct lockdep_map rcu_callback_map;
  8083. int debug_lockdep_rcu_enabled(void);
  8084. int rcu_read_lock_held(void);
  8085. +#ifdef CONFIG_PREEMPT_RT_FULL
  8086. +static inline int rcu_read_lock_bh_held(void)
  8087. +{
  8088. + return rcu_read_lock_held();
  8089. +}
  8090. +#else
  8091. int rcu_read_lock_bh_held(void);
  8092. +#endif
  8093. /**
  8094. * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
  8095. @@ -625,54 +644,6 @@ static inline void rcu_preempt_sleep_check(void)
  8096. ((typeof(*p) __force __kernel *)(________p1)); \
  8097. })
  8098. -/**
  8099. - * RCU_INITIALIZER() - statically initialize an RCU-protected global variable
  8100. - * @v: The value to statically initialize with.
  8101. - */
  8102. -#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v)
  8103. -
  8104. -/**
  8105. - * rcu_assign_pointer() - assign to RCU-protected pointer
  8106. - * @p: pointer to assign to
  8107. - * @v: value to assign (publish)
  8108. - *
  8109. - * Assigns the specified value to the specified RCU-protected
  8110. - * pointer, ensuring that any concurrent RCU readers will see
  8111. - * any prior initialization.
  8112. - *
  8113. - * Inserts memory barriers on architectures that require them
  8114. - * (which is most of them), and also prevents the compiler from
  8115. - * reordering the code that initializes the structure after the pointer
  8116. - * assignment. More importantly, this call documents which pointers
  8117. - * will be dereferenced by RCU read-side code.
  8118. - *
  8119. - * In some special cases, you may use RCU_INIT_POINTER() instead
  8120. - * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due
  8121. - * to the fact that it does not constrain either the CPU or the compiler.
  8122. - * That said, using RCU_INIT_POINTER() when you should have used
  8123. - * rcu_assign_pointer() is a very bad thing that results in
  8124. - * impossible-to-diagnose memory corruption. So please be careful.
  8125. - * See the RCU_INIT_POINTER() comment header for details.
  8126. - *
  8127. - * Note that rcu_assign_pointer() evaluates each of its arguments only
  8128. - * once, appearances notwithstanding. One of the "extra" evaluations
  8129. - * is in typeof() and the other visible only to sparse (__CHECKER__),
  8130. - * neither of which actually execute the argument. As with most cpp
  8131. - * macros, this execute-arguments-only-once property is important, so
  8132. - * please be careful when making changes to rcu_assign_pointer() and the
  8133. - * other macros that it invokes.
  8134. - */
  8135. -#define rcu_assign_pointer(p, v) \
  8136. -({ \
  8137. - uintptr_t _r_a_p__v = (uintptr_t)(v); \
  8138. - \
  8139. - if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \
  8140. - WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \
  8141. - else \
  8142. - smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \
  8143. - _r_a_p__v; \
  8144. -})
  8145. -
  8146. /**
  8147. * rcu_access_pointer() - fetch RCU pointer with no dereferencing
  8148. * @p: The pointer to read
  8149. @@ -951,10 +922,14 @@ static inline void rcu_read_unlock(void)
  8150. static inline void rcu_read_lock_bh(void)
  8151. {
  8152. local_bh_disable();
  8153. +#ifdef CONFIG_PREEMPT_RT_FULL
  8154. + rcu_read_lock();
  8155. +#else
  8156. __acquire(RCU_BH);
  8157. rcu_lock_acquire(&rcu_bh_lock_map);
  8158. RCU_LOCKDEP_WARN(!rcu_is_watching(),
  8159. "rcu_read_lock_bh() used illegally while idle");
  8160. +#endif
  8161. }
  8162. /*
  8163. @@ -964,10 +939,14 @@ static inline void rcu_read_lock_bh(void)
  8164. */
  8165. static inline void rcu_read_unlock_bh(void)
  8166. {
  8167. +#ifdef CONFIG_PREEMPT_RT_FULL
  8168. + rcu_read_unlock();
  8169. +#else
  8170. RCU_LOCKDEP_WARN(!rcu_is_watching(),
  8171. "rcu_read_unlock_bh() used illegally while idle");
  8172. rcu_lock_release(&rcu_bh_lock_map);
  8173. __release(RCU_BH);
  8174. +#endif
  8175. local_bh_enable();
  8176. }
  8177. diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
  8178. index 63a4e4cf40a5..08ab12df2863 100644
  8179. --- a/include/linux/rcutree.h
  8180. +++ b/include/linux/rcutree.h
  8181. @@ -44,7 +44,11 @@ static inline void rcu_virt_note_context_switch(int cpu)
  8182. rcu_note_context_switch();
  8183. }
  8184. +#ifdef CONFIG_PREEMPT_RT_FULL
  8185. +# define synchronize_rcu_bh synchronize_rcu
  8186. +#else
  8187. void synchronize_rcu_bh(void);
  8188. +#endif
  8189. void synchronize_sched_expedited(void);
  8190. void synchronize_rcu_expedited(void);
  8191. @@ -72,7 +76,11 @@ static inline void synchronize_rcu_bh_expedited(void)
  8192. }
  8193. void rcu_barrier(void);
  8194. +#ifdef CONFIG_PREEMPT_RT_FULL
  8195. +# define rcu_barrier_bh rcu_barrier
  8196. +#else
  8197. void rcu_barrier_bh(void);
  8198. +#endif
  8199. void rcu_barrier_sched(void);
  8200. unsigned long get_state_synchronize_rcu(void);
  8201. void cond_synchronize_rcu(unsigned long oldstate);
  8202. @@ -82,17 +90,14 @@ void cond_synchronize_sched(unsigned long oldstate);
  8203. extern unsigned long rcutorture_testseq;
  8204. extern unsigned long rcutorture_vernum;
  8205. unsigned long rcu_batches_started(void);
  8206. -unsigned long rcu_batches_started_bh(void);
  8207. unsigned long rcu_batches_started_sched(void);
  8208. unsigned long rcu_batches_completed(void);
  8209. -unsigned long rcu_batches_completed_bh(void);
  8210. unsigned long rcu_batches_completed_sched(void);
  8211. unsigned long rcu_exp_batches_completed(void);
  8212. unsigned long rcu_exp_batches_completed_sched(void);
  8213. void show_rcu_gp_kthreads(void);
  8214. void rcu_force_quiescent_state(void);
  8215. -void rcu_bh_force_quiescent_state(void);
  8216. void rcu_sched_force_quiescent_state(void);
  8217. void rcu_idle_enter(void);
  8218. @@ -109,6 +114,16 @@ extern int rcu_scheduler_active __read_mostly;
  8219. bool rcu_is_watching(void);
  8220. +#ifndef CONFIG_PREEMPT_RT_FULL
  8221. +void rcu_bh_force_quiescent_state(void);
  8222. +unsigned long rcu_batches_started_bh(void);
  8223. +unsigned long rcu_batches_completed_bh(void);
  8224. +#else
  8225. +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state
  8226. +# define rcu_batches_completed_bh rcu_batches_completed
  8227. +# define rcu_batches_started_bh rcu_batches_completed
  8228. +#endif
  8229. +
  8230. void rcu_all_qs(void);
  8231. /* RCUtree hotplug events */
  8232. diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
  8233. index 1abba5ce2a2f..294a8b4875f1 100644
  8234. --- a/include/linux/rtmutex.h
  8235. +++ b/include/linux/rtmutex.h
  8236. @@ -13,11 +13,15 @@
  8237. #define __LINUX_RT_MUTEX_H
  8238. #include <linux/linkage.h>
  8239. +#include <linux/spinlock_types_raw.h>
  8240. #include <linux/rbtree.h>
  8241. -#include <linux/spinlock_types.h>
  8242. extern int max_lock_depth; /* for sysctl */
  8243. +#ifdef CONFIG_DEBUG_MUTEXES
  8244. +#include <linux/debug_locks.h>
  8245. +#endif
  8246. +
  8247. /**
  8248. * The rt_mutex structure
  8249. *
  8250. @@ -31,8 +35,8 @@ struct rt_mutex {
  8251. struct rb_root waiters;
  8252. struct rb_node *waiters_leftmost;
  8253. struct task_struct *owner;
  8254. -#ifdef CONFIG_DEBUG_RT_MUTEXES
  8255. int save_state;
  8256. +#ifdef CONFIG_DEBUG_RT_MUTEXES
  8257. const char *name, *file;
  8258. int line;
  8259. void *magic;
  8260. @@ -55,22 +59,33 @@ struct hrtimer_sleeper;
  8261. # define rt_mutex_debug_check_no_locks_held(task) do { } while (0)
  8262. #endif
  8263. +# define rt_mutex_init(mutex) \
  8264. + do { \
  8265. + raw_spin_lock_init(&(mutex)->wait_lock); \
  8266. + __rt_mutex_init(mutex, #mutex); \
  8267. + } while (0)
  8268. +
  8269. #ifdef CONFIG_DEBUG_RT_MUTEXES
  8270. # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
  8271. , .name = #mutexname, .file = __FILE__, .line = __LINE__
  8272. -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__)
  8273. extern void rt_mutex_debug_task_free(struct task_struct *tsk);
  8274. #else
  8275. # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
  8276. -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL)
  8277. # define rt_mutex_debug_task_free(t) do { } while (0)
  8278. #endif
  8279. -#define __RT_MUTEX_INITIALIZER(mutexname) \
  8280. - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
  8281. +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
  8282. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
  8283. , .waiters = RB_ROOT \
  8284. , .owner = NULL \
  8285. - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
  8286. + __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
  8287. +
  8288. +#define __RT_MUTEX_INITIALIZER(mutexname) \
  8289. + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) }
  8290. +
  8291. +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
  8292. + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
  8293. + , .save_state = 1 }
  8294. #define DEFINE_RT_MUTEX(mutexname) \
  8295. struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
  8296. @@ -90,7 +105,9 @@ extern void __rt_mutex_init(struct rt_mutex *lock, const char *name);
  8297. extern void rt_mutex_destroy(struct rt_mutex *lock);
  8298. extern void rt_mutex_lock(struct rt_mutex *lock);
  8299. +extern int rt_mutex_lock_state(struct rt_mutex *lock, int state);
  8300. extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
  8301. +extern int rt_mutex_lock_killable(struct rt_mutex *lock);
  8302. extern int rt_mutex_timed_lock(struct rt_mutex *lock,
  8303. struct hrtimer_sleeper *timeout);
  8304. diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h
  8305. new file mode 100644
  8306. index 000000000000..49ed2d45d3be
  8307. --- /dev/null
  8308. +++ b/include/linux/rwlock_rt.h
  8309. @@ -0,0 +1,99 @@
  8310. +#ifndef __LINUX_RWLOCK_RT_H
  8311. +#define __LINUX_RWLOCK_RT_H
  8312. +
  8313. +#ifndef __LINUX_SPINLOCK_H
  8314. +#error Do not include directly. Use spinlock.h
  8315. +#endif
  8316. +
  8317. +#define rwlock_init(rwl) \
  8318. +do { \
  8319. + static struct lock_class_key __key; \
  8320. + \
  8321. + rt_mutex_init(&(rwl)->lock); \
  8322. + __rt_rwlock_init(rwl, #rwl, &__key); \
  8323. +} while (0)
  8324. +
  8325. +extern void __lockfunc rt_write_lock(rwlock_t *rwlock);
  8326. +extern void __lockfunc rt_read_lock(rwlock_t *rwlock);
  8327. +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock);
  8328. +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags);
  8329. +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock);
  8330. +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock);
  8331. +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock);
  8332. +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock);
  8333. +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock);
  8334. +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key);
  8335. +
  8336. +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock))
  8337. +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock))
  8338. +
  8339. +#define write_trylock_irqsave(lock, flags) \
  8340. + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags))
  8341. +
  8342. +#define read_lock_irqsave(lock, flags) \
  8343. + do { \
  8344. + typecheck(unsigned long, flags); \
  8345. + flags = rt_read_lock_irqsave(lock); \
  8346. + } while (0)
  8347. +
  8348. +#define write_lock_irqsave(lock, flags) \
  8349. + do { \
  8350. + typecheck(unsigned long, flags); \
  8351. + flags = rt_write_lock_irqsave(lock); \
  8352. + } while (0)
  8353. +
  8354. +#define read_lock(lock) rt_read_lock(lock)
  8355. +
  8356. +#define read_lock_bh(lock) \
  8357. + do { \
  8358. + local_bh_disable(); \
  8359. + rt_read_lock(lock); \
  8360. + } while (0)
  8361. +
  8362. +#define read_lock_irq(lock) read_lock(lock)
  8363. +
  8364. +#define write_lock(lock) rt_write_lock(lock)
  8365. +
  8366. +#define write_lock_bh(lock) \
  8367. + do { \
  8368. + local_bh_disable(); \
  8369. + rt_write_lock(lock); \
  8370. + } while (0)
  8371. +
  8372. +#define write_lock_irq(lock) write_lock(lock)
  8373. +
  8374. +#define read_unlock(lock) rt_read_unlock(lock)
  8375. +
  8376. +#define read_unlock_bh(lock) \
  8377. + do { \
  8378. + rt_read_unlock(lock); \
  8379. + local_bh_enable(); \
  8380. + } while (0)
  8381. +
  8382. +#define read_unlock_irq(lock) read_unlock(lock)
  8383. +
  8384. +#define write_unlock(lock) rt_write_unlock(lock)
  8385. +
  8386. +#define write_unlock_bh(lock) \
  8387. + do { \
  8388. + rt_write_unlock(lock); \
  8389. + local_bh_enable(); \
  8390. + } while (0)
  8391. +
  8392. +#define write_unlock_irq(lock) write_unlock(lock)
  8393. +
  8394. +#define read_unlock_irqrestore(lock, flags) \
  8395. + do { \
  8396. + typecheck(unsigned long, flags); \
  8397. + (void) flags; \
  8398. + rt_read_unlock(lock); \
  8399. + } while (0)
  8400. +
  8401. +#define write_unlock_irqrestore(lock, flags) \
  8402. + do { \
  8403. + typecheck(unsigned long, flags); \
  8404. + (void) flags; \
  8405. + rt_write_unlock(lock); \
  8406. + } while (0)
  8407. +
  8408. +#endif
  8409. diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
  8410. index cc0072e93e36..5317cd957292 100644
  8411. --- a/include/linux/rwlock_types.h
  8412. +++ b/include/linux/rwlock_types.h
  8413. @@ -1,6 +1,10 @@
  8414. #ifndef __LINUX_RWLOCK_TYPES_H
  8415. #define __LINUX_RWLOCK_TYPES_H
  8416. +#if !defined(__LINUX_SPINLOCK_TYPES_H)
  8417. +# error "Do not include directly, include spinlock_types.h"
  8418. +#endif
  8419. +
  8420. /*
  8421. * include/linux/rwlock_types.h - generic rwlock type definitions
  8422. * and initializers
  8423. diff --git a/include/linux/rwlock_types_rt.h b/include/linux/rwlock_types_rt.h
  8424. new file mode 100644
  8425. index 000000000000..51b28d775fe1
  8426. --- /dev/null
  8427. +++ b/include/linux/rwlock_types_rt.h
  8428. @@ -0,0 +1,33 @@
  8429. +#ifndef __LINUX_RWLOCK_TYPES_RT_H
  8430. +#define __LINUX_RWLOCK_TYPES_RT_H
  8431. +
  8432. +#ifndef __LINUX_SPINLOCK_TYPES_H
  8433. +#error "Do not include directly. Include spinlock_types.h instead"
  8434. +#endif
  8435. +
  8436. +/*
  8437. + * rwlocks - rtmutex which allows single reader recursion
  8438. + */
  8439. +typedef struct {
  8440. + struct rt_mutex lock;
  8441. + int read_depth;
  8442. + unsigned int break_lock;
  8443. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  8444. + struct lockdep_map dep_map;
  8445. +#endif
  8446. +} rwlock_t;
  8447. +
  8448. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  8449. +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  8450. +#else
  8451. +# define RW_DEP_MAP_INIT(lockname)
  8452. +#endif
  8453. +
  8454. +#define __RW_LOCK_UNLOCKED(name) \
  8455. + { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \
  8456. + RW_DEP_MAP_INIT(name) }
  8457. +
  8458. +#define DEFINE_RWLOCK(name) \
  8459. + rwlock_t name = __RW_LOCK_UNLOCKED(name)
  8460. +
  8461. +#endif
  8462. diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
  8463. index dd1d14250340..aa2ac1f65c2d 100644
  8464. --- a/include/linux/rwsem.h
  8465. +++ b/include/linux/rwsem.h
  8466. @@ -19,6 +19,10 @@
  8467. #include <linux/osq_lock.h>
  8468. #endif
  8469. +#ifdef CONFIG_PREEMPT_RT_FULL
  8470. +#include <linux/rwsem_rt.h>
  8471. +#else /* PREEMPT_RT_FULL */
  8472. +
  8473. struct rw_semaphore;
  8474. #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
  8475. @@ -106,6 +110,13 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem)
  8476. return !list_empty(&sem->wait_list);
  8477. }
  8478. +#endif /* !PREEMPT_RT_FULL */
  8479. +
  8480. +/*
  8481. + * The functions below are the same for all rwsem implementations including
  8482. + * the RT specific variant.
  8483. + */
  8484. +
  8485. /*
  8486. * lock for reading
  8487. */
  8488. diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h
  8489. new file mode 100644
  8490. index 000000000000..2ffbf093ae92
  8491. --- /dev/null
  8492. +++ b/include/linux/rwsem_rt.h
  8493. @@ -0,0 +1,67 @@
  8494. +#ifndef _LINUX_RWSEM_RT_H
  8495. +#define _LINUX_RWSEM_RT_H
  8496. +
  8497. +#ifndef _LINUX_RWSEM_H
  8498. +#error "Include rwsem.h"
  8499. +#endif
  8500. +
  8501. +#include <linux/rtmutex.h>
  8502. +#include <linux/swait.h>
  8503. +
  8504. +#define READER_BIAS (1U << 31)
  8505. +#define WRITER_BIAS (1U << 30)
  8506. +
  8507. +struct rw_semaphore {
  8508. + atomic_t readers;
  8509. + struct rt_mutex rtmutex;
  8510. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  8511. + struct lockdep_map dep_map;
  8512. +#endif
  8513. +};
  8514. +
  8515. +#define __RWSEM_INITIALIZER(name) \
  8516. +{ \
  8517. + .readers = ATOMIC_INIT(READER_BIAS), \
  8518. + .rtmutex = __RT_MUTEX_INITIALIZER(name.rtmutex), \
  8519. + RW_DEP_MAP_INIT(name) \
  8520. +}
  8521. +
  8522. +#define DECLARE_RWSEM(lockname) \
  8523. + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
  8524. +
  8525. +extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name,
  8526. + struct lock_class_key *key);
  8527. +
  8528. +#define __init_rwsem(sem, name, key) \
  8529. +do { \
  8530. + rt_mutex_init(&(sem)->rtmutex); \
  8531. + __rwsem_init((sem), (name), (key)); \
  8532. +} while (0)
  8533. +
  8534. +#define init_rwsem(sem) \
  8535. +do { \
  8536. + static struct lock_class_key __key; \
  8537. + \
  8538. + __init_rwsem((sem), #sem, &__key); \
  8539. +} while (0)
  8540. +
  8541. +static inline int rwsem_is_locked(struct rw_semaphore *sem)
  8542. +{
  8543. + return atomic_read(&sem->readers) != READER_BIAS;
  8544. +}
  8545. +
  8546. +static inline int rwsem_is_contended(struct rw_semaphore *sem)
  8547. +{
  8548. + return atomic_read(&sem->readers) > 0;
  8549. +}
  8550. +
  8551. +extern void __down_read(struct rw_semaphore *sem);
  8552. +extern int __down_read_trylock(struct rw_semaphore *sem);
  8553. +extern void __down_write(struct rw_semaphore *sem);
  8554. +extern int __must_check __down_write_killable(struct rw_semaphore *sem);
  8555. +extern int __down_write_trylock(struct rw_semaphore *sem);
  8556. +extern void __up_read(struct rw_semaphore *sem);
  8557. +extern void __up_write(struct rw_semaphore *sem);
  8558. +extern void __downgrade_write(struct rw_semaphore *sem);
  8559. +
  8560. +#endif
  8561. diff --git a/include/linux/sched.h b/include/linux/sched.h
  8562. index a4d0afc009a7..e775696b480a 100644
  8563. --- a/include/linux/sched.h
  8564. +++ b/include/linux/sched.h
  8565. @@ -26,6 +26,7 @@ struct sched_param {
  8566. #include <linux/nodemask.h>
  8567. #include <linux/mm_types.h>
  8568. #include <linux/preempt.h>
  8569. +#include <asm/kmap_types.h>
  8570. #include <asm/page.h>
  8571. #include <asm/ptrace.h>
  8572. @@ -236,17 +237,13 @@ extern char ___assert_task_state[1 - 2*!!(
  8573. /* Convenience macros for the sake of wake_up */
  8574. #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
  8575. -#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
  8576. /* get_task_state() */
  8577. #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \
  8578. TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
  8579. __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
  8580. -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
  8581. #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
  8582. -#define task_is_stopped_or_traced(task) \
  8583. - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
  8584. #define task_contributes_to_load(task) \
  8585. ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
  8586. (task->flags & PF_FROZEN) == 0 && \
  8587. @@ -312,6 +309,11 @@ extern char ___assert_task_state[1 - 2*!!(
  8588. #endif
  8589. +#define __set_current_state_no_track(state_value) \
  8590. + do { current->state = (state_value); } while (0)
  8591. +#define set_current_state_no_track(state_value) \
  8592. + set_mb(current->state, (state_value))
  8593. +
  8594. /* Task command name length */
  8595. #define TASK_COMM_LEN 16
  8596. @@ -1022,9 +1024,31 @@ struct wake_q_head {
  8597. #define WAKE_Q(name) \
  8598. struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
  8599. -extern void wake_q_add(struct wake_q_head *head,
  8600. - struct task_struct *task);
  8601. -extern void wake_up_q(struct wake_q_head *head);
  8602. +extern void __wake_q_add(struct wake_q_head *head,
  8603. + struct task_struct *task, bool sleeper);
  8604. +static inline void wake_q_add(struct wake_q_head *head,
  8605. + struct task_struct *task)
  8606. +{
  8607. + __wake_q_add(head, task, false);
  8608. +}
  8609. +
  8610. +static inline void wake_q_add_sleeper(struct wake_q_head *head,
  8611. + struct task_struct *task)
  8612. +{
  8613. + __wake_q_add(head, task, true);
  8614. +}
  8615. +
  8616. +extern void __wake_up_q(struct wake_q_head *head, bool sleeper);
  8617. +
  8618. +static inline void wake_up_q(struct wake_q_head *head)
  8619. +{
  8620. + __wake_up_q(head, false);
  8621. +}
  8622. +
  8623. +static inline void wake_up_q_sleeper(struct wake_q_head *head)
  8624. +{
  8625. + __wake_up_q(head, true);
  8626. +}
  8627. /*
  8628. * sched-domains (multiprocessor balancing) declarations:
  8629. @@ -1491,6 +1515,7 @@ struct task_struct {
  8630. struct thread_info thread_info;
  8631. #endif
  8632. volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
  8633. + volatile long saved_state; /* saved state for "spinlock sleepers" */
  8634. void *stack;
  8635. atomic_t usage;
  8636. unsigned int flags; /* per process flags, defined below */
  8637. @@ -1530,6 +1555,13 @@ struct task_struct {
  8638. #endif
  8639. unsigned int policy;
  8640. +#ifdef CONFIG_PREEMPT_RT_FULL
  8641. + int migrate_disable;
  8642. + int migrate_disable_update;
  8643. +# ifdef CONFIG_SCHED_DEBUG
  8644. + int migrate_disable_atomic;
  8645. +# endif
  8646. +#endif
  8647. int nr_cpus_allowed;
  8648. cpumask_t cpus_allowed;
  8649. @@ -1668,6 +1700,9 @@ struct task_struct {
  8650. struct task_cputime cputime_expires;
  8651. struct list_head cpu_timers[3];
  8652. +#ifdef CONFIG_PREEMPT_RT_BASE
  8653. + struct task_struct *posix_timer_list;
  8654. +#endif
  8655. /* process credentials */
  8656. const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
  8657. @@ -1699,10 +1734,15 @@ struct task_struct {
  8658. /* signal handlers */
  8659. struct signal_struct *signal;
  8660. struct sighand_struct *sighand;
  8661. + struct sigqueue *sigqueue_cache;
  8662. sigset_t blocked, real_blocked;
  8663. sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
  8664. struct sigpending pending;
  8665. +#ifdef CONFIG_PREEMPT_RT_FULL
  8666. + /* TODO: move me into ->restart_block ? */
  8667. + struct siginfo forced_info;
  8668. +#endif
  8669. unsigned long sas_ss_sp;
  8670. size_t sas_ss_size;
  8671. @@ -1728,11 +1768,14 @@ struct task_struct {
  8672. raw_spinlock_t pi_lock;
  8673. struct wake_q_node wake_q;
  8674. + struct wake_q_node wake_q_sleeper;
  8675. #ifdef CONFIG_RT_MUTEXES
  8676. /* PI waiters blocked on a rt_mutex held by this task */
  8677. struct rb_root pi_waiters;
  8678. struct rb_node *pi_waiters_leftmost;
  8679. + /* Updated under owner's pi_lock and rq lock */
  8680. + struct task_struct *pi_top_task;
  8681. /* Deadlock detection and priority inheritance handling */
  8682. struct rt_mutex_waiter *pi_blocked_on;
  8683. #endif
  8684. @@ -1931,6 +1974,12 @@ struct task_struct {
  8685. /* bitmask and counter of trace recursion */
  8686. unsigned long trace_recursion;
  8687. #endif /* CONFIG_TRACING */
  8688. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  8689. + u64 preempt_timestamp_hist;
  8690. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  8691. + long timer_offset;
  8692. +#endif
  8693. +#endif
  8694. #ifdef CONFIG_KCOV
  8695. /* Coverage collection mode enabled for this task (0 if disabled). */
  8696. enum kcov_mode kcov_mode;
  8697. @@ -1956,8 +2005,22 @@ struct task_struct {
  8698. unsigned int sequential_io;
  8699. unsigned int sequential_io_avg;
  8700. #endif
  8701. +#ifdef CONFIG_PREEMPT_RT_BASE
  8702. + struct rcu_head put_rcu;
  8703. + int softirq_nestcnt;
  8704. + unsigned int softirqs_raised;
  8705. +#endif
  8706. +#ifdef CONFIG_PREEMPT_RT_FULL
  8707. +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32
  8708. + int kmap_idx;
  8709. + pte_t kmap_pte[KM_TYPE_NR];
  8710. +# endif
  8711. +#endif
  8712. #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  8713. unsigned long task_state_change;
  8714. +#endif
  8715. +#ifdef CONFIG_PREEMPT_RT_FULL
  8716. + int xmit_recursion;
  8717. #endif
  8718. int pagefault_disabled;
  8719. #ifdef CONFIG_MMU
  8720. @@ -1998,14 +2061,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
  8721. }
  8722. #endif
  8723. -/* Future-safe accessor for struct task_struct's cpus_allowed. */
  8724. -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
  8725. -
  8726. -static inline int tsk_nr_cpus_allowed(struct task_struct *p)
  8727. -{
  8728. - return p->nr_cpus_allowed;
  8729. -}
  8730. -
  8731. #define TNF_MIGRATED 0x01
  8732. #define TNF_NO_GROUP 0x02
  8733. #define TNF_SHARED 0x04
  8734. @@ -2225,6 +2280,15 @@ extern struct pid *cad_pid;
  8735. extern void free_task(struct task_struct *tsk);
  8736. #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
  8737. +#ifdef CONFIG_PREEMPT_RT_BASE
  8738. +extern void __put_task_struct_cb(struct rcu_head *rhp);
  8739. +
  8740. +static inline void put_task_struct(struct task_struct *t)
  8741. +{
  8742. + if (atomic_dec_and_test(&t->usage))
  8743. + call_rcu(&t->put_rcu, __put_task_struct_cb);
  8744. +}
  8745. +#else
  8746. extern void __put_task_struct(struct task_struct *t);
  8747. static inline void put_task_struct(struct task_struct *t)
  8748. @@ -2232,6 +2296,7 @@ static inline void put_task_struct(struct task_struct *t)
  8749. if (atomic_dec_and_test(&t->usage))
  8750. __put_task_struct(t);
  8751. }
  8752. +#endif
  8753. struct task_struct *task_rcu_dereference(struct task_struct **ptask);
  8754. struct task_struct *try_get_task_struct(struct task_struct **ptask);
  8755. @@ -2273,6 +2338,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
  8756. /*
  8757. * Per process flags
  8758. */
  8759. +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */
  8760. #define PF_EXITING 0x00000004 /* getting shut down */
  8761. #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
  8762. #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
  8763. @@ -2441,6 +2507,10 @@ extern void do_set_cpus_allowed(struct task_struct *p,
  8764. extern int set_cpus_allowed_ptr(struct task_struct *p,
  8765. const struct cpumask *new_mask);
  8766. +int migrate_me(void);
  8767. +void tell_sched_cpu_down_begin(int cpu);
  8768. +void tell_sched_cpu_down_done(int cpu);
  8769. +
  8770. #else
  8771. static inline void do_set_cpus_allowed(struct task_struct *p,
  8772. const struct cpumask *new_mask)
  8773. @@ -2453,6 +2523,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
  8774. return -EINVAL;
  8775. return 0;
  8776. }
  8777. +static inline int migrate_me(void) { return 0; }
  8778. +static inline void tell_sched_cpu_down_begin(int cpu) { }
  8779. +static inline void tell_sched_cpu_down_done(int cpu) { }
  8780. #endif
  8781. #ifdef CONFIG_NO_HZ_COMMON
  8782. @@ -2691,6 +2764,7 @@ extern void xtime_update(unsigned long ticks);
  8783. extern int wake_up_state(struct task_struct *tsk, unsigned int state);
  8784. extern int wake_up_process(struct task_struct *tsk);
  8785. +extern int wake_up_lock_sleeper(struct task_struct * tsk);
  8786. extern void wake_up_new_task(struct task_struct *tsk);
  8787. #ifdef CONFIG_SMP
  8788. extern void kick_process(struct task_struct *tsk);
  8789. @@ -2899,6 +2973,17 @@ static inline void mmdrop(struct mm_struct *mm)
  8790. __mmdrop(mm);
  8791. }
  8792. +#ifdef CONFIG_PREEMPT_RT_BASE
  8793. +extern void __mmdrop_delayed(struct rcu_head *rhp);
  8794. +static inline void mmdrop_delayed(struct mm_struct *mm)
  8795. +{
  8796. + if (atomic_dec_and_test(&mm->mm_count))
  8797. + call_rcu(&mm->delayed_drop, __mmdrop_delayed);
  8798. +}
  8799. +#else
  8800. +# define mmdrop_delayed(mm) mmdrop(mm)
  8801. +#endif
  8802. +
  8803. static inline void mmdrop_async_fn(struct work_struct *work)
  8804. {
  8805. struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work);
  8806. @@ -3291,6 +3376,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
  8807. return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
  8808. }
  8809. +#ifdef CONFIG_PREEMPT_LAZY
  8810. +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
  8811. +{
  8812. + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
  8813. +}
  8814. +
  8815. +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
  8816. +{
  8817. + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
  8818. +}
  8819. +
  8820. +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
  8821. +{
  8822. + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
  8823. +}
  8824. +
  8825. +static inline int need_resched_lazy(void)
  8826. +{
  8827. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  8828. +}
  8829. +
  8830. +static inline int need_resched_now(void)
  8831. +{
  8832. + return test_thread_flag(TIF_NEED_RESCHED);
  8833. +}
  8834. +
  8835. +#else
  8836. +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
  8837. +static inline int need_resched_lazy(void) { return 0; }
  8838. +
  8839. +static inline int need_resched_now(void)
  8840. +{
  8841. + return test_thread_flag(TIF_NEED_RESCHED);
  8842. +}
  8843. +
  8844. +#endif
  8845. +
  8846. static inline int restart_syscall(void)
  8847. {
  8848. set_tsk_thread_flag(current, TIF_SIGPENDING);
  8849. @@ -3322,6 +3444,51 @@ static inline int signal_pending_state(long state, struct task_struct *p)
  8850. return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
  8851. }
  8852. +static inline bool __task_is_stopped_or_traced(struct task_struct *task)
  8853. +{
  8854. + if (task->state & (__TASK_STOPPED | __TASK_TRACED))
  8855. + return true;
  8856. +#ifdef CONFIG_PREEMPT_RT_FULL
  8857. + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
  8858. + return true;
  8859. +#endif
  8860. + return false;
  8861. +}
  8862. +
  8863. +static inline bool task_is_stopped_or_traced(struct task_struct *task)
  8864. +{
  8865. + bool traced_stopped;
  8866. +
  8867. +#ifdef CONFIG_PREEMPT_RT_FULL
  8868. + unsigned long flags;
  8869. +
  8870. + raw_spin_lock_irqsave(&task->pi_lock, flags);
  8871. + traced_stopped = __task_is_stopped_or_traced(task);
  8872. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  8873. +#else
  8874. + traced_stopped = __task_is_stopped_or_traced(task);
  8875. +#endif
  8876. + return traced_stopped;
  8877. +}
  8878. +
  8879. +static inline bool task_is_traced(struct task_struct *task)
  8880. +{
  8881. + bool traced = false;
  8882. +
  8883. + if (task->state & __TASK_TRACED)
  8884. + return true;
  8885. +#ifdef CONFIG_PREEMPT_RT_FULL
  8886. + /* in case the task is sleeping on tasklist_lock */
  8887. + raw_spin_lock_irq(&task->pi_lock);
  8888. + if (task->state & __TASK_TRACED)
  8889. + traced = true;
  8890. + else if (task->saved_state & __TASK_TRACED)
  8891. + traced = true;
  8892. + raw_spin_unlock_irq(&task->pi_lock);
  8893. +#endif
  8894. + return traced;
  8895. +}
  8896. +
  8897. /*
  8898. * cond_resched() and cond_resched_lock(): latency reduction via
  8899. * explicit rescheduling in places that are safe. The return
  8900. @@ -3347,12 +3514,16 @@ extern int __cond_resched_lock(spinlock_t *lock);
  8901. __cond_resched_lock(lock); \
  8902. })
  8903. +#ifndef CONFIG_PREEMPT_RT_FULL
  8904. extern int __cond_resched_softirq(void);
  8905. #define cond_resched_softirq() ({ \
  8906. ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
  8907. __cond_resched_softirq(); \
  8908. })
  8909. +#else
  8910. +# define cond_resched_softirq() cond_resched()
  8911. +#endif
  8912. static inline void cond_resched_rcu(void)
  8913. {
  8914. @@ -3527,6 +3698,31 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
  8915. #endif /* CONFIG_SMP */
  8916. +static inline int __migrate_disabled(struct task_struct *p)
  8917. +{
  8918. +#ifdef CONFIG_PREEMPT_RT_FULL
  8919. + return p->migrate_disable;
  8920. +#else
  8921. + return 0;
  8922. +#endif
  8923. +}
  8924. +
  8925. +/* Future-safe accessor for struct task_struct's cpus_allowed. */
  8926. +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
  8927. +{
  8928. + if (__migrate_disabled(p))
  8929. + return cpumask_of(task_cpu(p));
  8930. +
  8931. + return &p->cpus_allowed;
  8932. +}
  8933. +
  8934. +static inline int tsk_nr_cpus_allowed(struct task_struct *p)
  8935. +{
  8936. + if (__migrate_disabled(p))
  8937. + return 1;
  8938. + return p->nr_cpus_allowed;
  8939. +}
  8940. +
  8941. extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
  8942. extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
  8943. diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
  8944. index a30b172df6e1..db3e91f2bc03 100644
  8945. --- a/include/linux/sched/rt.h
  8946. +++ b/include/linux/sched/rt.h
  8947. @@ -16,27 +16,20 @@ static inline int rt_task(struct task_struct *p)
  8948. }
  8949. #ifdef CONFIG_RT_MUTEXES
  8950. -extern int rt_mutex_getprio(struct task_struct *p);
  8951. -extern void rt_mutex_setprio(struct task_struct *p, int prio);
  8952. -extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio);
  8953. -extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
  8954. +/*
  8955. + * Must hold either p->pi_lock or task_rq(p)->lock.
  8956. + */
  8957. +static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p)
  8958. +{
  8959. + return p->pi_top_task;
  8960. +}
  8961. +extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task);
  8962. extern void rt_mutex_adjust_pi(struct task_struct *p);
  8963. static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
  8964. {
  8965. return tsk->pi_blocked_on != NULL;
  8966. }
  8967. #else
  8968. -static inline int rt_mutex_getprio(struct task_struct *p)
  8969. -{
  8970. - return p->normal_prio;
  8971. -}
  8972. -
  8973. -static inline int rt_mutex_get_effective_prio(struct task_struct *task,
  8974. - int newprio)
  8975. -{
  8976. - return newprio;
  8977. -}
  8978. -
  8979. static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
  8980. {
  8981. return NULL;
  8982. diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
  8983. index ead97654c4e9..3d7223ffdd3b 100644
  8984. --- a/include/linux/seqlock.h
  8985. +++ b/include/linux/seqlock.h
  8986. @@ -220,20 +220,30 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
  8987. return __read_seqcount_retry(s, start);
  8988. }
  8989. -
  8990. -
  8991. -static inline void raw_write_seqcount_begin(seqcount_t *s)
  8992. +static inline void __raw_write_seqcount_begin(seqcount_t *s)
  8993. {
  8994. s->sequence++;
  8995. smp_wmb();
  8996. }
  8997. -static inline void raw_write_seqcount_end(seqcount_t *s)
  8998. +static inline void raw_write_seqcount_begin(seqcount_t *s)
  8999. +{
  9000. + preempt_disable_rt();
  9001. + __raw_write_seqcount_begin(s);
  9002. +}
  9003. +
  9004. +static inline void __raw_write_seqcount_end(seqcount_t *s)
  9005. {
  9006. smp_wmb();
  9007. s->sequence++;
  9008. }
  9009. +static inline void raw_write_seqcount_end(seqcount_t *s)
  9010. +{
  9011. + __raw_write_seqcount_end(s);
  9012. + preempt_enable_rt();
  9013. +}
  9014. +
  9015. /**
  9016. * raw_write_seqcount_barrier - do a seq write barrier
  9017. * @s: pointer to seqcount_t
  9018. @@ -428,10 +438,32 @@ typedef struct {
  9019. /*
  9020. * Read side functions for starting and finalizing a read side section.
  9021. */
  9022. +#ifndef CONFIG_PREEMPT_RT_FULL
  9023. static inline unsigned read_seqbegin(const seqlock_t *sl)
  9024. {
  9025. return read_seqcount_begin(&sl->seqcount);
  9026. }
  9027. +#else
  9028. +/*
  9029. + * Starvation safe read side for RT
  9030. + */
  9031. +static inline unsigned read_seqbegin(seqlock_t *sl)
  9032. +{
  9033. + unsigned ret;
  9034. +
  9035. +repeat:
  9036. + ret = ACCESS_ONCE(sl->seqcount.sequence);
  9037. + if (unlikely(ret & 1)) {
  9038. + /*
  9039. + * Take the lock and let the writer proceed (i.e. possibly
  9040. + * boost it), otherwise we could loop here forever.
  9041. + */
  9042. + spin_unlock_wait(&sl->lock);
  9043. + goto repeat;
  9044. + }
  9045. + return ret;
  9046. +}
  9047. +#endif
  9048. static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
  9049. {
  9050. @@ -446,36 +478,45 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
  9051. static inline void write_seqlock(seqlock_t *sl)
  9052. {
  9053. spin_lock(&sl->lock);
  9054. - write_seqcount_begin(&sl->seqcount);
  9055. + __raw_write_seqcount_begin(&sl->seqcount);
  9056. +}
  9057. +
  9058. +static inline int try_write_seqlock(seqlock_t *sl)
  9059. +{
  9060. + if (spin_trylock(&sl->lock)) {
  9061. + __raw_write_seqcount_begin(&sl->seqcount);
  9062. + return 1;
  9063. + }
  9064. + return 0;
  9065. }
  9066. static inline void write_sequnlock(seqlock_t *sl)
  9067. {
  9068. - write_seqcount_end(&sl->seqcount);
  9069. + __raw_write_seqcount_end(&sl->seqcount);
  9070. spin_unlock(&sl->lock);
  9071. }
  9072. static inline void write_seqlock_bh(seqlock_t *sl)
  9073. {
  9074. spin_lock_bh(&sl->lock);
  9075. - write_seqcount_begin(&sl->seqcount);
  9076. + __raw_write_seqcount_begin(&sl->seqcount);
  9077. }
  9078. static inline void write_sequnlock_bh(seqlock_t *sl)
  9079. {
  9080. - write_seqcount_end(&sl->seqcount);
  9081. + __raw_write_seqcount_end(&sl->seqcount);
  9082. spin_unlock_bh(&sl->lock);
  9083. }
  9084. static inline void write_seqlock_irq(seqlock_t *sl)
  9085. {
  9086. spin_lock_irq(&sl->lock);
  9087. - write_seqcount_begin(&sl->seqcount);
  9088. + __raw_write_seqcount_begin(&sl->seqcount);
  9089. }
  9090. static inline void write_sequnlock_irq(seqlock_t *sl)
  9091. {
  9092. - write_seqcount_end(&sl->seqcount);
  9093. + __raw_write_seqcount_end(&sl->seqcount);
  9094. spin_unlock_irq(&sl->lock);
  9095. }
  9096. @@ -484,7 +525,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
  9097. unsigned long flags;
  9098. spin_lock_irqsave(&sl->lock, flags);
  9099. - write_seqcount_begin(&sl->seqcount);
  9100. + __raw_write_seqcount_begin(&sl->seqcount);
  9101. return flags;
  9102. }
  9103. @@ -494,7 +535,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
  9104. static inline void
  9105. write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
  9106. {
  9107. - write_seqcount_end(&sl->seqcount);
  9108. + __raw_write_seqcount_end(&sl->seqcount);
  9109. spin_unlock_irqrestore(&sl->lock, flags);
  9110. }
  9111. diff --git a/include/linux/signal.h b/include/linux/signal.h
  9112. index b63f63eaa39c..295540fdfc72 100644
  9113. --- a/include/linux/signal.h
  9114. +++ b/include/linux/signal.h
  9115. @@ -233,6 +233,7 @@ static inline void init_sigpending(struct sigpending *sig)
  9116. }
  9117. extern void flush_sigqueue(struct sigpending *queue);
  9118. +extern void flush_task_sigqueue(struct task_struct *tsk);
  9119. /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
  9120. static inline int valid_signal(unsigned long sig)
  9121. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
  9122. index 601dfa849d30..dca387a8fa6b 100644
  9123. --- a/include/linux/skbuff.h
  9124. +++ b/include/linux/skbuff.h
  9125. @@ -284,6 +284,7 @@ struct sk_buff_head {
  9126. __u32 qlen;
  9127. spinlock_t lock;
  9128. + raw_spinlock_t raw_lock;
  9129. };
  9130. struct sk_buff;
  9131. @@ -1573,6 +1574,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list)
  9132. __skb_queue_head_init(list);
  9133. }
  9134. +static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
  9135. +{
  9136. + raw_spin_lock_init(&list->raw_lock);
  9137. + __skb_queue_head_init(list);
  9138. +}
  9139. +
  9140. static inline void skb_queue_head_init_class(struct sk_buff_head *list,
  9141. struct lock_class_key *class)
  9142. {
  9143. diff --git a/include/linux/smp.h b/include/linux/smp.h
  9144. index 8e0cb7a0f836..891c533724f5 100644
  9145. --- a/include/linux/smp.h
  9146. +++ b/include/linux/smp.h
  9147. @@ -120,6 +120,13 @@ extern unsigned int setup_max_cpus;
  9148. extern void __init setup_nr_cpu_ids(void);
  9149. extern void __init smp_init(void);
  9150. +extern int __boot_cpu_id;
  9151. +
  9152. +static inline int get_boot_cpu_id(void)
  9153. +{
  9154. + return __boot_cpu_id;
  9155. +}
  9156. +
  9157. #else /* !SMP */
  9158. static inline void smp_send_stop(void) { }
  9159. @@ -158,6 +165,11 @@ static inline void smp_init(void) { up_late_init(); }
  9160. static inline void smp_init(void) { }
  9161. #endif
  9162. +static inline int get_boot_cpu_id(void)
  9163. +{
  9164. + return 0;
  9165. +}
  9166. +
  9167. #endif /* !SMP */
  9168. /*
  9169. @@ -185,6 +197,9 @@ static inline void smp_init(void) { }
  9170. #define get_cpu() ({ preempt_disable(); smp_processor_id(); })
  9171. #define put_cpu() preempt_enable()
  9172. +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
  9173. +#define put_cpu_light() migrate_enable()
  9174. +
  9175. /*
  9176. * Callback to arch code if there's nosmp or maxcpus=0 on the
  9177. * boot command line:
  9178. diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
  9179. index 47dd0cebd204..b241cc044bd3 100644
  9180. --- a/include/linux/spinlock.h
  9181. +++ b/include/linux/spinlock.h
  9182. @@ -271,7 +271,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
  9183. #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock))
  9184. /* Include rwlock functions */
  9185. -#include <linux/rwlock.h>
  9186. +#ifdef CONFIG_PREEMPT_RT_FULL
  9187. +# include <linux/rwlock_rt.h>
  9188. +#else
  9189. +# include <linux/rwlock.h>
  9190. +#endif
  9191. /*
  9192. * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
  9193. @@ -282,6 +286,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
  9194. # include <linux/spinlock_api_up.h>
  9195. #endif
  9196. +#ifdef CONFIG_PREEMPT_RT_FULL
  9197. +# include <linux/spinlock_rt.h>
  9198. +#else /* PREEMPT_RT_FULL */
  9199. +
  9200. /*
  9201. * Map the spin_lock functions to the raw variants for PREEMPT_RT=n
  9202. */
  9203. @@ -416,4 +424,6 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
  9204. #define atomic_dec_and_lock(atomic, lock) \
  9205. __cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
  9206. +#endif /* !PREEMPT_RT_FULL */
  9207. +
  9208. #endif /* __LINUX_SPINLOCK_H */
  9209. diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
  9210. index 5344268e6e62..043263f30e81 100644
  9211. --- a/include/linux/spinlock_api_smp.h
  9212. +++ b/include/linux/spinlock_api_smp.h
  9213. @@ -189,6 +189,8 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock)
  9214. return 0;
  9215. }
  9216. -#include <linux/rwlock_api_smp.h>
  9217. +#ifndef CONFIG_PREEMPT_RT_FULL
  9218. +# include <linux/rwlock_api_smp.h>
  9219. +#endif
  9220. #endif /* __LINUX_SPINLOCK_API_SMP_H */
  9221. diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
  9222. new file mode 100644
  9223. index 000000000000..43ca841b913a
  9224. --- /dev/null
  9225. +++ b/include/linux/spinlock_rt.h
  9226. @@ -0,0 +1,162 @@
  9227. +#ifndef __LINUX_SPINLOCK_RT_H
  9228. +#define __LINUX_SPINLOCK_RT_H
  9229. +
  9230. +#ifndef __LINUX_SPINLOCK_H
  9231. +#error Do not include directly. Use spinlock.h
  9232. +#endif
  9233. +
  9234. +#include <linux/bug.h>
  9235. +
  9236. +extern void
  9237. +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key);
  9238. +
  9239. +#define spin_lock_init(slock) \
  9240. +do { \
  9241. + static struct lock_class_key __key; \
  9242. + \
  9243. + rt_mutex_init(&(slock)->lock); \
  9244. + __rt_spin_lock_init(slock, #slock, &__key); \
  9245. +} while (0)
  9246. +
  9247. +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock);
  9248. +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock);
  9249. +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock);
  9250. +
  9251. +extern void __lockfunc rt_spin_lock(spinlock_t *lock);
  9252. +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
  9253. +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
  9254. +extern void __lockfunc rt_spin_unlock(spinlock_t *lock);
  9255. +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock);
  9256. +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags);
  9257. +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock);
  9258. +extern int __lockfunc rt_spin_trylock(spinlock_t *lock);
  9259. +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock);
  9260. +
  9261. +/*
  9262. + * lockdep-less calls, for derived types like rwlock:
  9263. + * (for trylock they can use rt_mutex_trylock() directly).
  9264. + */
  9265. +extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock);
  9266. +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
  9267. +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
  9268. +
  9269. +#define spin_lock(lock) rt_spin_lock(lock)
  9270. +
  9271. +#define spin_lock_bh(lock) \
  9272. + do { \
  9273. + local_bh_disable(); \
  9274. + rt_spin_lock(lock); \
  9275. + } while (0)
  9276. +
  9277. +#define spin_lock_irq(lock) spin_lock(lock)
  9278. +
  9279. +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock))
  9280. +
  9281. +#define spin_trylock(lock) \
  9282. +({ \
  9283. + int __locked; \
  9284. + __locked = spin_do_trylock(lock); \
  9285. + __locked; \
  9286. +})
  9287. +
  9288. +#ifdef CONFIG_LOCKDEP
  9289. +# define spin_lock_nested(lock, subclass) \
  9290. + do { \
  9291. + rt_spin_lock_nested(lock, subclass); \
  9292. + } while (0)
  9293. +
  9294. +#define spin_lock_bh_nested(lock, subclass) \
  9295. + do { \
  9296. + local_bh_disable(); \
  9297. + rt_spin_lock_nested(lock, subclass); \
  9298. + } while (0)
  9299. +
  9300. +# define spin_lock_irqsave_nested(lock, flags, subclass) \
  9301. + do { \
  9302. + typecheck(unsigned long, flags); \
  9303. + flags = 0; \
  9304. + rt_spin_lock_nested(lock, subclass); \
  9305. + } while (0)
  9306. +#else
  9307. +# define spin_lock_nested(lock, subclass) spin_lock(lock)
  9308. +# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock)
  9309. +
  9310. +# define spin_lock_irqsave_nested(lock, flags, subclass) \
  9311. + do { \
  9312. + typecheck(unsigned long, flags); \
  9313. + flags = 0; \
  9314. + spin_lock(lock); \
  9315. + } while (0)
  9316. +#endif
  9317. +
  9318. +#define spin_lock_irqsave(lock, flags) \
  9319. + do { \
  9320. + typecheck(unsigned long, flags); \
  9321. + flags = 0; \
  9322. + spin_lock(lock); \
  9323. + } while (0)
  9324. +
  9325. +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock)
  9326. +{
  9327. + unsigned long flags = 0;
  9328. +#ifdef CONFIG_TRACE_IRQFLAGS
  9329. + flags = rt_spin_lock_trace_flags(lock);
  9330. +#else
  9331. + spin_lock(lock); /* lock_local */
  9332. +#endif
  9333. + return flags;
  9334. +}
  9335. +
  9336. +/* FIXME: we need rt_spin_lock_nest_lock */
  9337. +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0)
  9338. +
  9339. +#define spin_unlock(lock) rt_spin_unlock(lock)
  9340. +
  9341. +#define spin_unlock_bh(lock) \
  9342. + do { \
  9343. + rt_spin_unlock(lock); \
  9344. + local_bh_enable(); \
  9345. + } while (0)
  9346. +
  9347. +#define spin_unlock_irq(lock) spin_unlock(lock)
  9348. +
  9349. +#define spin_unlock_irqrestore(lock, flags) \
  9350. + do { \
  9351. + typecheck(unsigned long, flags); \
  9352. + (void) flags; \
  9353. + spin_unlock(lock); \
  9354. + } while (0)
  9355. +
  9356. +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock))
  9357. +#define spin_trylock_irq(lock) spin_trylock(lock)
  9358. +
  9359. +#define spin_trylock_irqsave(lock, flags) \
  9360. + rt_spin_trylock_irqsave(lock, &(flags))
  9361. +
  9362. +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock)
  9363. +
  9364. +#ifdef CONFIG_GENERIC_LOCKBREAK
  9365. +# define spin_is_contended(lock) ((lock)->break_lock)
  9366. +#else
  9367. +# define spin_is_contended(lock) (((void)(lock), 0))
  9368. +#endif
  9369. +
  9370. +static inline int spin_can_lock(spinlock_t *lock)
  9371. +{
  9372. + return !rt_mutex_is_locked(&lock->lock);
  9373. +}
  9374. +
  9375. +static inline int spin_is_locked(spinlock_t *lock)
  9376. +{
  9377. + return rt_mutex_is_locked(&lock->lock);
  9378. +}
  9379. +
  9380. +static inline void assert_spin_locked(spinlock_t *lock)
  9381. +{
  9382. + BUG_ON(!spin_is_locked(lock));
  9383. +}
  9384. +
  9385. +#define atomic_dec_and_lock(atomic, lock) \
  9386. + atomic_dec_and_spin_lock(atomic, lock)
  9387. +
  9388. +#endif
  9389. diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
  9390. index 73548eb13a5d..10bac715ea96 100644
  9391. --- a/include/linux/spinlock_types.h
  9392. +++ b/include/linux/spinlock_types.h
  9393. @@ -9,80 +9,15 @@
  9394. * Released under the General Public License (GPL).
  9395. */
  9396. -#if defined(CONFIG_SMP)
  9397. -# include <asm/spinlock_types.h>
  9398. -#else
  9399. -# include <linux/spinlock_types_up.h>
  9400. -#endif
  9401. -
  9402. -#include <linux/lockdep.h>
  9403. -
  9404. -typedef struct raw_spinlock {
  9405. - arch_spinlock_t raw_lock;
  9406. -#ifdef CONFIG_GENERIC_LOCKBREAK
  9407. - unsigned int break_lock;
  9408. -#endif
  9409. -#ifdef CONFIG_DEBUG_SPINLOCK
  9410. - unsigned int magic, owner_cpu;
  9411. - void *owner;
  9412. -#endif
  9413. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9414. - struct lockdep_map dep_map;
  9415. -#endif
  9416. -} raw_spinlock_t;
  9417. -
  9418. -#define SPINLOCK_MAGIC 0xdead4ead
  9419. -
  9420. -#define SPINLOCK_OWNER_INIT ((void *)-1L)
  9421. -
  9422. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9423. -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  9424. -#else
  9425. -# define SPIN_DEP_MAP_INIT(lockname)
  9426. -#endif
  9427. +#include <linux/spinlock_types_raw.h>
  9428. -#ifdef CONFIG_DEBUG_SPINLOCK
  9429. -# define SPIN_DEBUG_INIT(lockname) \
  9430. - .magic = SPINLOCK_MAGIC, \
  9431. - .owner_cpu = -1, \
  9432. - .owner = SPINLOCK_OWNER_INIT,
  9433. +#ifndef CONFIG_PREEMPT_RT_FULL
  9434. +# include <linux/spinlock_types_nort.h>
  9435. +# include <linux/rwlock_types.h>
  9436. #else
  9437. -# define SPIN_DEBUG_INIT(lockname)
  9438. +# include <linux/rtmutex.h>
  9439. +# include <linux/spinlock_types_rt.h>
  9440. +# include <linux/rwlock_types_rt.h>
  9441. #endif
  9442. -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
  9443. - { \
  9444. - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
  9445. - SPIN_DEBUG_INIT(lockname) \
  9446. - SPIN_DEP_MAP_INIT(lockname) }
  9447. -
  9448. -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
  9449. - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
  9450. -
  9451. -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
  9452. -
  9453. -typedef struct spinlock {
  9454. - union {
  9455. - struct raw_spinlock rlock;
  9456. -
  9457. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9458. -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
  9459. - struct {
  9460. - u8 __padding[LOCK_PADSIZE];
  9461. - struct lockdep_map dep_map;
  9462. - };
  9463. -#endif
  9464. - };
  9465. -} spinlock_t;
  9466. -
  9467. -#define __SPIN_LOCK_INITIALIZER(lockname) \
  9468. - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
  9469. -
  9470. -#define __SPIN_LOCK_UNLOCKED(lockname) \
  9471. - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
  9472. -
  9473. -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
  9474. -
  9475. -#include <linux/rwlock_types.h>
  9476. -
  9477. #endif /* __LINUX_SPINLOCK_TYPES_H */
  9478. diff --git a/include/linux/spinlock_types_nort.h b/include/linux/spinlock_types_nort.h
  9479. new file mode 100644
  9480. index 000000000000..f1dac1fb1d6a
  9481. --- /dev/null
  9482. +++ b/include/linux/spinlock_types_nort.h
  9483. @@ -0,0 +1,33 @@
  9484. +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H
  9485. +#define __LINUX_SPINLOCK_TYPES_NORT_H
  9486. +
  9487. +#ifndef __LINUX_SPINLOCK_TYPES_H
  9488. +#error "Do not include directly. Include spinlock_types.h instead"
  9489. +#endif
  9490. +
  9491. +/*
  9492. + * The non RT version maps spinlocks to raw_spinlocks
  9493. + */
  9494. +typedef struct spinlock {
  9495. + union {
  9496. + struct raw_spinlock rlock;
  9497. +
  9498. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9499. +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
  9500. + struct {
  9501. + u8 __padding[LOCK_PADSIZE];
  9502. + struct lockdep_map dep_map;
  9503. + };
  9504. +#endif
  9505. + };
  9506. +} spinlock_t;
  9507. +
  9508. +#define __SPIN_LOCK_INITIALIZER(lockname) \
  9509. + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
  9510. +
  9511. +#define __SPIN_LOCK_UNLOCKED(lockname) \
  9512. + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
  9513. +
  9514. +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
  9515. +
  9516. +#endif
  9517. diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h
  9518. new file mode 100644
  9519. index 000000000000..edffc4d53fc9
  9520. --- /dev/null
  9521. +++ b/include/linux/spinlock_types_raw.h
  9522. @@ -0,0 +1,56 @@
  9523. +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  9524. +#define __LINUX_SPINLOCK_TYPES_RAW_H
  9525. +
  9526. +#if defined(CONFIG_SMP)
  9527. +# include <asm/spinlock_types.h>
  9528. +#else
  9529. +# include <linux/spinlock_types_up.h>
  9530. +#endif
  9531. +
  9532. +#include <linux/lockdep.h>
  9533. +
  9534. +typedef struct raw_spinlock {
  9535. + arch_spinlock_t raw_lock;
  9536. +#ifdef CONFIG_GENERIC_LOCKBREAK
  9537. + unsigned int break_lock;
  9538. +#endif
  9539. +#ifdef CONFIG_DEBUG_SPINLOCK
  9540. + unsigned int magic, owner_cpu;
  9541. + void *owner;
  9542. +#endif
  9543. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9544. + struct lockdep_map dep_map;
  9545. +#endif
  9546. +} raw_spinlock_t;
  9547. +
  9548. +#define SPINLOCK_MAGIC 0xdead4ead
  9549. +
  9550. +#define SPINLOCK_OWNER_INIT ((void *)-1L)
  9551. +
  9552. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9553. +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  9554. +#else
  9555. +# define SPIN_DEP_MAP_INIT(lockname)
  9556. +#endif
  9557. +
  9558. +#ifdef CONFIG_DEBUG_SPINLOCK
  9559. +# define SPIN_DEBUG_INIT(lockname) \
  9560. + .magic = SPINLOCK_MAGIC, \
  9561. + .owner_cpu = -1, \
  9562. + .owner = SPINLOCK_OWNER_INIT,
  9563. +#else
  9564. +# define SPIN_DEBUG_INIT(lockname)
  9565. +#endif
  9566. +
  9567. +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
  9568. + { \
  9569. + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
  9570. + SPIN_DEBUG_INIT(lockname) \
  9571. + SPIN_DEP_MAP_INIT(lockname) }
  9572. +
  9573. +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
  9574. + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
  9575. +
  9576. +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
  9577. +
  9578. +#endif
  9579. diff --git a/include/linux/spinlock_types_rt.h b/include/linux/spinlock_types_rt.h
  9580. new file mode 100644
  9581. index 000000000000..3e3d8c5f7a9a
  9582. --- /dev/null
  9583. +++ b/include/linux/spinlock_types_rt.h
  9584. @@ -0,0 +1,48 @@
  9585. +#ifndef __LINUX_SPINLOCK_TYPES_RT_H
  9586. +#define __LINUX_SPINLOCK_TYPES_RT_H
  9587. +
  9588. +#ifndef __LINUX_SPINLOCK_TYPES_H
  9589. +#error "Do not include directly. Include spinlock_types.h instead"
  9590. +#endif
  9591. +
  9592. +#include <linux/cache.h>
  9593. +
  9594. +/*
  9595. + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field:
  9596. + */
  9597. +typedef struct spinlock {
  9598. + struct rt_mutex lock;
  9599. + unsigned int break_lock;
  9600. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9601. + struct lockdep_map dep_map;
  9602. +#endif
  9603. +} spinlock_t;
  9604. +
  9605. +#ifdef CONFIG_DEBUG_RT_MUTEXES
  9606. +# define __RT_SPIN_INITIALIZER(name) \
  9607. + { \
  9608. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
  9609. + .save_state = 1, \
  9610. + .file = __FILE__, \
  9611. + .line = __LINE__ , \
  9612. + }
  9613. +#else
  9614. +# define __RT_SPIN_INITIALIZER(name) \
  9615. + { \
  9616. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
  9617. + .save_state = 1, \
  9618. + }
  9619. +#endif
  9620. +
  9621. +/*
  9622. +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock)
  9623. +*/
  9624. +
  9625. +#define __SPIN_LOCK_UNLOCKED(name) \
  9626. + { .lock = __RT_SPIN_INITIALIZER(name.lock), \
  9627. + SPIN_DEP_MAP_INIT(name) }
  9628. +
  9629. +#define DEFINE_SPINLOCK(name) \
  9630. + spinlock_t name = __SPIN_LOCK_UNLOCKED(name)
  9631. +
  9632. +#endif
  9633. diff --git a/include/linux/srcu.h b/include/linux/srcu.h
  9634. index dc8eb63c6568..e793d3a257da 100644
  9635. --- a/include/linux/srcu.h
  9636. +++ b/include/linux/srcu.h
  9637. @@ -84,10 +84,10 @@ int init_srcu_struct(struct srcu_struct *sp);
  9638. void process_srcu(struct work_struct *work);
  9639. -#define __SRCU_STRUCT_INIT(name) \
  9640. +#define __SRCU_STRUCT_INIT(name, pcpu_name) \
  9641. { \
  9642. .completed = -300, \
  9643. - .per_cpu_ref = &name##_srcu_array, \
  9644. + .per_cpu_ref = &pcpu_name, \
  9645. .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \
  9646. .running = false, \
  9647. .batch_queue = RCU_BATCH_INIT(name.batch_queue), \
  9648. @@ -119,7 +119,7 @@ void process_srcu(struct work_struct *work);
  9649. */
  9650. #define __DEFINE_SRCU(name, is_static) \
  9651. static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
  9652. - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
  9653. + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array)
  9654. #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
  9655. #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
  9656. diff --git a/include/linux/suspend.h b/include/linux/suspend.h
  9657. index d9718378a8be..e81e6dc7dcb1 100644
  9658. --- a/include/linux/suspend.h
  9659. +++ b/include/linux/suspend.h
  9660. @@ -193,6 +193,12 @@ struct platform_freeze_ops {
  9661. void (*end)(void);
  9662. };
  9663. +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
  9664. +extern bool pm_in_action;
  9665. +#else
  9666. +# define pm_in_action false
  9667. +#endif
  9668. +
  9669. #ifdef CONFIG_SUSPEND
  9670. /**
  9671. * suspend_set_ops - set platform dependent suspend operations
  9672. diff --git a/include/linux/swait.h b/include/linux/swait.h
  9673. index c1f9c62a8a50..83f004a72320 100644
  9674. --- a/include/linux/swait.h
  9675. +++ b/include/linux/swait.h
  9676. @@ -87,6 +87,7 @@ static inline int swait_active(struct swait_queue_head *q)
  9677. extern void swake_up(struct swait_queue_head *q);
  9678. extern void swake_up_all(struct swait_queue_head *q);
  9679. extern void swake_up_locked(struct swait_queue_head *q);
  9680. +extern void swake_up_all_locked(struct swait_queue_head *q);
  9681. extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
  9682. extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
  9683. diff --git a/include/linux/swap.h b/include/linux/swap.h
  9684. index 55ff5593c193..52bf5477dc92 100644
  9685. --- a/include/linux/swap.h
  9686. +++ b/include/linux/swap.h
  9687. @@ -11,6 +11,7 @@
  9688. #include <linux/fs.h>
  9689. #include <linux/atomic.h>
  9690. #include <linux/page-flags.h>
  9691. +#include <linux/locallock.h>
  9692. #include <asm/page.h>
  9693. struct notifier_block;
  9694. @@ -247,7 +248,8 @@ struct swap_info_struct {
  9695. void *workingset_eviction(struct address_space *mapping, struct page *page);
  9696. bool workingset_refault(void *shadow);
  9697. void workingset_activation(struct page *page);
  9698. -extern struct list_lru workingset_shadow_nodes;
  9699. +extern struct list_lru __workingset_shadow_nodes;
  9700. +DECLARE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
  9701. static inline unsigned int workingset_node_pages(struct radix_tree_node *node)
  9702. {
  9703. @@ -292,6 +294,7 @@ extern unsigned long nr_free_pagecache_pages(void);
  9704. /* linux/mm/swap.c */
  9705. +DECLARE_LOCAL_IRQ_LOCK(swapvec_lock);
  9706. extern void lru_cache_add(struct page *);
  9707. extern void lru_cache_add_anon(struct page *page);
  9708. extern void lru_cache_add_file(struct page *page);
  9709. diff --git a/include/linux/swork.h b/include/linux/swork.h
  9710. new file mode 100644
  9711. index 000000000000..f175fa9a6016
  9712. --- /dev/null
  9713. +++ b/include/linux/swork.h
  9714. @@ -0,0 +1,24 @@
  9715. +#ifndef _LINUX_SWORK_H
  9716. +#define _LINUX_SWORK_H
  9717. +
  9718. +#include <linux/list.h>
  9719. +
  9720. +struct swork_event {
  9721. + struct list_head item;
  9722. + unsigned long flags;
  9723. + void (*func)(struct swork_event *);
  9724. +};
  9725. +
  9726. +static inline void INIT_SWORK(struct swork_event *event,
  9727. + void (*func)(struct swork_event *))
  9728. +{
  9729. + event->flags = 0;
  9730. + event->func = func;
  9731. +}
  9732. +
  9733. +bool swork_queue(struct swork_event *sev);
  9734. +
  9735. +int swork_get(void);
  9736. +void swork_put(void);
  9737. +
  9738. +#endif /* _LINUX_SWORK_H */
  9739. diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
  9740. index 2873baf5372a..eb1a108f17ca 100644
  9741. --- a/include/linux/thread_info.h
  9742. +++ b/include/linux/thread_info.h
  9743. @@ -107,7 +107,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
  9744. #define test_thread_flag(flag) \
  9745. test_ti_thread_flag(current_thread_info(), flag)
  9746. -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
  9747. +#ifdef CONFIG_PREEMPT_LAZY
  9748. +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \
  9749. + test_thread_flag(TIF_NEED_RESCHED_LAZY))
  9750. +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED))
  9751. +#define tif_need_resched_lazy() (test_thread_flag(TIF_NEED_RESCHED_LAZY))
  9752. +
  9753. +#else
  9754. +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
  9755. +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED)
  9756. +#define tif_need_resched_lazy() 0
  9757. +#endif
  9758. #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
  9759. static inline int arch_within_stack_frames(const void * const stack,
  9760. diff --git a/include/linux/timer.h b/include/linux/timer.h
  9761. index ec86e4e55ea3..8e5b680d1275 100644
  9762. --- a/include/linux/timer.h
  9763. +++ b/include/linux/timer.h
  9764. @@ -241,7 +241,7 @@ extern void add_timer(struct timer_list *timer);
  9765. extern int try_to_del_timer_sync(struct timer_list *timer);
  9766. -#ifdef CONFIG_SMP
  9767. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  9768. extern int del_timer_sync(struct timer_list *timer);
  9769. #else
  9770. # define del_timer_sync(t) del_timer(t)
  9771. diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
  9772. index ba57266d9e80..5c36934ec2bc 100644
  9773. --- a/include/linux/trace_events.h
  9774. +++ b/include/linux/trace_events.h
  9775. @@ -56,6 +56,9 @@ struct trace_entry {
  9776. unsigned char flags;
  9777. unsigned char preempt_count;
  9778. int pid;
  9779. + unsigned short migrate_disable;
  9780. + unsigned short padding;
  9781. + unsigned char preempt_lazy_count;
  9782. };
  9783. #define TRACE_EVENT_TYPE_MAX \
  9784. diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
  9785. index f30c187ed785..83bf0f798426 100644
  9786. --- a/include/linux/uaccess.h
  9787. +++ b/include/linux/uaccess.h
  9788. @@ -24,6 +24,7 @@ static __always_inline void pagefault_disabled_dec(void)
  9789. */
  9790. static inline void pagefault_disable(void)
  9791. {
  9792. + migrate_disable();
  9793. pagefault_disabled_inc();
  9794. /*
  9795. * make sure to have issued the store before a pagefault
  9796. @@ -40,6 +41,7 @@ static inline void pagefault_enable(void)
  9797. */
  9798. barrier();
  9799. pagefault_disabled_dec();
  9800. + migrate_enable();
  9801. }
  9802. /*
  9803. diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
  9804. index 4a29c75b146e..0a294e950df8 100644
  9805. --- a/include/linux/uprobes.h
  9806. +++ b/include/linux/uprobes.h
  9807. @@ -27,6 +27,7 @@
  9808. #include <linux/errno.h>
  9809. #include <linux/rbtree.h>
  9810. #include <linux/types.h>
  9811. +#include <linux/wait.h>
  9812. struct vm_area_struct;
  9813. struct mm_struct;
  9814. diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
  9815. index 613771909b6e..e28c5a43229d 100644
  9816. --- a/include/linux/vmstat.h
  9817. +++ b/include/linux/vmstat.h
  9818. @@ -33,7 +33,9 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
  9819. */
  9820. static inline void __count_vm_event(enum vm_event_item item)
  9821. {
  9822. + preempt_disable_rt();
  9823. raw_cpu_inc(vm_event_states.event[item]);
  9824. + preempt_enable_rt();
  9825. }
  9826. static inline void count_vm_event(enum vm_event_item item)
  9827. @@ -43,7 +45,9 @@ static inline void count_vm_event(enum vm_event_item item)
  9828. static inline void __count_vm_events(enum vm_event_item item, long delta)
  9829. {
  9830. + preempt_disable_rt();
  9831. raw_cpu_add(vm_event_states.event[item], delta);
  9832. + preempt_enable_rt();
  9833. }
  9834. static inline void count_vm_events(enum vm_event_item item, long delta)
  9835. diff --git a/include/linux/wait.h b/include/linux/wait.h
  9836. index 2408e8d5c05c..db50d6609195 100644
  9837. --- a/include/linux/wait.h
  9838. +++ b/include/linux/wait.h
  9839. @@ -8,6 +8,7 @@
  9840. #include <linux/spinlock.h>
  9841. #include <asm/current.h>
  9842. #include <uapi/linux/wait.h>
  9843. +#include <linux/atomic.h>
  9844. typedef struct __wait_queue wait_queue_t;
  9845. typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
  9846. diff --git a/include/net/dst.h b/include/net/dst.h
  9847. index ddcff17615da..a1fc787b1a8c 100644
  9848. --- a/include/net/dst.h
  9849. +++ b/include/net/dst.h
  9850. @@ -452,7 +452,7 @@ static inline void dst_confirm(struct dst_entry *dst)
  9851. static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
  9852. struct sk_buff *skb)
  9853. {
  9854. - const struct hh_cache *hh;
  9855. + struct hh_cache *hh;
  9856. if (dst->pending_confirm) {
  9857. unsigned long now = jiffies;
  9858. diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
  9859. index 231e121cc7d9..d125222b979d 100644
  9860. --- a/include/net/gen_stats.h
  9861. +++ b/include/net/gen_stats.h
  9862. @@ -5,6 +5,7 @@
  9863. #include <linux/socket.h>
  9864. #include <linux/rtnetlink.h>
  9865. #include <linux/pkt_sched.h>
  9866. +#include <net/net_seq_lock.h>
  9867. struct gnet_stats_basic_cpu {
  9868. struct gnet_stats_basic_packed bstats;
  9869. @@ -33,11 +34,11 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
  9870. spinlock_t *lock, struct gnet_dump *d,
  9871. int padattr);
  9872. -int gnet_stats_copy_basic(const seqcount_t *running,
  9873. +int gnet_stats_copy_basic(net_seqlock_t *running,
  9874. struct gnet_dump *d,
  9875. struct gnet_stats_basic_cpu __percpu *cpu,
  9876. struct gnet_stats_basic_packed *b);
  9877. -void __gnet_stats_copy_basic(const seqcount_t *running,
  9878. +void __gnet_stats_copy_basic(net_seqlock_t *running,
  9879. struct gnet_stats_basic_packed *bstats,
  9880. struct gnet_stats_basic_cpu __percpu *cpu,
  9881. struct gnet_stats_basic_packed *b);
  9882. @@ -55,14 +56,14 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
  9883. struct gnet_stats_basic_cpu __percpu *cpu_bstats,
  9884. struct gnet_stats_rate_est64 *rate_est,
  9885. spinlock_t *stats_lock,
  9886. - seqcount_t *running, struct nlattr *opt);
  9887. + net_seqlock_t *running, struct nlattr *opt);
  9888. void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
  9889. struct gnet_stats_rate_est64 *rate_est);
  9890. int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
  9891. struct gnet_stats_basic_cpu __percpu *cpu_bstats,
  9892. struct gnet_stats_rate_est64 *rate_est,
  9893. spinlock_t *stats_lock,
  9894. - seqcount_t *running, struct nlattr *opt);
  9895. + net_seqlock_t *running, struct nlattr *opt);
  9896. bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
  9897. const struct gnet_stats_rate_est64 *rate_est);
  9898. #endif
  9899. diff --git a/include/net/neighbour.h b/include/net/neighbour.h
  9900. index 8b683841e574..bf656008f6e7 100644
  9901. --- a/include/net/neighbour.h
  9902. +++ b/include/net/neighbour.h
  9903. @@ -446,7 +446,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
  9904. }
  9905. #endif
  9906. -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
  9907. +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
  9908. {
  9909. unsigned int seq;
  9910. int hh_len;
  9911. @@ -501,7 +501,7 @@ struct neighbour_cb {
  9912. #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
  9913. -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
  9914. +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n,
  9915. const struct net_device *dev)
  9916. {
  9917. unsigned int seq;
  9918. diff --git a/include/net/net_seq_lock.h b/include/net/net_seq_lock.h
  9919. new file mode 100644
  9920. index 000000000000..a7034298a82a
  9921. --- /dev/null
  9922. +++ b/include/net/net_seq_lock.h
  9923. @@ -0,0 +1,15 @@
  9924. +#ifndef __NET_NET_SEQ_LOCK_H__
  9925. +#define __NET_NET_SEQ_LOCK_H__
  9926. +
  9927. +#ifdef CONFIG_PREEMPT_RT_BASE
  9928. +# define net_seqlock_t seqlock_t
  9929. +# define net_seq_begin(__r) read_seqbegin(__r)
  9930. +# define net_seq_retry(__r, __s) read_seqretry(__r, __s)
  9931. +
  9932. +#else
  9933. +# define net_seqlock_t seqcount_t
  9934. +# define net_seq_begin(__r) read_seqcount_begin(__r)
  9935. +# define net_seq_retry(__r, __s) read_seqcount_retry(__r, __s)
  9936. +#endif
  9937. +
  9938. +#endif
  9939. diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
  9940. index 7adf4386ac8f..d3fd5c357268 100644
  9941. --- a/include/net/netns/ipv4.h
  9942. +++ b/include/net/netns/ipv4.h
  9943. @@ -69,6 +69,7 @@ struct netns_ipv4 {
  9944. int sysctl_icmp_echo_ignore_all;
  9945. int sysctl_icmp_echo_ignore_broadcasts;
  9946. + int sysctl_icmp_echo_sysrq;
  9947. int sysctl_icmp_ignore_bogus_error_responses;
  9948. int sysctl_icmp_ratelimit;
  9949. int sysctl_icmp_ratemask;
  9950. diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
  9951. index f18fc1a0321f..5d2c9b89c168 100644
  9952. --- a/include/net/sch_generic.h
  9953. +++ b/include/net/sch_generic.h
  9954. @@ -10,6 +10,7 @@
  9955. #include <linux/dynamic_queue_limits.h>
  9956. #include <net/gen_stats.h>
  9957. #include <net/rtnetlink.h>
  9958. +#include <net/net_seq_lock.h>
  9959. struct Qdisc_ops;
  9960. struct qdisc_walker;
  9961. @@ -86,7 +87,7 @@ struct Qdisc {
  9962. struct sk_buff *gso_skb ____cacheline_aligned_in_smp;
  9963. struct qdisc_skb_head q;
  9964. struct gnet_stats_basic_packed bstats;
  9965. - seqcount_t running;
  9966. + net_seqlock_t running;
  9967. struct gnet_stats_queue qstats;
  9968. unsigned long state;
  9969. struct Qdisc *next_sched;
  9970. @@ -98,13 +99,22 @@ struct Qdisc {
  9971. spinlock_t busylock ____cacheline_aligned_in_smp;
  9972. };
  9973. -static inline bool qdisc_is_running(const struct Qdisc *qdisc)
  9974. +static inline bool qdisc_is_running(struct Qdisc *qdisc)
  9975. {
  9976. +#ifdef CONFIG_PREEMPT_RT_BASE
  9977. + return spin_is_locked(&qdisc->running.lock) ? true : false;
  9978. +#else
  9979. return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
  9980. +#endif
  9981. }
  9982. static inline bool qdisc_run_begin(struct Qdisc *qdisc)
  9983. {
  9984. +#ifdef CONFIG_PREEMPT_RT_BASE
  9985. + if (try_write_seqlock(&qdisc->running))
  9986. + return true;
  9987. + return false;
  9988. +#else
  9989. if (qdisc_is_running(qdisc))
  9990. return false;
  9991. /* Variant of write_seqcount_begin() telling lockdep a trylock
  9992. @@ -113,11 +123,16 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
  9993. raw_write_seqcount_begin(&qdisc->running);
  9994. seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
  9995. return true;
  9996. +#endif
  9997. }
  9998. static inline void qdisc_run_end(struct Qdisc *qdisc)
  9999. {
  10000. +#ifdef CONFIG_PREEMPT_RT_BASE
  10001. + write_sequnlock(&qdisc->running);
  10002. +#else
  10003. write_seqcount_end(&qdisc->running);
  10004. +#endif
  10005. }
  10006. static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
  10007. @@ -308,7 +323,7 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
  10008. return qdisc_lock(root);
  10009. }
  10010. -static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
  10011. +static inline net_seqlock_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
  10012. {
  10013. struct Qdisc *root = qdisc_root_sleeping(qdisc);
  10014. diff --git a/include/trace/events/hist.h b/include/trace/events/hist.h
  10015. new file mode 100644
  10016. index 000000000000..f7710de1b1f3
  10017. --- /dev/null
  10018. +++ b/include/trace/events/hist.h
  10019. @@ -0,0 +1,73 @@
  10020. +#undef TRACE_SYSTEM
  10021. +#define TRACE_SYSTEM hist
  10022. +
  10023. +#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ)
  10024. +#define _TRACE_HIST_H
  10025. +
  10026. +#include "latency_hist.h"
  10027. +#include <linux/tracepoint.h>
  10028. +
  10029. +#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST)
  10030. +#define trace_preemptirqsoff_hist(a, b)
  10031. +#define trace_preemptirqsoff_hist_rcuidle(a, b)
  10032. +#else
  10033. +TRACE_EVENT(preemptirqsoff_hist,
  10034. +
  10035. + TP_PROTO(int reason, int starthist),
  10036. +
  10037. + TP_ARGS(reason, starthist),
  10038. +
  10039. + TP_STRUCT__entry(
  10040. + __field(int, reason)
  10041. + __field(int, starthist)
  10042. + ),
  10043. +
  10044. + TP_fast_assign(
  10045. + __entry->reason = reason;
  10046. + __entry->starthist = starthist;
  10047. + ),
  10048. +
  10049. + TP_printk("reason=%s starthist=%s", getaction(__entry->reason),
  10050. + __entry->starthist ? "start" : "stop")
  10051. +);
  10052. +#endif
  10053. +
  10054. +#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST
  10055. +#define trace_hrtimer_interrupt(a, b, c, d)
  10056. +#else
  10057. +TRACE_EVENT(hrtimer_interrupt,
  10058. +
  10059. + TP_PROTO(int cpu, long long offset, struct task_struct *curr,
  10060. + struct task_struct *task),
  10061. +
  10062. + TP_ARGS(cpu, offset, curr, task),
  10063. +
  10064. + TP_STRUCT__entry(
  10065. + __field(int, cpu)
  10066. + __field(long long, offset)
  10067. + __array(char, ccomm, TASK_COMM_LEN)
  10068. + __field(int, cprio)
  10069. + __array(char, tcomm, TASK_COMM_LEN)
  10070. + __field(int, tprio)
  10071. + ),
  10072. +
  10073. + TP_fast_assign(
  10074. + __entry->cpu = cpu;
  10075. + __entry->offset = offset;
  10076. + memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN);
  10077. + __entry->cprio = curr->prio;
  10078. + memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>",
  10079. + task != NULL ? TASK_COMM_LEN : 7);
  10080. + __entry->tprio = task != NULL ? task->prio : -1;
  10081. + ),
  10082. +
  10083. + TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]",
  10084. + __entry->cpu, __entry->offset, __entry->ccomm,
  10085. + __entry->cprio, __entry->tcomm, __entry->tprio)
  10086. +);
  10087. +#endif
  10088. +
  10089. +#endif /* _TRACE_HIST_H */
  10090. +
  10091. +/* This part must be outside protection */
  10092. +#include <trace/define_trace.h>
  10093. diff --git a/include/trace/events/latency_hist.h b/include/trace/events/latency_hist.h
  10094. new file mode 100644
  10095. index 000000000000..d3f2fbd560b1
  10096. --- /dev/null
  10097. +++ b/include/trace/events/latency_hist.h
  10098. @@ -0,0 +1,29 @@
  10099. +#ifndef _LATENCY_HIST_H
  10100. +#define _LATENCY_HIST_H
  10101. +
  10102. +enum hist_action {
  10103. + IRQS_ON,
  10104. + PREEMPT_ON,
  10105. + TRACE_STOP,
  10106. + IRQS_OFF,
  10107. + PREEMPT_OFF,
  10108. + TRACE_START,
  10109. +};
  10110. +
  10111. +static char *actions[] = {
  10112. + "IRQS_ON",
  10113. + "PREEMPT_ON",
  10114. + "TRACE_STOP",
  10115. + "IRQS_OFF",
  10116. + "PREEMPT_OFF",
  10117. + "TRACE_START",
  10118. +};
  10119. +
  10120. +static inline char *getaction(int action)
  10121. +{
  10122. + if (action >= 0 && action < sizeof(actions)/sizeof(actions[0]))
  10123. + return actions[action];
  10124. + return "unknown";
  10125. +}
  10126. +
  10127. +#endif /* _LATENCY_HIST_H */
  10128. diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
  10129. index 9b90c57517a9..516ae88cddf4 100644
  10130. --- a/include/trace/events/sched.h
  10131. +++ b/include/trace/events/sched.h
  10132. @@ -70,7 +70,7 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
  10133. TP_fast_assign(
  10134. memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
  10135. __entry->pid = p->pid;
  10136. - __entry->prio = p->prio;
  10137. + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
  10138. __entry->success = 1; /* rudiment, kill when possible */
  10139. __entry->target_cpu = task_cpu(p);
  10140. ),
  10141. @@ -147,6 +147,7 @@ TRACE_EVENT(sched_switch,
  10142. memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
  10143. __entry->next_pid = next->pid;
  10144. __entry->next_prio = next->prio;
  10145. + /* XXX SCHED_DEADLINE */
  10146. ),
  10147. TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
  10148. @@ -181,7 +182,7 @@ TRACE_EVENT(sched_migrate_task,
  10149. TP_fast_assign(
  10150. memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
  10151. __entry->pid = p->pid;
  10152. - __entry->prio = p->prio;
  10153. + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
  10154. __entry->orig_cpu = task_cpu(p);
  10155. __entry->dest_cpu = dest_cpu;
  10156. ),
  10157. @@ -206,7 +207,7 @@ DECLARE_EVENT_CLASS(sched_process_template,
  10158. TP_fast_assign(
  10159. memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
  10160. __entry->pid = p->pid;
  10161. - __entry->prio = p->prio;
  10162. + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
  10163. ),
  10164. TP_printk("comm=%s pid=%d prio=%d",
  10165. @@ -253,7 +254,7 @@ TRACE_EVENT(sched_process_wait,
  10166. TP_fast_assign(
  10167. memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
  10168. __entry->pid = pid_nr(pid);
  10169. - __entry->prio = current->prio;
  10170. + __entry->prio = current->prio; /* XXX SCHED_DEADLINE */
  10171. ),
  10172. TP_printk("comm=%s pid=%d prio=%d",
  10173. @@ -413,9 +414,9 @@ DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
  10174. */
  10175. TRACE_EVENT(sched_pi_setprio,
  10176. - TP_PROTO(struct task_struct *tsk, int newprio),
  10177. + TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),
  10178. - TP_ARGS(tsk, newprio),
  10179. + TP_ARGS(tsk, pi_task),
  10180. TP_STRUCT__entry(
  10181. __array( char, comm, TASK_COMM_LEN )
  10182. @@ -428,7 +429,8 @@ TRACE_EVENT(sched_pi_setprio,
  10183. memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
  10184. __entry->pid = tsk->pid;
  10185. __entry->oldprio = tsk->prio;
  10186. - __entry->newprio = newprio;
  10187. + __entry->newprio = pi_task ? pi_task->prio : tsk->prio;
  10188. + /* XXX SCHED_DEADLINE bits missing */
  10189. ),
  10190. TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
  10191. diff --git a/init/Kconfig b/init/Kconfig
  10192. index 34407f15e6d3..2ce33a32e65d 100644
  10193. --- a/init/Kconfig
  10194. +++ b/init/Kconfig
  10195. @@ -506,7 +506,7 @@ config TINY_RCU
  10196. config RCU_EXPERT
  10197. bool "Make expert-level adjustments to RCU configuration"
  10198. - default n
  10199. + default y if PREEMPT_RT_FULL
  10200. help
  10201. This option needs to be enabled if you wish to make
  10202. expert-level adjustments to RCU configuration. By default,
  10203. @@ -623,7 +623,7 @@ config RCU_FANOUT_LEAF
  10204. config RCU_FAST_NO_HZ
  10205. bool "Accelerate last non-dyntick-idle CPU's grace periods"
  10206. - depends on NO_HZ_COMMON && SMP && RCU_EXPERT
  10207. + depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL
  10208. default n
  10209. help
  10210. This option permits CPUs to enter dynticks-idle state even if
  10211. @@ -650,7 +650,7 @@ config TREE_RCU_TRACE
  10212. config RCU_BOOST
  10213. bool "Enable RCU priority boosting"
  10214. depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
  10215. - default n
  10216. + default y if PREEMPT_RT_FULL
  10217. help
  10218. This option boosts the priority of preempted RCU readers that
  10219. block the current preemptible RCU grace period for too long.
  10220. @@ -781,19 +781,6 @@ config RCU_NOCB_CPU_ALL
  10221. endchoice
  10222. -config RCU_EXPEDITE_BOOT
  10223. - bool
  10224. - default n
  10225. - help
  10226. - This option enables expedited grace periods at boot time,
  10227. - as if rcu_expedite_gp() had been invoked early in boot.
  10228. - The corresponding rcu_unexpedite_gp() is invoked from
  10229. - rcu_end_inkernel_boot(), which is intended to be invoked
  10230. - at the end of the kernel-only boot sequence, just before
  10231. - init is exec'ed.
  10232. -
  10233. - Accept the default if unsure.
  10234. -
  10235. endmenu # "RCU Subsystem"
  10236. config BUILD_BIN2C
  10237. @@ -1064,6 +1051,7 @@ config CFS_BANDWIDTH
  10238. config RT_GROUP_SCHED
  10239. bool "Group scheduling for SCHED_RR/FIFO"
  10240. depends on CGROUP_SCHED
  10241. + depends on !PREEMPT_RT_FULL
  10242. default n
  10243. help
  10244. This feature lets you explicitly allocate real CPU bandwidth
  10245. @@ -1772,6 +1760,7 @@ choice
  10246. config SLAB
  10247. bool "SLAB"
  10248. + depends on !PREEMPT_RT_FULL
  10249. select HAVE_HARDENED_USERCOPY_ALLOCATOR
  10250. help
  10251. The regular slab allocator that is established and known to work
  10252. @@ -1792,6 +1781,7 @@ config SLUB
  10253. config SLOB
  10254. depends on EXPERT
  10255. bool "SLOB (Simple Allocator)"
  10256. + depends on !PREEMPT_RT_FULL
  10257. help
  10258. SLOB replaces the stock allocator with a drastically simpler
  10259. allocator. SLOB is generally more space efficient but
  10260. @@ -1810,7 +1800,7 @@ config SLAB_FREELIST_RANDOM
  10261. config SLUB_CPU_PARTIAL
  10262. default y
  10263. - depends on SLUB && SMP
  10264. + depends on SLUB && SMP && !PREEMPT_RT_FULL
  10265. bool "SLUB per cpu partial cache"
  10266. help
  10267. Per cpu partial caches accellerate objects allocation and freeing
  10268. diff --git a/init/Makefile b/init/Makefile
  10269. index c4fb45525d08..821190dfaa75 100644
  10270. --- a/init/Makefile
  10271. +++ b/init/Makefile
  10272. @@ -35,4 +35,4 @@ silent_chk_compile.h = :
  10273. include/generated/compile.h: FORCE
  10274. @$($(quiet)chk_compile.h)
  10275. $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
  10276. - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
  10277. + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)"
  10278. diff --git a/init/main.c b/init/main.c
  10279. index 99f026565608..48ffaaad8ac9 100644
  10280. --- a/init/main.c
  10281. +++ b/init/main.c
  10282. @@ -508,6 +508,7 @@ asmlinkage __visible void __init start_kernel(void)
  10283. setup_command_line(command_line);
  10284. setup_nr_cpu_ids();
  10285. setup_per_cpu_areas();
  10286. + softirq_early_init();
  10287. boot_cpu_state_init();
  10288. smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
  10289. diff --git a/ipc/sem.c b/ipc/sem.c
  10290. index 10b94bc59d4a..b8360eaacc7a 100644
  10291. --- a/ipc/sem.c
  10292. +++ b/ipc/sem.c
  10293. @@ -712,6 +712,13 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
  10294. static void wake_up_sem_queue_prepare(struct list_head *pt,
  10295. struct sem_queue *q, int error)
  10296. {
  10297. +#ifdef CONFIG_PREEMPT_RT_BASE
  10298. + struct task_struct *p = q->sleeper;
  10299. + get_task_struct(p);
  10300. + q->status = error;
  10301. + wake_up_process(p);
  10302. + put_task_struct(p);
  10303. +#else
  10304. if (list_empty(pt)) {
  10305. /*
  10306. * Hold preempt off so that we don't get preempted and have the
  10307. @@ -723,6 +730,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt,
  10308. q->pid = error;
  10309. list_add_tail(&q->list, pt);
  10310. +#endif
  10311. }
  10312. /**
  10313. @@ -736,6 +744,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt,
  10314. */
  10315. static void wake_up_sem_queue_do(struct list_head *pt)
  10316. {
  10317. +#ifndef CONFIG_PREEMPT_RT_BASE
  10318. struct sem_queue *q, *t;
  10319. int did_something;
  10320. @@ -748,6 +757,7 @@ static void wake_up_sem_queue_do(struct list_head *pt)
  10321. }
  10322. if (did_something)
  10323. preempt_enable();
  10324. +#endif
  10325. }
  10326. static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
  10327. diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
  10328. index ebdb0043203a..b9e6aa7e5aa6 100644
  10329. --- a/kernel/Kconfig.locks
  10330. +++ b/kernel/Kconfig.locks
  10331. @@ -225,11 +225,11 @@ config ARCH_SUPPORTS_ATOMIC_RMW
  10332. config MUTEX_SPIN_ON_OWNER
  10333. def_bool y
  10334. - depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
  10335. + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
  10336. config RWSEM_SPIN_ON_OWNER
  10337. def_bool y
  10338. - depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
  10339. + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
  10340. config LOCK_SPIN_ON_OWNER
  10341. def_bool y
  10342. diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
  10343. index 3f9c97419f02..11dbe26a8279 100644
  10344. --- a/kernel/Kconfig.preempt
  10345. +++ b/kernel/Kconfig.preempt
  10346. @@ -1,3 +1,16 @@
  10347. +config PREEMPT
  10348. + bool
  10349. + select PREEMPT_COUNT
  10350. +
  10351. +config PREEMPT_RT_BASE
  10352. + bool
  10353. + select PREEMPT
  10354. +
  10355. +config HAVE_PREEMPT_LAZY
  10356. + bool
  10357. +
  10358. +config PREEMPT_LAZY
  10359. + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL
  10360. choice
  10361. prompt "Preemption Model"
  10362. @@ -33,9 +46,9 @@ config PREEMPT_VOLUNTARY
  10363. Select this if you are building a kernel for a desktop system.
  10364. -config PREEMPT
  10365. +config PREEMPT__LL
  10366. bool "Preemptible Kernel (Low-Latency Desktop)"
  10367. - select PREEMPT_COUNT
  10368. + select PREEMPT
  10369. select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
  10370. help
  10371. This option reduces the latency of the kernel by making
  10372. @@ -52,6 +65,22 @@ config PREEMPT
  10373. embedded system with latency requirements in the milliseconds
  10374. range.
  10375. +config PREEMPT_RTB
  10376. + bool "Preemptible Kernel (Basic RT)"
  10377. + select PREEMPT_RT_BASE
  10378. + help
  10379. + This option is basically the same as (Low-Latency Desktop) but
  10380. + enables changes which are preliminary for the full preemptible
  10381. + RT kernel.
  10382. +
  10383. +config PREEMPT_RT_FULL
  10384. + bool "Fully Preemptible Kernel (RT)"
  10385. + depends on IRQ_FORCED_THREADING
  10386. + select PREEMPT_RT_BASE
  10387. + select PREEMPT_RCU
  10388. + help
  10389. + All and everything
  10390. +
  10391. endchoice
  10392. config PREEMPT_COUNT
  10393. diff --git a/kernel/cgroup.c b/kernel/cgroup.c
  10394. index 4c233437ee1a..6c3c9f298f22 100644
  10395. --- a/kernel/cgroup.c
  10396. +++ b/kernel/cgroup.c
  10397. @@ -5041,10 +5041,10 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head)
  10398. queue_work(cgroup_destroy_wq, &css->destroy_work);
  10399. }
  10400. -static void css_release_work_fn(struct work_struct *work)
  10401. +static void css_release_work_fn(struct swork_event *sev)
  10402. {
  10403. struct cgroup_subsys_state *css =
  10404. - container_of(work, struct cgroup_subsys_state, destroy_work);
  10405. + container_of(sev, struct cgroup_subsys_state, destroy_swork);
  10406. struct cgroup_subsys *ss = css->ss;
  10407. struct cgroup *cgrp = css->cgroup;
  10408. @@ -5087,8 +5087,8 @@ static void css_release(struct percpu_ref *ref)
  10409. struct cgroup_subsys_state *css =
  10410. container_of(ref, struct cgroup_subsys_state, refcnt);
  10411. - INIT_WORK(&css->destroy_work, css_release_work_fn);
  10412. - queue_work(cgroup_destroy_wq, &css->destroy_work);
  10413. + INIT_SWORK(&css->destroy_swork, css_release_work_fn);
  10414. + swork_queue(&css->destroy_swork);
  10415. }
  10416. static void init_and_link_css(struct cgroup_subsys_state *css,
  10417. @@ -5749,6 +5749,7 @@ static int __init cgroup_wq_init(void)
  10418. */
  10419. cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
  10420. BUG_ON(!cgroup_destroy_wq);
  10421. + BUG_ON(swork_get());
  10422. /*
  10423. * Used to destroy pidlists and separate to serve as flush domain.
  10424. diff --git a/kernel/cpu.c b/kernel/cpu.c
  10425. index 802eb3361a0a..c6a4cf8ba645 100644
  10426. --- a/kernel/cpu.c
  10427. +++ b/kernel/cpu.c
  10428. @@ -239,6 +239,289 @@ static struct {
  10429. #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
  10430. #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
  10431. +/**
  10432. + * hotplug_pcp - per cpu hotplug descriptor
  10433. + * @unplug: set when pin_current_cpu() needs to sync tasks
  10434. + * @sync_tsk: the task that waits for tasks to finish pinned sections
  10435. + * @refcount: counter of tasks in pinned sections
  10436. + * @grab_lock: set when the tasks entering pinned sections should wait
  10437. + * @synced: notifier for @sync_tsk to tell cpu_down it's finished
  10438. + * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
  10439. + * @mutex_init: zero if the mutex hasn't been initialized yet.
  10440. + *
  10441. + * Although @unplug and @sync_tsk may point to the same task, the @unplug
  10442. + * is used as a flag and still exists after @sync_tsk has exited and
  10443. + * @sync_tsk set to NULL.
  10444. + */
  10445. +struct hotplug_pcp {
  10446. + struct task_struct *unplug;
  10447. + struct task_struct *sync_tsk;
  10448. + int refcount;
  10449. + int grab_lock;
  10450. + struct completion synced;
  10451. + struct completion unplug_wait;
  10452. +#ifdef CONFIG_PREEMPT_RT_FULL
  10453. + /*
  10454. + * Note, on PREEMPT_RT, the hotplug lock must save the state of
  10455. + * the task, otherwise the mutex will cause the task to fail
  10456. + * to sleep when required. (Because it's called from migrate_disable())
  10457. + *
  10458. + * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
  10459. + * state.
  10460. + */
  10461. + spinlock_t lock;
  10462. +#else
  10463. + struct mutex mutex;
  10464. +#endif
  10465. + int mutex_init;
  10466. +};
  10467. +
  10468. +#ifdef CONFIG_PREEMPT_RT_FULL
  10469. +# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock)
  10470. +# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock)
  10471. +#else
  10472. +# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
  10473. +# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
  10474. +#endif
  10475. +
  10476. +static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
  10477. +
  10478. +/**
  10479. + * pin_current_cpu - Prevent the current cpu from being unplugged
  10480. + *
  10481. + * Lightweight version of get_online_cpus() to prevent cpu from being
  10482. + * unplugged when code runs in a migration disabled region.
  10483. + *
  10484. + * Must be called with preemption disabled (preempt_count = 1)!
  10485. + */
  10486. +void pin_current_cpu(void)
  10487. +{
  10488. + struct hotplug_pcp *hp;
  10489. + int force = 0;
  10490. +
  10491. +retry:
  10492. + hp = this_cpu_ptr(&hotplug_pcp);
  10493. +
  10494. + if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
  10495. + hp->unplug == current) {
  10496. + hp->refcount++;
  10497. + return;
  10498. + }
  10499. + if (hp->grab_lock) {
  10500. + preempt_enable();
  10501. + hotplug_lock(hp);
  10502. + hotplug_unlock(hp);
  10503. + } else {
  10504. + preempt_enable();
  10505. + /*
  10506. + * Try to push this task off of this CPU.
  10507. + */
  10508. + if (!migrate_me()) {
  10509. + preempt_disable();
  10510. + hp = this_cpu_ptr(&hotplug_pcp);
  10511. + if (!hp->grab_lock) {
  10512. + /*
  10513. + * Just let it continue it's already pinned
  10514. + * or about to sleep.
  10515. + */
  10516. + force = 1;
  10517. + goto retry;
  10518. + }
  10519. + preempt_enable();
  10520. + }
  10521. + }
  10522. + preempt_disable();
  10523. + goto retry;
  10524. +}
  10525. +
  10526. +/**
  10527. + * unpin_current_cpu - Allow unplug of current cpu
  10528. + *
  10529. + * Must be called with preemption or interrupts disabled!
  10530. + */
  10531. +void unpin_current_cpu(void)
  10532. +{
  10533. + struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
  10534. +
  10535. + WARN_ON(hp->refcount <= 0);
  10536. +
  10537. + /* This is safe. sync_unplug_thread is pinned to this cpu */
  10538. + if (!--hp->refcount && hp->unplug && hp->unplug != current)
  10539. + wake_up_process(hp->unplug);
  10540. +}
  10541. +
  10542. +static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
  10543. +{
  10544. + set_current_state(TASK_UNINTERRUPTIBLE);
  10545. + while (hp->refcount) {
  10546. + schedule_preempt_disabled();
  10547. + set_current_state(TASK_UNINTERRUPTIBLE);
  10548. + }
  10549. +}
  10550. +
  10551. +static int sync_unplug_thread(void *data)
  10552. +{
  10553. + struct hotplug_pcp *hp = data;
  10554. +
  10555. + wait_for_completion(&hp->unplug_wait);
  10556. + preempt_disable();
  10557. + hp->unplug = current;
  10558. + wait_for_pinned_cpus(hp);
  10559. +
  10560. + /*
  10561. + * This thread will synchronize the cpu_down() with threads
  10562. + * that have pinned the CPU. When the pinned CPU count reaches
  10563. + * zero, we inform the cpu_down code to continue to the next step.
  10564. + */
  10565. + set_current_state(TASK_UNINTERRUPTIBLE);
  10566. + preempt_enable();
  10567. + complete(&hp->synced);
  10568. +
  10569. + /*
  10570. + * If all succeeds, the next step will need tasks to wait till
  10571. + * the CPU is offline before continuing. To do this, the grab_lock
  10572. + * is set and tasks going into pin_current_cpu() will block on the
  10573. + * mutex. But we still need to wait for those that are already in
  10574. + * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
  10575. + * will kick this thread out.
  10576. + */
  10577. + while (!hp->grab_lock && !kthread_should_stop()) {
  10578. + schedule();
  10579. + set_current_state(TASK_UNINTERRUPTIBLE);
  10580. + }
  10581. +
  10582. + /* Make sure grab_lock is seen before we see a stale completion */
  10583. + smp_mb();
  10584. +
  10585. + /*
  10586. + * Now just before cpu_down() enters stop machine, we need to make
  10587. + * sure all tasks that are in pinned CPU sections are out, and new
  10588. + * tasks will now grab the lock, keeping them from entering pinned
  10589. + * CPU sections.
  10590. + */
  10591. + if (!kthread_should_stop()) {
  10592. + preempt_disable();
  10593. + wait_for_pinned_cpus(hp);
  10594. + preempt_enable();
  10595. + complete(&hp->synced);
  10596. + }
  10597. +
  10598. + set_current_state(TASK_UNINTERRUPTIBLE);
  10599. + while (!kthread_should_stop()) {
  10600. + schedule();
  10601. + set_current_state(TASK_UNINTERRUPTIBLE);
  10602. + }
  10603. + set_current_state(TASK_RUNNING);
  10604. +
  10605. + /*
  10606. + * Force this thread off this CPU as it's going down and
  10607. + * we don't want any more work on this CPU.
  10608. + */
  10609. + current->flags &= ~PF_NO_SETAFFINITY;
  10610. + set_cpus_allowed_ptr(current, cpu_present_mask);
  10611. + migrate_me();
  10612. + return 0;
  10613. +}
  10614. +
  10615. +static void __cpu_unplug_sync(struct hotplug_pcp *hp)
  10616. +{
  10617. + wake_up_process(hp->sync_tsk);
  10618. + wait_for_completion(&hp->synced);
  10619. +}
  10620. +
  10621. +static void __cpu_unplug_wait(unsigned int cpu)
  10622. +{
  10623. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  10624. +
  10625. + complete(&hp->unplug_wait);
  10626. + wait_for_completion(&hp->synced);
  10627. +}
  10628. +
  10629. +/*
  10630. + * Start the sync_unplug_thread on the target cpu and wait for it to
  10631. + * complete.
  10632. + */
  10633. +static int cpu_unplug_begin(unsigned int cpu)
  10634. +{
  10635. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  10636. + int err;
  10637. +
  10638. + /* Protected by cpu_hotplug.lock */
  10639. + if (!hp->mutex_init) {
  10640. +#ifdef CONFIG_PREEMPT_RT_FULL
  10641. + spin_lock_init(&hp->lock);
  10642. +#else
  10643. + mutex_init(&hp->mutex);
  10644. +#endif
  10645. + hp->mutex_init = 1;
  10646. + }
  10647. +
  10648. + /* Inform the scheduler to migrate tasks off this CPU */
  10649. + tell_sched_cpu_down_begin(cpu);
  10650. +
  10651. + init_completion(&hp->synced);
  10652. + init_completion(&hp->unplug_wait);
  10653. +
  10654. + hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
  10655. + if (IS_ERR(hp->sync_tsk)) {
  10656. + err = PTR_ERR(hp->sync_tsk);
  10657. + hp->sync_tsk = NULL;
  10658. + return err;
  10659. + }
  10660. + kthread_bind(hp->sync_tsk, cpu);
  10661. +
  10662. + /*
  10663. + * Wait for tasks to get out of the pinned sections,
  10664. + * it's still OK if new tasks enter. Some CPU notifiers will
  10665. + * wait for tasks that are going to enter these sections and
  10666. + * we must not have them block.
  10667. + */
  10668. + wake_up_process(hp->sync_tsk);
  10669. + return 0;
  10670. +}
  10671. +
  10672. +static void cpu_unplug_sync(unsigned int cpu)
  10673. +{
  10674. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  10675. +
  10676. + init_completion(&hp->synced);
  10677. + /* The completion needs to be initialzied before setting grab_lock */
  10678. + smp_wmb();
  10679. +
  10680. + /* Grab the mutex before setting grab_lock */
  10681. + hotplug_lock(hp);
  10682. + hp->grab_lock = 1;
  10683. +
  10684. + /*
  10685. + * The CPU notifiers have been completed.
  10686. + * Wait for tasks to get out of pinned CPU sections and have new
  10687. + * tasks block until the CPU is completely down.
  10688. + */
  10689. + __cpu_unplug_sync(hp);
  10690. +
  10691. + /* All done with the sync thread */
  10692. + kthread_stop(hp->sync_tsk);
  10693. + hp->sync_tsk = NULL;
  10694. +}
  10695. +
  10696. +static void cpu_unplug_done(unsigned int cpu)
  10697. +{
  10698. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  10699. +
  10700. + hp->unplug = NULL;
  10701. + /* Let all tasks know cpu unplug is finished before cleaning up */
  10702. + smp_wmb();
  10703. +
  10704. + if (hp->sync_tsk)
  10705. + kthread_stop(hp->sync_tsk);
  10706. +
  10707. + if (hp->grab_lock) {
  10708. + hotplug_unlock(hp);
  10709. + /* protected by cpu_hotplug.lock */
  10710. + hp->grab_lock = 0;
  10711. + }
  10712. + tell_sched_cpu_down_done(cpu);
  10713. +}
  10714. void get_online_cpus(void)
  10715. {
  10716. @@ -802,10 +1085,14 @@ static int takedown_cpu(unsigned int cpu)
  10717. struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
  10718. int err;
  10719. + __cpu_unplug_wait(cpu);
  10720. /* Park the smpboot threads */
  10721. kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
  10722. smpboot_park_threads(cpu);
  10723. + /* Notifiers are done. Don't let any more tasks pin this CPU. */
  10724. + cpu_unplug_sync(cpu);
  10725. +
  10726. /*
  10727. * Prevent irq alloc/free while the dying cpu reorganizes the
  10728. * interrupt affinities.
  10729. @@ -890,6 +1177,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
  10730. struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
  10731. int prev_state, ret = 0;
  10732. bool hasdied = false;
  10733. + int mycpu;
  10734. + cpumask_var_t cpumask;
  10735. + cpumask_var_t cpumask_org;
  10736. if (num_online_cpus() == 1)
  10737. return -EBUSY;
  10738. @@ -897,7 +1187,34 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
  10739. if (!cpu_present(cpu))
  10740. return -EINVAL;
  10741. + /* Move the downtaker off the unplug cpu */
  10742. + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
  10743. + return -ENOMEM;
  10744. + if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) {
  10745. + free_cpumask_var(cpumask);
  10746. + return -ENOMEM;
  10747. + }
  10748. +
  10749. + cpumask_copy(cpumask_org, tsk_cpus_allowed(current));
  10750. + cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
  10751. + set_cpus_allowed_ptr(current, cpumask);
  10752. + free_cpumask_var(cpumask);
  10753. + migrate_disable();
  10754. + mycpu = smp_processor_id();
  10755. + if (mycpu == cpu) {
  10756. + printk(KERN_ERR "Yuck! Still on unplug CPU\n!");
  10757. + migrate_enable();
  10758. + ret = -EBUSY;
  10759. + goto restore_cpus;
  10760. + }
  10761. +
  10762. + migrate_enable();
  10763. cpu_hotplug_begin();
  10764. + ret = cpu_unplug_begin(cpu);
  10765. + if (ret) {
  10766. + printk("cpu_unplug_begin(%d) failed\n", cpu);
  10767. + goto out_cancel;
  10768. + }
  10769. cpuhp_tasks_frozen = tasks_frozen;
  10770. @@ -936,10 +1253,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
  10771. hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
  10772. out:
  10773. + cpu_unplug_done(cpu);
  10774. +out_cancel:
  10775. cpu_hotplug_done();
  10776. /* This post dead nonsense must die */
  10777. if (!ret && hasdied)
  10778. cpu_notify_nofail(CPU_POST_DEAD, cpu);
  10779. +restore_cpus:
  10780. + set_cpus_allowed_ptr(current, cpumask_org);
  10781. + free_cpumask_var(cpumask_org);
  10782. return ret;
  10783. }
  10784. @@ -1242,6 +1564,8 @@ core_initcall(cpu_hotplug_pm_sync_init);
  10785. #endif /* CONFIG_PM_SLEEP_SMP */
  10786. +int __boot_cpu_id;
  10787. +
  10788. #endif /* CONFIG_SMP */
  10789. /* Boot processor state steps */
  10790. @@ -1926,6 +2250,10 @@ void __init boot_cpu_init(void)
  10791. set_cpu_active(cpu, true);
  10792. set_cpu_present(cpu, true);
  10793. set_cpu_possible(cpu, true);
  10794. +
  10795. +#ifdef CONFIG_SMP
  10796. + __boot_cpu_id = cpu;
  10797. +#endif
  10798. }
  10799. /*
  10800. diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
  10801. index 009cc9a17d95..67b02e138a47 100644
  10802. --- a/kernel/cpu_pm.c
  10803. +++ b/kernel/cpu_pm.c
  10804. @@ -22,15 +22,21 @@
  10805. #include <linux/spinlock.h>
  10806. #include <linux/syscore_ops.h>
  10807. -static DEFINE_RWLOCK(cpu_pm_notifier_lock);
  10808. -static RAW_NOTIFIER_HEAD(cpu_pm_notifier_chain);
  10809. +static ATOMIC_NOTIFIER_HEAD(cpu_pm_notifier_chain);
  10810. static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls)
  10811. {
  10812. int ret;
  10813. - ret = __raw_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
  10814. + /*
  10815. + * __atomic_notifier_call_chain has a RCU read critical section, which
  10816. + * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
  10817. + * RCU know this.
  10818. + */
  10819. + rcu_irq_enter_irqson();
  10820. + ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
  10821. nr_to_call, nr_calls);
  10822. + rcu_irq_exit_irqson();
  10823. return notifier_to_errno(ret);
  10824. }
  10825. @@ -47,14 +53,7 @@ static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls)
  10826. */
  10827. int cpu_pm_register_notifier(struct notifier_block *nb)
  10828. {
  10829. - unsigned long flags;
  10830. - int ret;
  10831. -
  10832. - write_lock_irqsave(&cpu_pm_notifier_lock, flags);
  10833. - ret = raw_notifier_chain_register(&cpu_pm_notifier_chain, nb);
  10834. - write_unlock_irqrestore(&cpu_pm_notifier_lock, flags);
  10835. -
  10836. - return ret;
  10837. + return atomic_notifier_chain_register(&cpu_pm_notifier_chain, nb);
  10838. }
  10839. EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
  10840. @@ -69,14 +68,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
  10841. */
  10842. int cpu_pm_unregister_notifier(struct notifier_block *nb)
  10843. {
  10844. - unsigned long flags;
  10845. - int ret;
  10846. -
  10847. - write_lock_irqsave(&cpu_pm_notifier_lock, flags);
  10848. - ret = raw_notifier_chain_unregister(&cpu_pm_notifier_chain, nb);
  10849. - write_unlock_irqrestore(&cpu_pm_notifier_lock, flags);
  10850. -
  10851. - return ret;
  10852. + return atomic_notifier_chain_unregister(&cpu_pm_notifier_chain, nb);
  10853. }
  10854. EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
  10855. @@ -100,7 +92,6 @@ int cpu_pm_enter(void)
  10856. int nr_calls;
  10857. int ret = 0;
  10858. - read_lock(&cpu_pm_notifier_lock);
  10859. ret = cpu_pm_notify(CPU_PM_ENTER, -1, &nr_calls);
  10860. if (ret)
  10861. /*
  10862. @@ -108,7 +99,6 @@ int cpu_pm_enter(void)
  10863. * PM entry who are notified earlier to prepare for it.
  10864. */
  10865. cpu_pm_notify(CPU_PM_ENTER_FAILED, nr_calls - 1, NULL);
  10866. - read_unlock(&cpu_pm_notifier_lock);
  10867. return ret;
  10868. }
  10869. @@ -128,13 +118,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_enter);
  10870. */
  10871. int cpu_pm_exit(void)
  10872. {
  10873. - int ret;
  10874. -
  10875. - read_lock(&cpu_pm_notifier_lock);
  10876. - ret = cpu_pm_notify(CPU_PM_EXIT, -1, NULL);
  10877. - read_unlock(&cpu_pm_notifier_lock);
  10878. -
  10879. - return ret;
  10880. + return cpu_pm_notify(CPU_PM_EXIT, -1, NULL);
  10881. }
  10882. EXPORT_SYMBOL_GPL(cpu_pm_exit);
  10883. @@ -159,7 +143,6 @@ int cpu_cluster_pm_enter(void)
  10884. int nr_calls;
  10885. int ret = 0;
  10886. - read_lock(&cpu_pm_notifier_lock);
  10887. ret = cpu_pm_notify(CPU_CLUSTER_PM_ENTER, -1, &nr_calls);
  10888. if (ret)
  10889. /*
  10890. @@ -167,7 +150,6 @@ int cpu_cluster_pm_enter(void)
  10891. * PM entry who are notified earlier to prepare for it.
  10892. */
  10893. cpu_pm_notify(CPU_CLUSTER_PM_ENTER_FAILED, nr_calls - 1, NULL);
  10894. - read_unlock(&cpu_pm_notifier_lock);
  10895. return ret;
  10896. }
  10897. @@ -190,13 +172,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
  10898. */
  10899. int cpu_cluster_pm_exit(void)
  10900. {
  10901. - int ret;
  10902. -
  10903. - read_lock(&cpu_pm_notifier_lock);
  10904. - ret = cpu_pm_notify(CPU_CLUSTER_PM_EXIT, -1, NULL);
  10905. - read_unlock(&cpu_pm_notifier_lock);
  10906. -
  10907. - return ret;
  10908. + return cpu_pm_notify(CPU_CLUSTER_PM_EXIT, -1, NULL);
  10909. }
  10910. EXPORT_SYMBOL_GPL(cpu_cluster_pm_exit);
  10911. diff --git a/kernel/cpuset.c b/kernel/cpuset.c
  10912. index 511b1dd8ff09..1dd63833ecdc 100644
  10913. --- a/kernel/cpuset.c
  10914. +++ b/kernel/cpuset.c
  10915. @@ -285,7 +285,7 @@ static struct cpuset top_cpuset = {
  10916. */
  10917. static DEFINE_MUTEX(cpuset_mutex);
  10918. -static DEFINE_SPINLOCK(callback_lock);
  10919. +static DEFINE_RAW_SPINLOCK(callback_lock);
  10920. static struct workqueue_struct *cpuset_migrate_mm_wq;
  10921. @@ -908,9 +908,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
  10922. continue;
  10923. rcu_read_unlock();
  10924. - spin_lock_irq(&callback_lock);
  10925. + raw_spin_lock_irq(&callback_lock);
  10926. cpumask_copy(cp->effective_cpus, new_cpus);
  10927. - spin_unlock_irq(&callback_lock);
  10928. + raw_spin_unlock_irq(&callback_lock);
  10929. WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
  10930. !cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
  10931. @@ -975,9 +975,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
  10932. if (retval < 0)
  10933. return retval;
  10934. - spin_lock_irq(&callback_lock);
  10935. + raw_spin_lock_irq(&callback_lock);
  10936. cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
  10937. - spin_unlock_irq(&callback_lock);
  10938. + raw_spin_unlock_irq(&callback_lock);
  10939. /* use trialcs->cpus_allowed as a temp variable */
  10940. update_cpumasks_hier(cs, trialcs->cpus_allowed);
  10941. @@ -1177,9 +1177,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
  10942. continue;
  10943. rcu_read_unlock();
  10944. - spin_lock_irq(&callback_lock);
  10945. + raw_spin_lock_irq(&callback_lock);
  10946. cp->effective_mems = *new_mems;
  10947. - spin_unlock_irq(&callback_lock);
  10948. + raw_spin_unlock_irq(&callback_lock);
  10949. WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
  10950. !nodes_equal(cp->mems_allowed, cp->effective_mems));
  10951. @@ -1247,9 +1247,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
  10952. if (retval < 0)
  10953. goto done;
  10954. - spin_lock_irq(&callback_lock);
  10955. + raw_spin_lock_irq(&callback_lock);
  10956. cs->mems_allowed = trialcs->mems_allowed;
  10957. - spin_unlock_irq(&callback_lock);
  10958. + raw_spin_unlock_irq(&callback_lock);
  10959. /* use trialcs->mems_allowed as a temp variable */
  10960. update_nodemasks_hier(cs, &trialcs->mems_allowed);
  10961. @@ -1340,9 +1340,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
  10962. spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
  10963. || (is_spread_page(cs) != is_spread_page(trialcs)));
  10964. - spin_lock_irq(&callback_lock);
  10965. + raw_spin_lock_irq(&callback_lock);
  10966. cs->flags = trialcs->flags;
  10967. - spin_unlock_irq(&callback_lock);
  10968. + raw_spin_unlock_irq(&callback_lock);
  10969. if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
  10970. rebuild_sched_domains_locked();
  10971. @@ -1757,7 +1757,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
  10972. cpuset_filetype_t type = seq_cft(sf)->private;
  10973. int ret = 0;
  10974. - spin_lock_irq(&callback_lock);
  10975. + raw_spin_lock_irq(&callback_lock);
  10976. switch (type) {
  10977. case FILE_CPULIST:
  10978. @@ -1776,7 +1776,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
  10979. ret = -EINVAL;
  10980. }
  10981. - spin_unlock_irq(&callback_lock);
  10982. + raw_spin_unlock_irq(&callback_lock);
  10983. return ret;
  10984. }
  10985. @@ -1991,12 +1991,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
  10986. cpuset_inc();
  10987. - spin_lock_irq(&callback_lock);
  10988. + raw_spin_lock_irq(&callback_lock);
  10989. if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
  10990. cpumask_copy(cs->effective_cpus, parent->effective_cpus);
  10991. cs->effective_mems = parent->effective_mems;
  10992. }
  10993. - spin_unlock_irq(&callback_lock);
  10994. + raw_spin_unlock_irq(&callback_lock);
  10995. if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
  10996. goto out_unlock;
  10997. @@ -2023,12 +2023,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
  10998. }
  10999. rcu_read_unlock();
  11000. - spin_lock_irq(&callback_lock);
  11001. + raw_spin_lock_irq(&callback_lock);
  11002. cs->mems_allowed = parent->mems_allowed;
  11003. cs->effective_mems = parent->mems_allowed;
  11004. cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
  11005. cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
  11006. - spin_unlock_irq(&callback_lock);
  11007. + raw_spin_unlock_irq(&callback_lock);
  11008. out_unlock:
  11009. mutex_unlock(&cpuset_mutex);
  11010. return 0;
  11011. @@ -2067,7 +2067,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
  11012. static void cpuset_bind(struct cgroup_subsys_state *root_css)
  11013. {
  11014. mutex_lock(&cpuset_mutex);
  11015. - spin_lock_irq(&callback_lock);
  11016. + raw_spin_lock_irq(&callback_lock);
  11017. if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
  11018. cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
  11019. @@ -2078,7 +2078,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
  11020. top_cpuset.mems_allowed = top_cpuset.effective_mems;
  11021. }
  11022. - spin_unlock_irq(&callback_lock);
  11023. + raw_spin_unlock_irq(&callback_lock);
  11024. mutex_unlock(&cpuset_mutex);
  11025. }
  11026. @@ -2179,12 +2179,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
  11027. {
  11028. bool is_empty;
  11029. - spin_lock_irq(&callback_lock);
  11030. + raw_spin_lock_irq(&callback_lock);
  11031. cpumask_copy(cs->cpus_allowed, new_cpus);
  11032. cpumask_copy(cs->effective_cpus, new_cpus);
  11033. cs->mems_allowed = *new_mems;
  11034. cs->effective_mems = *new_mems;
  11035. - spin_unlock_irq(&callback_lock);
  11036. + raw_spin_unlock_irq(&callback_lock);
  11037. /*
  11038. * Don't call update_tasks_cpumask() if the cpuset becomes empty,
  11039. @@ -2221,10 +2221,10 @@ hotplug_update_tasks(struct cpuset *cs,
  11040. if (nodes_empty(*new_mems))
  11041. *new_mems = parent_cs(cs)->effective_mems;
  11042. - spin_lock_irq(&callback_lock);
  11043. + raw_spin_lock_irq(&callback_lock);
  11044. cpumask_copy(cs->effective_cpus, new_cpus);
  11045. cs->effective_mems = *new_mems;
  11046. - spin_unlock_irq(&callback_lock);
  11047. + raw_spin_unlock_irq(&callback_lock);
  11048. if (cpus_updated)
  11049. update_tasks_cpumask(cs);
  11050. @@ -2317,21 +2317,21 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
  11051. /* synchronize cpus_allowed to cpu_active_mask */
  11052. if (cpus_updated) {
  11053. - spin_lock_irq(&callback_lock);
  11054. + raw_spin_lock_irq(&callback_lock);
  11055. if (!on_dfl)
  11056. cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
  11057. cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
  11058. - spin_unlock_irq(&callback_lock);
  11059. + raw_spin_unlock_irq(&callback_lock);
  11060. /* we don't mess with cpumasks of tasks in top_cpuset */
  11061. }
  11062. /* synchronize mems_allowed to N_MEMORY */
  11063. if (mems_updated) {
  11064. - spin_lock_irq(&callback_lock);
  11065. + raw_spin_lock_irq(&callback_lock);
  11066. if (!on_dfl)
  11067. top_cpuset.mems_allowed = new_mems;
  11068. top_cpuset.effective_mems = new_mems;
  11069. - spin_unlock_irq(&callback_lock);
  11070. + raw_spin_unlock_irq(&callback_lock);
  11071. update_tasks_nodemask(&top_cpuset);
  11072. }
  11073. @@ -2436,11 +2436,11 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
  11074. {
  11075. unsigned long flags;
  11076. - spin_lock_irqsave(&callback_lock, flags);
  11077. + raw_spin_lock_irqsave(&callback_lock, flags);
  11078. rcu_read_lock();
  11079. guarantee_online_cpus(task_cs(tsk), pmask);
  11080. rcu_read_unlock();
  11081. - spin_unlock_irqrestore(&callback_lock, flags);
  11082. + raw_spin_unlock_irqrestore(&callback_lock, flags);
  11083. }
  11084. void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
  11085. @@ -2488,11 +2488,11 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
  11086. nodemask_t mask;
  11087. unsigned long flags;
  11088. - spin_lock_irqsave(&callback_lock, flags);
  11089. + raw_spin_lock_irqsave(&callback_lock, flags);
  11090. rcu_read_lock();
  11091. guarantee_online_mems(task_cs(tsk), &mask);
  11092. rcu_read_unlock();
  11093. - spin_unlock_irqrestore(&callback_lock, flags);
  11094. + raw_spin_unlock_irqrestore(&callback_lock, flags);
  11095. return mask;
  11096. }
  11097. @@ -2584,14 +2584,14 @@ bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
  11098. return true;
  11099. /* Not hardwall and node outside mems_allowed: scan up cpusets */
  11100. - spin_lock_irqsave(&callback_lock, flags);
  11101. + raw_spin_lock_irqsave(&callback_lock, flags);
  11102. rcu_read_lock();
  11103. cs = nearest_hardwall_ancestor(task_cs(current));
  11104. allowed = node_isset(node, cs->mems_allowed);
  11105. rcu_read_unlock();
  11106. - spin_unlock_irqrestore(&callback_lock, flags);
  11107. + raw_spin_unlock_irqrestore(&callback_lock, flags);
  11108. return allowed;
  11109. }
  11110. diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
  11111. index 77777d918676..3203e9dee9f8 100644
  11112. --- a/kernel/debug/kdb/kdb_io.c
  11113. +++ b/kernel/debug/kdb/kdb_io.c
  11114. @@ -554,7 +554,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
  11115. int linecount;
  11116. int colcount;
  11117. int logging, saved_loglevel = 0;
  11118. - int saved_trap_printk;
  11119. int got_printf_lock = 0;
  11120. int retlen = 0;
  11121. int fnd, len;
  11122. @@ -565,8 +564,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
  11123. unsigned long uninitialized_var(flags);
  11124. preempt_disable();
  11125. - saved_trap_printk = kdb_trap_printk;
  11126. - kdb_trap_printk = 0;
  11127. /* Serialize kdb_printf if multiple cpus try to write at once.
  11128. * But if any cpu goes recursive in kdb, just print the output,
  11129. @@ -855,7 +852,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
  11130. } else {
  11131. __release(kdb_printf_lock);
  11132. }
  11133. - kdb_trap_printk = saved_trap_printk;
  11134. preempt_enable();
  11135. return retlen;
  11136. }
  11137. @@ -865,9 +861,11 @@ int kdb_printf(const char *fmt, ...)
  11138. va_list ap;
  11139. int r;
  11140. + kdb_trap_printk++;
  11141. va_start(ap, fmt);
  11142. r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap);
  11143. va_end(ap);
  11144. + kdb_trap_printk--;
  11145. return r;
  11146. }
  11147. diff --git a/kernel/events/core.c b/kernel/events/core.c
  11148. index 13b9784427b0..f74fbfe5465c 100644
  11149. --- a/kernel/events/core.c
  11150. +++ b/kernel/events/core.c
  11151. @@ -1050,6 +1050,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
  11152. raw_spin_lock_init(&cpuctx->hrtimer_lock);
  11153. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
  11154. timer->function = perf_mux_hrtimer_handler;
  11155. + timer->irqsafe = 1;
  11156. }
  11157. static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
  11158. @@ -8405,6 +8406,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
  11159. hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  11160. hwc->hrtimer.function = perf_swevent_hrtimer;
  11161. + hwc->hrtimer.irqsafe = 1;
  11162. /*
  11163. * Since hrtimers have a fixed rate, we can do a static freq->period
  11164. diff --git a/kernel/exit.c b/kernel/exit.c
  11165. index 3076f3089919..fb2ebcf3ca7c 100644
  11166. --- a/kernel/exit.c
  11167. +++ b/kernel/exit.c
  11168. @@ -143,7 +143,7 @@ static void __exit_signal(struct task_struct *tsk)
  11169. * Do this under ->siglock, we can race with another thread
  11170. * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
  11171. */
  11172. - flush_sigqueue(&tsk->pending);
  11173. + flush_task_sigqueue(tsk);
  11174. tsk->sighand = NULL;
  11175. spin_unlock(&sighand->siglock);
  11176. diff --git a/kernel/fork.c b/kernel/fork.c
  11177. index 70e10cb49be0..2529725eefa2 100644
  11178. --- a/kernel/fork.c
  11179. +++ b/kernel/fork.c
  11180. @@ -77,6 +77,7 @@
  11181. #include <linux/compiler.h>
  11182. #include <linux/sysctl.h>
  11183. #include <linux/kcov.h>
  11184. +#include <linux/kprobes.h>
  11185. #include <asm/pgtable.h>
  11186. #include <asm/pgalloc.h>
  11187. @@ -378,13 +379,24 @@ static inline void put_signal_struct(struct signal_struct *sig)
  11188. if (atomic_dec_and_test(&sig->sigcnt))
  11189. free_signal_struct(sig);
  11190. }
  11191. -
  11192. +#ifdef CONFIG_PREEMPT_RT_BASE
  11193. +static
  11194. +#endif
  11195. void __put_task_struct(struct task_struct *tsk)
  11196. {
  11197. WARN_ON(!tsk->exit_state);
  11198. WARN_ON(atomic_read(&tsk->usage));
  11199. WARN_ON(tsk == current);
  11200. + /*
  11201. + * Remove function-return probe instances associated with this
  11202. + * task and put them back on the free list.
  11203. + */
  11204. + kprobe_flush_task(tsk);
  11205. +
  11206. + /* Task is done with its stack. */
  11207. + put_task_stack(tsk);
  11208. +
  11209. cgroup_free(tsk);
  11210. task_numa_free(tsk);
  11211. security_task_free(tsk);
  11212. @@ -395,7 +407,18 @@ void __put_task_struct(struct task_struct *tsk)
  11213. if (!profile_handoff_task(tsk))
  11214. free_task(tsk);
  11215. }
  11216. +#ifndef CONFIG_PREEMPT_RT_BASE
  11217. EXPORT_SYMBOL_GPL(__put_task_struct);
  11218. +#else
  11219. +void __put_task_struct_cb(struct rcu_head *rhp)
  11220. +{
  11221. + struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu);
  11222. +
  11223. + __put_task_struct(tsk);
  11224. +
  11225. +}
  11226. +EXPORT_SYMBOL_GPL(__put_task_struct_cb);
  11227. +#endif
  11228. void __init __weak arch_task_cache_init(void) { }
  11229. @@ -541,6 +564,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
  11230. tsk->splice_pipe = NULL;
  11231. tsk->task_frag.page = NULL;
  11232. tsk->wake_q.next = NULL;
  11233. + tsk->wake_q_sleeper.next = NULL;
  11234. account_kernel_stack(tsk, 1);
  11235. @@ -867,6 +891,19 @@ void __mmdrop(struct mm_struct *mm)
  11236. }
  11237. EXPORT_SYMBOL_GPL(__mmdrop);
  11238. +#ifdef CONFIG_PREEMPT_RT_BASE
  11239. +/*
  11240. + * RCU callback for delayed mm drop. Not strictly rcu, but we don't
  11241. + * want another facility to make this work.
  11242. + */
  11243. +void __mmdrop_delayed(struct rcu_head *rhp)
  11244. +{
  11245. + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
  11246. +
  11247. + __mmdrop(mm);
  11248. +}
  11249. +#endif
  11250. +
  11251. static inline void __mmput(struct mm_struct *mm)
  11252. {
  11253. VM_BUG_ON(atomic_read(&mm->mm_users));
  11254. @@ -1432,6 +1469,7 @@ static void rt_mutex_init_task(struct task_struct *p)
  11255. #ifdef CONFIG_RT_MUTEXES
  11256. p->pi_waiters = RB_ROOT;
  11257. p->pi_waiters_leftmost = NULL;
  11258. + p->pi_top_task = NULL;
  11259. p->pi_blocked_on = NULL;
  11260. #endif
  11261. }
  11262. @@ -1441,6 +1479,9 @@ static void rt_mutex_init_task(struct task_struct *p)
  11263. */
  11264. static void posix_cpu_timers_init(struct task_struct *tsk)
  11265. {
  11266. +#ifdef CONFIG_PREEMPT_RT_BASE
  11267. + tsk->posix_timer_list = NULL;
  11268. +#endif
  11269. tsk->cputime_expires.prof_exp = 0;
  11270. tsk->cputime_expires.virt_exp = 0;
  11271. tsk->cputime_expires.sched_exp = 0;
  11272. @@ -1567,6 +1608,7 @@ static __latent_entropy struct task_struct *copy_process(
  11273. spin_lock_init(&p->alloc_lock);
  11274. init_sigpending(&p->pending);
  11275. + p->sigqueue_cache = NULL;
  11276. p->utime = p->stime = p->gtime = 0;
  11277. p->utimescaled = p->stimescaled = 0;
  11278. diff --git a/kernel/futex.c b/kernel/futex.c
  11279. index 88bad86180ac..2e074d63e8fa 100644
  11280. --- a/kernel/futex.c
  11281. +++ b/kernel/futex.c
  11282. @@ -801,7 +801,7 @@ static int refill_pi_state_cache(void)
  11283. return 0;
  11284. }
  11285. -static struct futex_pi_state * alloc_pi_state(void)
  11286. +static struct futex_pi_state *alloc_pi_state(void)
  11287. {
  11288. struct futex_pi_state *pi_state = current->pi_state_cache;
  11289. @@ -811,6 +811,11 @@ static struct futex_pi_state * alloc_pi_state(void)
  11290. return pi_state;
  11291. }
  11292. +static void get_pi_state(struct futex_pi_state *pi_state)
  11293. +{
  11294. + WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount));
  11295. +}
  11296. +
  11297. /*
  11298. * Drops a reference to the pi_state object and frees or caches it
  11299. * when the last reference is gone.
  11300. @@ -855,7 +860,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
  11301. * Look up the task based on what TID userspace gave us.
  11302. * We dont trust it.
  11303. */
  11304. -static struct task_struct * futex_find_get_task(pid_t pid)
  11305. +static struct task_struct *futex_find_get_task(pid_t pid)
  11306. {
  11307. struct task_struct *p;
  11308. @@ -905,7 +910,9 @@ void exit_pi_state_list(struct task_struct *curr)
  11309. * task still owns the PI-state:
  11310. */
  11311. if (head->next != next) {
  11312. + raw_spin_unlock_irq(&curr->pi_lock);
  11313. spin_unlock(&hb->lock);
  11314. + raw_spin_lock_irq(&curr->pi_lock);
  11315. continue;
  11316. }
  11317. @@ -915,10 +922,12 @@ void exit_pi_state_list(struct task_struct *curr)
  11318. pi_state->owner = NULL;
  11319. raw_spin_unlock_irq(&curr->pi_lock);
  11320. - rt_mutex_unlock(&pi_state->pi_mutex);
  11321. -
  11322. + get_pi_state(pi_state);
  11323. spin_unlock(&hb->lock);
  11324. + rt_mutex_futex_unlock(&pi_state->pi_mutex);
  11325. + put_pi_state(pi_state);
  11326. +
  11327. raw_spin_lock_irq(&curr->pi_lock);
  11328. }
  11329. raw_spin_unlock_irq(&curr->pi_lock);
  11330. @@ -972,6 +981,39 @@ void exit_pi_state_list(struct task_struct *curr)
  11331. *
  11332. * [10] There is no transient state which leaves owner and user space
  11333. * TID out of sync.
  11334. + *
  11335. + *
  11336. + * Serialization and lifetime rules:
  11337. + *
  11338. + * hb->lock:
  11339. + *
  11340. + * hb -> futex_q, relation
  11341. + * futex_q -> pi_state, relation
  11342. + *
  11343. + * (cannot be raw because hb can contain arbitrary amount
  11344. + * of futex_q's)
  11345. + *
  11346. + * pi_mutex->wait_lock:
  11347. + *
  11348. + * {uval, pi_state}
  11349. + *
  11350. + * (and pi_mutex 'obviously')
  11351. + *
  11352. + * p->pi_lock:
  11353. + *
  11354. + * p->pi_state_list -> pi_state->list, relation
  11355. + *
  11356. + * pi_state->refcount:
  11357. + *
  11358. + * pi_state lifetime
  11359. + *
  11360. + *
  11361. + * Lock order:
  11362. + *
  11363. + * hb->lock
  11364. + * pi_mutex->wait_lock
  11365. + * p->pi_lock
  11366. + *
  11367. */
  11368. /*
  11369. @@ -979,10 +1021,13 @@ void exit_pi_state_list(struct task_struct *curr)
  11370. * the pi_state against the user space value. If correct, attach to
  11371. * it.
  11372. */
  11373. -static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
  11374. +static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
  11375. + struct futex_pi_state *pi_state,
  11376. struct futex_pi_state **ps)
  11377. {
  11378. pid_t pid = uval & FUTEX_TID_MASK;
  11379. + u32 uval2;
  11380. + int ret;
  11381. /*
  11382. * Userspace might have messed up non-PI and PI futexes [3]
  11383. @@ -990,8 +1035,38 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
  11384. if (unlikely(!pi_state))
  11385. return -EINVAL;
  11386. + /*
  11387. + * We get here with hb->lock held, and having found a
  11388. + * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
  11389. + * has dropped the hb->lock in between queue_me() and unqueue_me_pi(),
  11390. + * which in turn means that futex_lock_pi() still has a reference on
  11391. + * our pi_state.
  11392. + *
  11393. + * The waiter holding a reference on @pi_state also protects against
  11394. + * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
  11395. + * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
  11396. + * free pi_state before we can take a reference ourselves.
  11397. + */
  11398. WARN_ON(!atomic_read(&pi_state->refcount));
  11399. + /*
  11400. + * Now that we have a pi_state, we can acquire wait_lock
  11401. + * and do the state validation.
  11402. + */
  11403. + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
  11404. +
  11405. + /*
  11406. + * Since {uval, pi_state} is serialized by wait_lock, and our current
  11407. + * uval was read without holding it, it can have changed. Verify it
  11408. + * still is what we expect it to be, otherwise retry the entire
  11409. + * operation.
  11410. + */
  11411. + if (get_futex_value_locked(&uval2, uaddr))
  11412. + goto out_efault;
  11413. +
  11414. + if (uval != uval2)
  11415. + goto out_eagain;
  11416. +
  11417. /*
  11418. * Handle the owner died case:
  11419. */
  11420. @@ -1007,11 +1082,11 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
  11421. * is not 0. Inconsistent state. [5]
  11422. */
  11423. if (pid)
  11424. - return -EINVAL;
  11425. + goto out_einval;
  11426. /*
  11427. * Take a ref on the state and return success. [4]
  11428. */
  11429. - goto out_state;
  11430. + goto out_attach;
  11431. }
  11432. /*
  11433. @@ -1023,14 +1098,14 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
  11434. * Take a ref on the state and return success. [6]
  11435. */
  11436. if (!pid)
  11437. - goto out_state;
  11438. + goto out_attach;
  11439. } else {
  11440. /*
  11441. * If the owner died bit is not set, then the pi_state
  11442. * must have an owner. [7]
  11443. */
  11444. if (!pi_state->owner)
  11445. - return -EINVAL;
  11446. + goto out_einval;
  11447. }
  11448. /*
  11449. @@ -1039,11 +1114,29 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
  11450. * user space TID. [9/10]
  11451. */
  11452. if (pid != task_pid_vnr(pi_state->owner))
  11453. - return -EINVAL;
  11454. -out_state:
  11455. - atomic_inc(&pi_state->refcount);
  11456. + goto out_einval;
  11457. +
  11458. +out_attach:
  11459. + get_pi_state(pi_state);
  11460. + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
  11461. *ps = pi_state;
  11462. return 0;
  11463. +
  11464. +out_einval:
  11465. + ret = -EINVAL;
  11466. + goto out_error;
  11467. +
  11468. +out_eagain:
  11469. + ret = -EAGAIN;
  11470. + goto out_error;
  11471. +
  11472. +out_efault:
  11473. + ret = -EFAULT;
  11474. + goto out_error;
  11475. +
  11476. +out_error:
  11477. + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
  11478. + return ret;
  11479. }
  11480. /*
  11481. @@ -1094,6 +1187,9 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
  11482. /*
  11483. * No existing pi state. First waiter. [2]
  11484. + *
  11485. + * This creates pi_state, we have hb->lock held, this means nothing can
  11486. + * observe this state, wait_lock is irrelevant.
  11487. */
  11488. pi_state = alloc_pi_state();
  11489. @@ -1118,17 +1214,18 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
  11490. return 0;
  11491. }
  11492. -static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
  11493. +static int lookup_pi_state(u32 __user *uaddr, u32 uval,
  11494. + struct futex_hash_bucket *hb,
  11495. union futex_key *key, struct futex_pi_state **ps)
  11496. {
  11497. - struct futex_q *match = futex_top_waiter(hb, key);
  11498. + struct futex_q *top_waiter = futex_top_waiter(hb, key);
  11499. /*
  11500. * If there is a waiter on that futex, validate it and
  11501. * attach to the pi_state when the validation succeeds.
  11502. */
  11503. - if (match)
  11504. - return attach_to_pi_state(uval, match->pi_state, ps);
  11505. + if (top_waiter)
  11506. + return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
  11507. /*
  11508. * We are the first waiter - try to look up the owner based on
  11509. @@ -1147,7 +1244,7 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
  11510. if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
  11511. return -EFAULT;
  11512. - /*If user space value changed, let the caller retry */
  11513. + /* If user space value changed, let the caller retry */
  11514. return curval != uval ? -EAGAIN : 0;
  11515. }
  11516. @@ -1175,7 +1272,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
  11517. struct task_struct *task, int set_waiters)
  11518. {
  11519. u32 uval, newval, vpid = task_pid_vnr(task);
  11520. - struct futex_q *match;
  11521. + struct futex_q *top_waiter;
  11522. int ret;
  11523. /*
  11524. @@ -1201,9 +1298,9 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
  11525. * Lookup existing state first. If it exists, try to attach to
  11526. * its pi_state.
  11527. */
  11528. - match = futex_top_waiter(hb, key);
  11529. - if (match)
  11530. - return attach_to_pi_state(uval, match->pi_state, ps);
  11531. + top_waiter = futex_top_waiter(hb, key);
  11532. + if (top_waiter)
  11533. + return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
  11534. /*
  11535. * No waiter and user TID is 0. We are here because the
  11536. @@ -1284,50 +1381,45 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
  11537. wake_q_add(wake_q, p);
  11538. __unqueue_futex(q);
  11539. /*
  11540. - * The waiting task can free the futex_q as soon as
  11541. - * q->lock_ptr = NULL is written, without taking any locks. A
  11542. - * memory barrier is required here to prevent the following
  11543. - * store to lock_ptr from getting ahead of the plist_del.
  11544. + * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
  11545. + * is written, without taking any locks. This is possible in the event
  11546. + * of a spurious wakeup, for example. A memory barrier is required here
  11547. + * to prevent the following store to lock_ptr from getting ahead of the
  11548. + * plist_del in __unqueue_futex().
  11549. */
  11550. - smp_wmb();
  11551. - q->lock_ptr = NULL;
  11552. + smp_store_release(&q->lock_ptr, NULL);
  11553. }
  11554. -static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
  11555. - struct futex_hash_bucket *hb)
  11556. +/*
  11557. + * Caller must hold a reference on @pi_state.
  11558. + */
  11559. +static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
  11560. {
  11561. - struct task_struct *new_owner;
  11562. - struct futex_pi_state *pi_state = this->pi_state;
  11563. u32 uninitialized_var(curval), newval;
  11564. + struct task_struct *new_owner;
  11565. + bool postunlock = false;
  11566. WAKE_Q(wake_q);
  11567. - bool deboost;
  11568. + WAKE_Q(wake_sleeper_q);
  11569. int ret = 0;
  11570. - if (!pi_state)
  11571. - return -EINVAL;
  11572. -
  11573. - /*
  11574. - * If current does not own the pi_state then the futex is
  11575. - * inconsistent and user space fiddled with the futex value.
  11576. - */
  11577. - if (pi_state->owner != current)
  11578. - return -EINVAL;
  11579. -
  11580. - raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
  11581. new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
  11582. + if (WARN_ON_ONCE(!new_owner)) {
  11583. + /*
  11584. + * As per the comment in futex_unlock_pi() this should not happen.
  11585. + *
  11586. + * When this happens, give up our locks and try again, giving
  11587. + * the futex_lock_pi() instance time to complete, either by
  11588. + * waiting on the rtmutex or removing itself from the futex
  11589. + * queue.
  11590. + */
  11591. + ret = -EAGAIN;
  11592. + goto out_unlock;
  11593. + }
  11594. /*
  11595. - * It is possible that the next waiter (the one that brought
  11596. - * this owner to the kernel) timed out and is no longer
  11597. - * waiting on the lock.
  11598. - */
  11599. - if (!new_owner)
  11600. - new_owner = this->task;
  11601. -
  11602. - /*
  11603. - * We pass it to the next owner. The WAITERS bit is always
  11604. - * kept enabled while there is PI state around. We cleanup the
  11605. - * owner died bit, because we are the owner.
  11606. + * We pass it to the next owner. The WAITERS bit is always kept
  11607. + * enabled while there is PI state around. We cleanup the owner
  11608. + * died bit, because we are the owner.
  11609. */
  11610. newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
  11611. @@ -1336,6 +1428,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
  11612. if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) {
  11613. ret = -EFAULT;
  11614. +
  11615. } else if (curval != uval) {
  11616. /*
  11617. * If a unconditional UNLOCK_PI operation (user space did not
  11618. @@ -1348,10 +1441,14 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
  11619. else
  11620. ret = -EINVAL;
  11621. }
  11622. - if (ret) {
  11623. - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
  11624. - return ret;
  11625. - }
  11626. +
  11627. + if (ret)
  11628. + goto out_unlock;
  11629. +
  11630. + /*
  11631. + * This is a point of no return; once we modify the uval there is no
  11632. + * going back and subsequent operations must not fail.
  11633. + */
  11634. raw_spin_lock(&pi_state->owner->pi_lock);
  11635. WARN_ON(list_empty(&pi_state->list));
  11636. @@ -1364,22 +1461,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
  11637. pi_state->owner = new_owner;
  11638. raw_spin_unlock(&new_owner->pi_lock);
  11639. + postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
  11640. + &wake_sleeper_q);
  11641. +out_unlock:
  11642. raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
  11643. - deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
  11644. + if (postunlock)
  11645. + rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
  11646. - /*
  11647. - * First unlock HB so the waiter does not spin on it once he got woken
  11648. - * up. Second wake up the waiter before the priority is adjusted. If we
  11649. - * deboost first (and lose our higher priority), then the task might get
  11650. - * scheduled away before the wake up can take place.
  11651. - */
  11652. - spin_unlock(&hb->lock);
  11653. - wake_up_q(&wake_q);
  11654. - if (deboost)
  11655. - rt_mutex_adjust_prio(current);
  11656. -
  11657. - return 0;
  11658. + return ret;
  11659. }
  11660. /*
  11661. @@ -1825,7 +1915,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
  11662. * If that call succeeds then we have pi_state and an
  11663. * initial refcount on it.
  11664. */
  11665. - ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
  11666. + ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state);
  11667. }
  11668. switch (ret) {
  11669. @@ -1908,7 +1998,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
  11670. * refcount on the pi_state and store the pointer in
  11671. * the futex_q object of the waiter.
  11672. */
  11673. - atomic_inc(&pi_state->refcount);
  11674. + get_pi_state(pi_state);
  11675. this->pi_state = pi_state;
  11676. ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
  11677. this->rt_waiter,
  11678. @@ -1925,6 +2015,16 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
  11679. requeue_pi_wake_futex(this, &key2, hb2);
  11680. drop_count++;
  11681. continue;
  11682. + } else if (ret == -EAGAIN) {
  11683. + /*
  11684. + * Waiter was woken by timeout or
  11685. + * signal and has set pi_blocked_on to
  11686. + * PI_WAKEUP_INPROGRESS before we
  11687. + * tried to enqueue it on the rtmutex.
  11688. + */
  11689. + this->pi_state = NULL;
  11690. + put_pi_state(pi_state);
  11691. + continue;
  11692. } else if (ret) {
  11693. /*
  11694. * rt_mutex_start_proxy_lock() detected a
  11695. @@ -2008,20 +2108,7 @@ queue_unlock(struct futex_hash_bucket *hb)
  11696. hb_waiters_dec(hb);
  11697. }
  11698. -/**
  11699. - * queue_me() - Enqueue the futex_q on the futex_hash_bucket
  11700. - * @q: The futex_q to enqueue
  11701. - * @hb: The destination hash bucket
  11702. - *
  11703. - * The hb->lock must be held by the caller, and is released here. A call to
  11704. - * queue_me() is typically paired with exactly one call to unqueue_me(). The
  11705. - * exceptions involve the PI related operations, which may use unqueue_me_pi()
  11706. - * or nothing if the unqueue is done as part of the wake process and the unqueue
  11707. - * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
  11708. - * an example).
  11709. - */
  11710. -static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
  11711. - __releases(&hb->lock)
  11712. +static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
  11713. {
  11714. int prio;
  11715. @@ -2038,6 +2125,24 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
  11716. plist_node_init(&q->list, prio);
  11717. plist_add(&q->list, &hb->chain);
  11718. q->task = current;
  11719. +}
  11720. +
  11721. +/**
  11722. + * queue_me() - Enqueue the futex_q on the futex_hash_bucket
  11723. + * @q: The futex_q to enqueue
  11724. + * @hb: The destination hash bucket
  11725. + *
  11726. + * The hb->lock must be held by the caller, and is released here. A call to
  11727. + * queue_me() is typically paired with exactly one call to unqueue_me(). The
  11728. + * exceptions involve the PI related operations, which may use unqueue_me_pi()
  11729. + * or nothing if the unqueue is done as part of the wake process and the unqueue
  11730. + * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
  11731. + * an example).
  11732. + */
  11733. +static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
  11734. + __releases(&hb->lock)
  11735. +{
  11736. + __queue_me(q, hb);
  11737. spin_unlock(&hb->lock);
  11738. }
  11739. @@ -2124,10 +2229,13 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
  11740. {
  11741. u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
  11742. struct futex_pi_state *pi_state = q->pi_state;
  11743. - struct task_struct *oldowner = pi_state->owner;
  11744. u32 uval, uninitialized_var(curval), newval;
  11745. + struct task_struct *oldowner;
  11746. int ret;
  11747. + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
  11748. +
  11749. + oldowner = pi_state->owner;
  11750. /* Owner died? */
  11751. if (!pi_state->owner)
  11752. newtid |= FUTEX_OWNER_DIED;
  11753. @@ -2135,7 +2243,8 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
  11754. /*
  11755. * We are here either because we stole the rtmutex from the
  11756. * previous highest priority waiter or we are the highest priority
  11757. - * waiter but failed to get the rtmutex the first time.
  11758. + * waiter but have failed to get the rtmutex the first time.
  11759. + *
  11760. * We have to replace the newowner TID in the user space variable.
  11761. * This must be atomic as we have to preserve the owner died bit here.
  11762. *
  11763. @@ -2143,17 +2252,16 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
  11764. * because we can fault here. Imagine swapped out pages or a fork
  11765. * that marked all the anonymous memory readonly for cow.
  11766. *
  11767. - * Modifying pi_state _before_ the user space value would
  11768. - * leave the pi_state in an inconsistent state when we fault
  11769. - * here, because we need to drop the hash bucket lock to
  11770. - * handle the fault. This might be observed in the PID check
  11771. - * in lookup_pi_state.
  11772. + * Modifying pi_state _before_ the user space value would leave the
  11773. + * pi_state in an inconsistent state when we fault here, because we
  11774. + * need to drop the locks to handle the fault. This might be observed
  11775. + * in the PID check in lookup_pi_state.
  11776. */
  11777. retry:
  11778. if (get_futex_value_locked(&uval, uaddr))
  11779. goto handle_fault;
  11780. - while (1) {
  11781. + for (;;) {
  11782. newval = (uval & FUTEX_OWNER_DIED) | newtid;
  11783. if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
  11784. @@ -2168,47 +2276,60 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
  11785. * itself.
  11786. */
  11787. if (pi_state->owner != NULL) {
  11788. - raw_spin_lock_irq(&pi_state->owner->pi_lock);
  11789. + raw_spin_lock(&pi_state->owner->pi_lock);
  11790. WARN_ON(list_empty(&pi_state->list));
  11791. list_del_init(&pi_state->list);
  11792. - raw_spin_unlock_irq(&pi_state->owner->pi_lock);
  11793. + raw_spin_unlock(&pi_state->owner->pi_lock);
  11794. }
  11795. pi_state->owner = newowner;
  11796. - raw_spin_lock_irq(&newowner->pi_lock);
  11797. + raw_spin_lock(&newowner->pi_lock);
  11798. WARN_ON(!list_empty(&pi_state->list));
  11799. list_add(&pi_state->list, &newowner->pi_state_list);
  11800. - raw_spin_unlock_irq(&newowner->pi_lock);
  11801. + raw_spin_unlock(&newowner->pi_lock);
  11802. + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
  11803. +
  11804. return 0;
  11805. /*
  11806. - * To handle the page fault we need to drop the hash bucket
  11807. - * lock here. That gives the other task (either the highest priority
  11808. - * waiter itself or the task which stole the rtmutex) the
  11809. - * chance to try the fixup of the pi_state. So once we are
  11810. - * back from handling the fault we need to check the pi_state
  11811. - * after reacquiring the hash bucket lock and before trying to
  11812. - * do another fixup. When the fixup has been done already we
  11813. - * simply return.
  11814. + * To handle the page fault we need to drop the locks here. That gives
  11815. + * the other task (either the highest priority waiter itself or the
  11816. + * task which stole the rtmutex) the chance to try the fixup of the
  11817. + * pi_state. So once we are back from handling the fault we need to
  11818. + * check the pi_state after reacquiring the locks and before trying to
  11819. + * do another fixup. When the fixup has been done already we simply
  11820. + * return.
  11821. + *
  11822. + * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
  11823. + * drop hb->lock since the caller owns the hb -> futex_q relation.
  11824. + * Dropping the pi_mutex->wait_lock requires the state revalidate.
  11825. */
  11826. handle_fault:
  11827. + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
  11828. spin_unlock(q->lock_ptr);
  11829. ret = fault_in_user_writeable(uaddr);
  11830. spin_lock(q->lock_ptr);
  11831. + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
  11832. /*
  11833. * Check if someone else fixed it for us:
  11834. */
  11835. - if (pi_state->owner != oldowner)
  11836. - return 0;
  11837. + if (pi_state->owner != oldowner) {
  11838. + ret = 0;
  11839. + goto out_unlock;
  11840. + }
  11841. if (ret)
  11842. - return ret;
  11843. + goto out_unlock;
  11844. goto retry;
  11845. +
  11846. +out_unlock:
  11847. + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
  11848. + return ret;
  11849. }
  11850. static long futex_wait_restart(struct restart_block *restart);
  11851. @@ -2230,57 +2351,32 @@ static long futex_wait_restart(struct restart_block *restart);
  11852. */
  11853. static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
  11854. {
  11855. - struct task_struct *owner;
  11856. int ret = 0;
  11857. if (locked) {
  11858. /*
  11859. * Got the lock. We might not be the anticipated owner if we
  11860. * did a lock-steal - fix up the PI-state in that case:
  11861. + *
  11862. + * We can safely read pi_state->owner without holding wait_lock
  11863. + * because we now own the rt_mutex, only the owner will attempt
  11864. + * to change it.
  11865. */
  11866. if (q->pi_state->owner != current)
  11867. ret = fixup_pi_state_owner(uaddr, q, current);
  11868. goto out;
  11869. }
  11870. - /*
  11871. - * Catch the rare case, where the lock was released when we were on the
  11872. - * way back before we locked the hash bucket.
  11873. - */
  11874. - if (q->pi_state->owner == current) {
  11875. - /*
  11876. - * Try to get the rt_mutex now. This might fail as some other
  11877. - * task acquired the rt_mutex after we removed ourself from the
  11878. - * rt_mutex waiters list.
  11879. - */
  11880. - if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
  11881. - locked = 1;
  11882. - goto out;
  11883. - }
  11884. -
  11885. - /*
  11886. - * pi_state is incorrect, some other task did a lock steal and
  11887. - * we returned due to timeout or signal without taking the
  11888. - * rt_mutex. Too late.
  11889. - */
  11890. - raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
  11891. - owner = rt_mutex_owner(&q->pi_state->pi_mutex);
  11892. - if (!owner)
  11893. - owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
  11894. - raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
  11895. - ret = fixup_pi_state_owner(uaddr, q, owner);
  11896. - goto out;
  11897. - }
  11898. -
  11899. /*
  11900. * Paranoia check. If we did not take the lock, then we should not be
  11901. * the owner of the rt_mutex.
  11902. */
  11903. - if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
  11904. + if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
  11905. printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
  11906. "pi-state %p\n", ret,
  11907. q->pi_state->pi_mutex.owner,
  11908. q->pi_state->owner);
  11909. + }
  11910. out:
  11911. return ret ? ret : locked;
  11912. @@ -2504,6 +2600,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
  11913. ktime_t *time, int trylock)
  11914. {
  11915. struct hrtimer_sleeper timeout, *to = NULL;
  11916. + struct futex_pi_state *pi_state = NULL;
  11917. + struct rt_mutex_waiter rt_waiter;
  11918. struct futex_hash_bucket *hb;
  11919. struct futex_q q = futex_q_init;
  11920. int res, ret;
  11921. @@ -2556,24 +2654,76 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
  11922. }
  11923. }
  11924. + WARN_ON(!q.pi_state);
  11925. +
  11926. /*
  11927. * Only actually queue now that the atomic ops are done:
  11928. */
  11929. - queue_me(&q, hb);
  11930. + __queue_me(&q, hb);
  11931. - WARN_ON(!q.pi_state);
  11932. - /*
  11933. - * Block on the PI mutex:
  11934. - */
  11935. - if (!trylock) {
  11936. - ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to);
  11937. - } else {
  11938. - ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
  11939. + if (trylock) {
  11940. + ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
  11941. /* Fixup the trylock return value: */
  11942. ret = ret ? 0 : -EWOULDBLOCK;
  11943. + goto no_block;
  11944. + }
  11945. +
  11946. + rt_mutex_init_waiter(&rt_waiter, false);
  11947. +
  11948. + /*
  11949. + * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
  11950. + * hold it while doing rt_mutex_start_proxy(), because then it will
  11951. + * include hb->lock in the blocking chain, even through we'll not in
  11952. + * fact hold it while blocking. This will lead it to report -EDEADLK
  11953. + * and BUG when futex_unlock_pi() interleaves with this.
  11954. + *
  11955. + * Therefore acquire wait_lock while holding hb->lock, but drop the
  11956. + * latter before calling rt_mutex_start_proxy_lock(). This still fully
  11957. + * serializes against futex_unlock_pi() as that does the exact same
  11958. + * lock handoff sequence.
  11959. + */
  11960. + raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
  11961. + /*
  11962. + * the migrate_disable() here disables migration in the in_atomic() fast
  11963. + * path which is enabled again in the following spin_unlock(). We have
  11964. + * one migrate_disable() pending in the slow-path which is reversed
  11965. + * after the raw_spin_unlock_irq() where we leave the atomic context.
  11966. + */
  11967. + migrate_disable();
  11968. +
  11969. + spin_unlock(q.lock_ptr);
  11970. + ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
  11971. + raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
  11972. + migrate_enable();
  11973. +
  11974. + if (ret) {
  11975. + if (ret == 1)
  11976. + ret = 0;
  11977. +
  11978. + spin_lock(q.lock_ptr);
  11979. + goto no_block;
  11980. }
  11981. +
  11982. + if (unlikely(to))
  11983. + hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
  11984. +
  11985. + ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
  11986. +
  11987. spin_lock(q.lock_ptr);
  11988. + /*
  11989. + * If we failed to acquire the lock (signal/timeout), we must
  11990. + * first acquire the hb->lock before removing the lock from the
  11991. + * rt_mutex waitqueue, such that we can keep the hb and rt_mutex
  11992. + * wait lists consistent.
  11993. + *
  11994. + * In particular; it is important that futex_unlock_pi() can not
  11995. + * observe this inconsistency.
  11996. + */
  11997. + if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
  11998. + ret = 0;
  11999. +
  12000. +no_block:
  12001. /*
  12002. * Fixup the pi_state owner and possibly acquire the lock if we
  12003. * haven't already.
  12004. @@ -2590,12 +2740,19 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
  12005. * If fixup_owner() faulted and was unable to handle the fault, unlock
  12006. * it and return the fault to userspace.
  12007. */
  12008. - if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
  12009. - rt_mutex_unlock(&q.pi_state->pi_mutex);
  12010. + if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
  12011. + pi_state = q.pi_state;
  12012. + get_pi_state(pi_state);
  12013. + }
  12014. /* Unqueue and drop the lock */
  12015. unqueue_me_pi(&q);
  12016. + if (pi_state) {
  12017. + rt_mutex_futex_unlock(&pi_state->pi_mutex);
  12018. + put_pi_state(pi_state);
  12019. + }
  12020. +
  12021. goto out_put_key;
  12022. out_unlock_put_key:
  12023. @@ -2604,8 +2761,10 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
  12024. out_put_key:
  12025. put_futex_key(&q.key);
  12026. out:
  12027. - if (to)
  12028. + if (to) {
  12029. + hrtimer_cancel(&to->timer);
  12030. destroy_hrtimer_on_stack(&to->timer);
  12031. + }
  12032. return ret != -EINTR ? ret : -ERESTARTNOINTR;
  12033. uaddr_faulted:
  12034. @@ -2632,7 +2791,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
  12035. u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current);
  12036. union futex_key key = FUTEX_KEY_INIT;
  12037. struct futex_hash_bucket *hb;
  12038. - struct futex_q *match;
  12039. + struct futex_q *top_waiter;
  12040. int ret;
  12041. retry:
  12042. @@ -2656,12 +2815,48 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
  12043. * all and we at least want to know if user space fiddled
  12044. * with the futex value instead of blindly unlocking.
  12045. */
  12046. - match = futex_top_waiter(hb, &key);
  12047. - if (match) {
  12048. - ret = wake_futex_pi(uaddr, uval, match, hb);
  12049. + top_waiter = futex_top_waiter(hb, &key);
  12050. + if (top_waiter) {
  12051. + struct futex_pi_state *pi_state = top_waiter->pi_state;
  12052. +
  12053. + ret = -EINVAL;
  12054. + if (!pi_state)
  12055. + goto out_unlock;
  12056. +
  12057. + /*
  12058. + * If current does not own the pi_state then the futex is
  12059. + * inconsistent and user space fiddled with the futex value.
  12060. + */
  12061. + if (pi_state->owner != current)
  12062. + goto out_unlock;
  12063. +
  12064. + get_pi_state(pi_state);
  12065. + /*
  12066. + * By taking wait_lock while still holding hb->lock, we ensure
  12067. + * there is no point where we hold neither; and therefore
  12068. + * wake_futex_pi() must observe a state consistent with what we
  12069. + * observed.
  12070. + */
  12071. + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
  12072. + /*
  12073. + * Magic trickery for now to make the RT migrate disable
  12074. + * logic happy. The following spin_unlock() happens with
  12075. + * interrupts disabled so the internal migrate_enable()
  12076. + * won't undo the migrate_disable() which was issued when
  12077. + * locking hb->lock.
  12078. + */
  12079. + migrate_disable();
  12080. + spin_unlock(&hb->lock);
  12081. +
  12082. + /* Drops pi_state->pi_mutex.wait_lock */
  12083. + ret = wake_futex_pi(uaddr, uval, pi_state);
  12084. +
  12085. + migrate_enable();
  12086. +
  12087. + put_pi_state(pi_state);
  12088. +
  12089. /*
  12090. - * In case of success wake_futex_pi dropped the hash
  12091. - * bucket lock.
  12092. + * Success, we're done! No tricky corner cases.
  12093. */
  12094. if (!ret)
  12095. goto out_putkey;
  12096. @@ -2676,7 +2871,6 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
  12097. * setting the FUTEX_WAITERS bit. Try again.
  12098. */
  12099. if (ret == -EAGAIN) {
  12100. - spin_unlock(&hb->lock);
  12101. put_futex_key(&key);
  12102. goto retry;
  12103. }
  12104. @@ -2684,7 +2878,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
  12105. * wake_futex_pi has detected invalid state. Tell user
  12106. * space.
  12107. */
  12108. - goto out_unlock;
  12109. + goto out_putkey;
  12110. }
  12111. /*
  12112. @@ -2694,8 +2888,10 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
  12113. * preserve the WAITERS bit not the OWNER_DIED one. We are the
  12114. * owner.
  12115. */
  12116. - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))
  12117. + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) {
  12118. + spin_unlock(&hb->lock);
  12119. goto pi_faulted;
  12120. + }
  12121. /*
  12122. * If uval has changed, let user space handle it.
  12123. @@ -2709,7 +2905,6 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
  12124. return ret;
  12125. pi_faulted:
  12126. - spin_unlock(&hb->lock);
  12127. put_futex_key(&key);
  12128. ret = fault_in_user_writeable(uaddr);
  12129. @@ -2813,8 +3008,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  12130. u32 __user *uaddr2)
  12131. {
  12132. struct hrtimer_sleeper timeout, *to = NULL;
  12133. + struct futex_pi_state *pi_state = NULL;
  12134. struct rt_mutex_waiter rt_waiter;
  12135. - struct futex_hash_bucket *hb;
  12136. + struct futex_hash_bucket *hb, *hb2;
  12137. union futex_key key2 = FUTEX_KEY_INIT;
  12138. struct futex_q q = futex_q_init;
  12139. int res, ret;
  12140. @@ -2839,10 +3035,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  12141. * The waiter is allocated on our stack, manipulated by the requeue
  12142. * code while we sleep on uaddr.
  12143. */
  12144. - debug_rt_mutex_init_waiter(&rt_waiter);
  12145. - RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
  12146. - RB_CLEAR_NODE(&rt_waiter.tree_entry);
  12147. - rt_waiter.task = NULL;
  12148. + rt_mutex_init_waiter(&rt_waiter, false);
  12149. ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
  12150. if (unlikely(ret != 0))
  12151. @@ -2873,20 +3066,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  12152. /* Queue the futex_q, drop the hb lock, wait for wakeup. */
  12153. futex_wait_queue_me(hb, &q, to);
  12154. - spin_lock(&hb->lock);
  12155. - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
  12156. - spin_unlock(&hb->lock);
  12157. - if (ret)
  12158. - goto out_put_keys;
  12159. + /*
  12160. + * On RT we must avoid races with requeue and trying to block
  12161. + * on two mutexes (hb->lock and uaddr2's rtmutex) by
  12162. + * serializing access to pi_blocked_on with pi_lock.
  12163. + */
  12164. + raw_spin_lock_irq(&current->pi_lock);
  12165. + if (current->pi_blocked_on) {
  12166. + /*
  12167. + * We have been requeued or are in the process of
  12168. + * being requeued.
  12169. + */
  12170. + raw_spin_unlock_irq(&current->pi_lock);
  12171. + } else {
  12172. + /*
  12173. + * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS
  12174. + * prevents a concurrent requeue from moving us to the
  12175. + * uaddr2 rtmutex. After that we can safely acquire
  12176. + * (and possibly block on) hb->lock.
  12177. + */
  12178. + current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
  12179. + raw_spin_unlock_irq(&current->pi_lock);
  12180. +
  12181. + spin_lock(&hb->lock);
  12182. +
  12183. + /*
  12184. + * Clean up pi_blocked_on. We might leak it otherwise
  12185. + * when we succeeded with the hb->lock in the fast
  12186. + * path.
  12187. + */
  12188. + raw_spin_lock_irq(&current->pi_lock);
  12189. + current->pi_blocked_on = NULL;
  12190. + raw_spin_unlock_irq(&current->pi_lock);
  12191. +
  12192. + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
  12193. + spin_unlock(&hb->lock);
  12194. + if (ret)
  12195. + goto out_put_keys;
  12196. + }
  12197. /*
  12198. - * In order for us to be here, we know our q.key == key2, and since
  12199. - * we took the hb->lock above, we also know that futex_requeue() has
  12200. - * completed and we no longer have to concern ourselves with a wakeup
  12201. - * race with the atomic proxy lock acquisition by the requeue code. The
  12202. - * futex_requeue dropped our key1 reference and incremented our key2
  12203. - * reference count.
  12204. + * In order to be here, we have either been requeued, are in
  12205. + * the process of being requeued, or requeue successfully
  12206. + * acquired uaddr2 on our behalf. If pi_blocked_on was
  12207. + * non-null above, we may be racing with a requeue. Do not
  12208. + * rely on q->lock_ptr to be hb2->lock until after blocking on
  12209. + * hb->lock or hb2->lock. The futex_requeue dropped our key1
  12210. + * reference and incremented our key2 reference count.
  12211. */
  12212. + hb2 = hash_futex(&key2);
  12213. /* Check if the requeue code acquired the second futex for us. */
  12214. if (!q.rt_waiter) {
  12215. @@ -2895,16 +3123,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  12216. * did a lock-steal - fix up the PI-state in that case.
  12217. */
  12218. if (q.pi_state && (q.pi_state->owner != current)) {
  12219. - spin_lock(q.lock_ptr);
  12220. + spin_lock(&hb2->lock);
  12221. + BUG_ON(&hb2->lock != q.lock_ptr);
  12222. ret = fixup_pi_state_owner(uaddr2, &q, current);
  12223. - if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current)
  12224. - rt_mutex_unlock(&q.pi_state->pi_mutex);
  12225. + if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
  12226. + pi_state = q.pi_state;
  12227. + get_pi_state(pi_state);
  12228. + }
  12229. /*
  12230. * Drop the reference to the pi state which
  12231. * the requeue_pi() code acquired for us.
  12232. */
  12233. put_pi_state(q.pi_state);
  12234. - spin_unlock(q.lock_ptr);
  12235. + spin_unlock(&hb2->lock);
  12236. }
  12237. } else {
  12238. struct rt_mutex *pi_mutex;
  12239. @@ -2916,10 +3147,14 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  12240. */
  12241. WARN_ON(!q.pi_state);
  12242. pi_mutex = &q.pi_state->pi_mutex;
  12243. - ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
  12244. - debug_rt_mutex_free_waiter(&rt_waiter);
  12245. + ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
  12246. - spin_lock(q.lock_ptr);
  12247. + spin_lock(&hb2->lock);
  12248. + BUG_ON(&hb2->lock != q.lock_ptr);
  12249. + if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
  12250. + ret = 0;
  12251. +
  12252. + debug_rt_mutex_free_waiter(&rt_waiter);
  12253. /*
  12254. * Fixup the pi_state owner and possibly acquire the lock if we
  12255. * haven't already.
  12256. @@ -2937,13 +3172,20 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  12257. * the fault, unlock the rt_mutex and return the fault to
  12258. * userspace.
  12259. */
  12260. - if (ret && rt_mutex_owner(pi_mutex) == current)
  12261. - rt_mutex_unlock(pi_mutex);
  12262. + if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
  12263. + pi_state = q.pi_state;
  12264. + get_pi_state(pi_state);
  12265. + }
  12266. /* Unqueue and drop the lock. */
  12267. unqueue_me_pi(&q);
  12268. }
  12269. + if (pi_state) {
  12270. + rt_mutex_futex_unlock(&pi_state->pi_mutex);
  12271. + put_pi_state(pi_state);
  12272. + }
  12273. +
  12274. if (ret == -EINTR) {
  12275. /*
  12276. * We've already been requeued, but cannot restart by calling
  12277. diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
  12278. index d3f24905852c..f87aa8fdcc51 100644
  12279. --- a/kernel/irq/handle.c
  12280. +++ b/kernel/irq/handle.c
  12281. @@ -181,10 +181,16 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
  12282. {
  12283. irqreturn_t retval;
  12284. unsigned int flags = 0;
  12285. + struct pt_regs *regs = get_irq_regs();
  12286. + u64 ip = regs ? instruction_pointer(regs) : 0;
  12287. retval = __handle_irq_event_percpu(desc, &flags);
  12288. - add_interrupt_randomness(desc->irq_data.irq, flags);
  12289. +#ifdef CONFIG_PREEMPT_RT_FULL
  12290. + desc->random_ip = ip;
  12291. +#else
  12292. + add_interrupt_randomness(desc->irq_data.irq, flags, ip);
  12293. +#endif
  12294. if (!noirqdebug)
  12295. note_interrupt(desc, retval);
  12296. diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
  12297. index ea41820ab12e..5994867526f3 100644
  12298. --- a/kernel/irq/manage.c
  12299. +++ b/kernel/irq/manage.c
  12300. @@ -22,6 +22,7 @@
  12301. #include "internals.h"
  12302. #ifdef CONFIG_IRQ_FORCED_THREADING
  12303. +# ifndef CONFIG_PREEMPT_RT_BASE
  12304. __read_mostly bool force_irqthreads;
  12305. static int __init setup_forced_irqthreads(char *arg)
  12306. @@ -30,6 +31,7 @@ static int __init setup_forced_irqthreads(char *arg)
  12307. return 0;
  12308. }
  12309. early_param("threadirqs", setup_forced_irqthreads);
  12310. +# endif
  12311. #endif
  12312. static void __synchronize_hardirq(struct irq_desc *desc)
  12313. @@ -233,7 +235,12 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
  12314. if (desc->affinity_notify) {
  12315. kref_get(&desc->affinity_notify->kref);
  12316. +
  12317. +#ifdef CONFIG_PREEMPT_RT_BASE
  12318. + swork_queue(&desc->affinity_notify->swork);
  12319. +#else
  12320. schedule_work(&desc->affinity_notify->work);
  12321. +#endif
  12322. }
  12323. irqd_set(data, IRQD_AFFINITY_SET);
  12324. @@ -271,10 +278,8 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
  12325. }
  12326. EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
  12327. -static void irq_affinity_notify(struct work_struct *work)
  12328. +static void _irq_affinity_notify(struct irq_affinity_notify *notify)
  12329. {
  12330. - struct irq_affinity_notify *notify =
  12331. - container_of(work, struct irq_affinity_notify, work);
  12332. struct irq_desc *desc = irq_to_desc(notify->irq);
  12333. cpumask_var_t cpumask;
  12334. unsigned long flags;
  12335. @@ -296,6 +301,35 @@ static void irq_affinity_notify(struct work_struct *work)
  12336. kref_put(&notify->kref, notify->release);
  12337. }
  12338. +#ifdef CONFIG_PREEMPT_RT_BASE
  12339. +static void init_helper_thread(void)
  12340. +{
  12341. + static int init_sworker_once;
  12342. +
  12343. + if (init_sworker_once)
  12344. + return;
  12345. + if (WARN_ON(swork_get()))
  12346. + return;
  12347. + init_sworker_once = 1;
  12348. +}
  12349. +
  12350. +static void irq_affinity_notify(struct swork_event *swork)
  12351. +{
  12352. + struct irq_affinity_notify *notify =
  12353. + container_of(swork, struct irq_affinity_notify, swork);
  12354. + _irq_affinity_notify(notify);
  12355. +}
  12356. +
  12357. +#else
  12358. +
  12359. +static void irq_affinity_notify(struct work_struct *work)
  12360. +{
  12361. + struct irq_affinity_notify *notify =
  12362. + container_of(work, struct irq_affinity_notify, work);
  12363. + _irq_affinity_notify(notify);
  12364. +}
  12365. +#endif
  12366. +
  12367. /**
  12368. * irq_set_affinity_notifier - control notification of IRQ affinity changes
  12369. * @irq: Interrupt for which to enable/disable notification
  12370. @@ -324,7 +358,12 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
  12371. if (notify) {
  12372. notify->irq = irq;
  12373. kref_init(&notify->kref);
  12374. +#ifdef CONFIG_PREEMPT_RT_BASE
  12375. + INIT_SWORK(&notify->swork, irq_affinity_notify);
  12376. + init_helper_thread();
  12377. +#else
  12378. INIT_WORK(&notify->work, irq_affinity_notify);
  12379. +#endif
  12380. }
  12381. raw_spin_lock_irqsave(&desc->lock, flags);
  12382. @@ -879,7 +918,15 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
  12383. local_bh_disable();
  12384. ret = action->thread_fn(action->irq, action->dev_id);
  12385. irq_finalize_oneshot(desc, action);
  12386. - local_bh_enable();
  12387. + /*
  12388. + * Interrupts which have real time requirements can be set up
  12389. + * to avoid softirq processing in the thread handler. This is
  12390. + * safe as these interrupts do not raise soft interrupts.
  12391. + */
  12392. + if (irq_settings_no_softirq_call(desc))
  12393. + _local_bh_enable();
  12394. + else
  12395. + local_bh_enable();
  12396. return ret;
  12397. }
  12398. @@ -976,6 +1023,12 @@ static int irq_thread(void *data)
  12399. if (action_ret == IRQ_WAKE_THREAD)
  12400. irq_wake_secondary(desc, action);
  12401. +#ifdef CONFIG_PREEMPT_RT_FULL
  12402. + migrate_disable();
  12403. + add_interrupt_randomness(action->irq, 0,
  12404. + desc->random_ip ^ (unsigned long) action);
  12405. + migrate_enable();
  12406. +#endif
  12407. wake_threads_waitq(desc);
  12408. }
  12409. @@ -1338,6 +1391,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
  12410. irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
  12411. }
  12412. + if (new->flags & IRQF_NO_SOFTIRQ_CALL)
  12413. + irq_settings_set_no_softirq_call(desc);
  12414. +
  12415. /* Set default affinity mask once everything is setup */
  12416. setup_affinity(desc, mask);
  12417. @@ -2063,7 +2119,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state);
  12418. * This call sets the internal irqchip state of an interrupt,
  12419. * depending on the value of @which.
  12420. *
  12421. - * This function should be called with preemption disabled if the
  12422. + * This function should be called with migration disabled if the
  12423. * interrupt controller has per-cpu registers.
  12424. */
  12425. int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
  12426. diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
  12427. index 320579d89091..2df2d4445b1e 100644
  12428. --- a/kernel/irq/settings.h
  12429. +++ b/kernel/irq/settings.h
  12430. @@ -16,6 +16,7 @@ enum {
  12431. _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID,
  12432. _IRQ_IS_POLLED = IRQ_IS_POLLED,
  12433. _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY,
  12434. + _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL,
  12435. _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
  12436. };
  12437. @@ -30,6 +31,7 @@ enum {
  12438. #define IRQ_PER_CPU_DEVID GOT_YOU_MORON
  12439. #define IRQ_IS_POLLED GOT_YOU_MORON
  12440. #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON
  12441. +#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON
  12442. #undef IRQF_MODIFY_MASK
  12443. #define IRQF_MODIFY_MASK GOT_YOU_MORON
  12444. @@ -40,6 +42,16 @@ irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set)
  12445. desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK);
  12446. }
  12447. +static inline bool irq_settings_no_softirq_call(struct irq_desc *desc)
  12448. +{
  12449. + return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL;
  12450. +}
  12451. +
  12452. +static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc)
  12453. +{
  12454. + desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL;
  12455. +}
  12456. +
  12457. static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
  12458. {
  12459. return desc->status_use_accessors & _IRQ_PER_CPU;
  12460. diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
  12461. index 5707f97a3e6a..73f38dc7a7fb 100644
  12462. --- a/kernel/irq/spurious.c
  12463. +++ b/kernel/irq/spurious.c
  12464. @@ -442,6 +442,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
  12465. static int __init irqfixup_setup(char *str)
  12466. {
  12467. +#ifdef CONFIG_PREEMPT_RT_BASE
  12468. + pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
  12469. + return 1;
  12470. +#endif
  12471. irqfixup = 1;
  12472. printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
  12473. printk(KERN_WARNING "This may impact system performance.\n");
  12474. @@ -454,6 +458,10 @@ module_param(irqfixup, int, 0644);
  12475. static int __init irqpoll_setup(char *str)
  12476. {
  12477. +#ifdef CONFIG_PREEMPT_RT_BASE
  12478. + pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
  12479. + return 1;
  12480. +#endif
  12481. irqfixup = 2;
  12482. printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
  12483. "enabled\n");
  12484. diff --git a/kernel/irq_work.c b/kernel/irq_work.c
  12485. index bcf107ce0854..2899ba0d23d1 100644
  12486. --- a/kernel/irq_work.c
  12487. +++ b/kernel/irq_work.c
  12488. @@ -17,6 +17,7 @@
  12489. #include <linux/cpu.h>
  12490. #include <linux/notifier.h>
  12491. #include <linux/smp.h>
  12492. +#include <linux/interrupt.h>
  12493. #include <asm/processor.h>
  12494. @@ -65,6 +66,8 @@ void __weak arch_irq_work_raise(void)
  12495. */
  12496. bool irq_work_queue_on(struct irq_work *work, int cpu)
  12497. {
  12498. + struct llist_head *list;
  12499. +
  12500. /* All work should have been flushed before going offline */
  12501. WARN_ON_ONCE(cpu_is_offline(cpu));
  12502. @@ -75,7 +78,12 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
  12503. if (!irq_work_claim(work))
  12504. return false;
  12505. - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
  12506. + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ))
  12507. + list = &per_cpu(lazy_list, cpu);
  12508. + else
  12509. + list = &per_cpu(raised_list, cpu);
  12510. +
  12511. + if (llist_add(&work->llnode, list))
  12512. arch_send_call_function_single_ipi(cpu);
  12513. return true;
  12514. @@ -86,6 +94,9 @@ EXPORT_SYMBOL_GPL(irq_work_queue_on);
  12515. /* Enqueue the irq work @work on the current CPU */
  12516. bool irq_work_queue(struct irq_work *work)
  12517. {
  12518. + struct llist_head *list;
  12519. + bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
  12520. +
  12521. /* Only queue if not already pending */
  12522. if (!irq_work_claim(work))
  12523. return false;
  12524. @@ -93,13 +104,15 @@ bool irq_work_queue(struct irq_work *work)
  12525. /* Queue the entry and raise the IPI if needed. */
  12526. preempt_disable();
  12527. - /* If the work is "lazy", handle it from next tick if any */
  12528. - if (work->flags & IRQ_WORK_LAZY) {
  12529. - if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
  12530. - tick_nohz_tick_stopped())
  12531. - arch_irq_work_raise();
  12532. - } else {
  12533. - if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
  12534. + lazy_work = work->flags & IRQ_WORK_LAZY;
  12535. +
  12536. + if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ)))
  12537. + list = this_cpu_ptr(&lazy_list);
  12538. + else
  12539. + list = this_cpu_ptr(&raised_list);
  12540. +
  12541. + if (llist_add(&work->llnode, list)) {
  12542. + if (!lazy_work || tick_nohz_tick_stopped())
  12543. arch_irq_work_raise();
  12544. }
  12545. @@ -116,9 +129,8 @@ bool irq_work_needs_cpu(void)
  12546. raised = this_cpu_ptr(&raised_list);
  12547. lazy = this_cpu_ptr(&lazy_list);
  12548. - if (llist_empty(raised) || arch_irq_work_has_interrupt())
  12549. - if (llist_empty(lazy))
  12550. - return false;
  12551. + if (llist_empty(raised) && llist_empty(lazy))
  12552. + return false;
  12553. /* All work should have been flushed before going offline */
  12554. WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
  12555. @@ -132,7 +144,7 @@ static void irq_work_run_list(struct llist_head *list)
  12556. struct irq_work *work;
  12557. struct llist_node *llnode;
  12558. - BUG_ON(!irqs_disabled());
  12559. + BUG_ON_NONRT(!irqs_disabled());
  12560. if (llist_empty(list))
  12561. return;
  12562. @@ -169,7 +181,16 @@ static void irq_work_run_list(struct llist_head *list)
  12563. void irq_work_run(void)
  12564. {
  12565. irq_work_run_list(this_cpu_ptr(&raised_list));
  12566. - irq_work_run_list(this_cpu_ptr(&lazy_list));
  12567. + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) {
  12568. + /*
  12569. + * NOTE: we raise softirq via IPI for safety,
  12570. + * and execute in irq_work_tick() to move the
  12571. + * overhead from hard to soft irq context.
  12572. + */
  12573. + if (!llist_empty(this_cpu_ptr(&lazy_list)))
  12574. + raise_softirq(TIMER_SOFTIRQ);
  12575. + } else
  12576. + irq_work_run_list(this_cpu_ptr(&lazy_list));
  12577. }
  12578. EXPORT_SYMBOL_GPL(irq_work_run);
  12579. @@ -179,8 +200,17 @@ void irq_work_tick(void)
  12580. if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
  12581. irq_work_run_list(raised);
  12582. +
  12583. + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
  12584. + irq_work_run_list(this_cpu_ptr(&lazy_list));
  12585. +}
  12586. +
  12587. +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
  12588. +void irq_work_tick_soft(void)
  12589. +{
  12590. irq_work_run_list(this_cpu_ptr(&lazy_list));
  12591. }
  12592. +#endif
  12593. /*
  12594. * Synchronize against the irq_work @entry, ensures the entry is not
  12595. diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
  12596. index ee1bc1bb8feb..ddef07958840 100644
  12597. --- a/kernel/ksysfs.c
  12598. +++ b/kernel/ksysfs.c
  12599. @@ -136,6 +136,15 @@ KERNEL_ATTR_RO(vmcoreinfo);
  12600. #endif /* CONFIG_KEXEC_CORE */
  12601. +#if defined(CONFIG_PREEMPT_RT_FULL)
  12602. +static ssize_t realtime_show(struct kobject *kobj,
  12603. + struct kobj_attribute *attr, char *buf)
  12604. +{
  12605. + return sprintf(buf, "%d\n", 1);
  12606. +}
  12607. +KERNEL_ATTR_RO(realtime);
  12608. +#endif
  12609. +
  12610. /* whether file capabilities are enabled */
  12611. static ssize_t fscaps_show(struct kobject *kobj,
  12612. struct kobj_attribute *attr, char *buf)
  12613. @@ -224,6 +233,9 @@ static struct attribute * kernel_attrs[] = {
  12614. #ifndef CONFIG_TINY_RCU
  12615. &rcu_expedited_attr.attr,
  12616. &rcu_normal_attr.attr,
  12617. +#endif
  12618. +#ifdef CONFIG_PREEMPT_RT_FULL
  12619. + &realtime_attr.attr,
  12620. #endif
  12621. NULL
  12622. };
  12623. diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
  12624. index 6f88e352cd4f..6ff9e8011dd0 100644
  12625. --- a/kernel/locking/Makefile
  12626. +++ b/kernel/locking/Makefile
  12627. @@ -2,7 +2,7 @@
  12628. # and is generally not a function of system call inputs.
  12629. KCOV_INSTRUMENT := n
  12630. -obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
  12631. +obj-y += semaphore.o percpu-rwsem.o
  12632. ifdef CONFIG_FUNCTION_TRACER
  12633. CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
  12634. @@ -11,7 +11,11 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE)
  12635. CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE)
  12636. endif
  12637. +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
  12638. +obj-y += mutex.o
  12639. obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
  12640. +endif
  12641. +obj-y += rwsem.o
  12642. obj-$(CONFIG_LOCKDEP) += lockdep.o
  12643. ifeq ($(CONFIG_PROC_FS),y)
  12644. obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
  12645. @@ -24,7 +28,10 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
  12646. obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
  12647. obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
  12648. obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
  12649. +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
  12650. obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
  12651. obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
  12652. +endif
  12653. +obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o rwsem-rt.o
  12654. obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
  12655. obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
  12656. diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
  12657. index 6599c7f3071d..79f8e00e802e 100644
  12658. --- a/kernel/locking/lockdep.c
  12659. +++ b/kernel/locking/lockdep.c
  12660. @@ -658,6 +658,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
  12661. struct lockdep_subclass_key *key;
  12662. struct hlist_head *hash_head;
  12663. struct lock_class *class;
  12664. + bool is_static = false;
  12665. if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
  12666. debug_locks_off();
  12667. @@ -671,10 +672,23 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
  12668. /*
  12669. * Static locks do not have their class-keys yet - for them the key
  12670. - * is the lock object itself:
  12671. + * is the lock object itself. If the lock is in the per cpu area,
  12672. + * the canonical address of the lock (per cpu offset removed) is
  12673. + * used.
  12674. */
  12675. - if (unlikely(!lock->key))
  12676. - lock->key = (void *)lock;
  12677. + if (unlikely(!lock->key)) {
  12678. + unsigned long can_addr, addr = (unsigned long)lock;
  12679. +
  12680. + if (__is_kernel_percpu_address(addr, &can_addr))
  12681. + lock->key = (void *)can_addr;
  12682. + else if (__is_module_percpu_address(addr, &can_addr))
  12683. + lock->key = (void *)can_addr;
  12684. + else if (static_obj(lock))
  12685. + lock->key = (void *)lock;
  12686. + else
  12687. + return ERR_PTR(-EINVAL);
  12688. + is_static = true;
  12689. + }
  12690. /*
  12691. * NOTE: the class-key must be unique. For dynamic locks, a static
  12692. @@ -706,7 +720,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
  12693. }
  12694. }
  12695. - return NULL;
  12696. + return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL);
  12697. }
  12698. /*
  12699. @@ -724,19 +738,18 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
  12700. DEBUG_LOCKS_WARN_ON(!irqs_disabled());
  12701. class = look_up_lock_class(lock, subclass);
  12702. - if (likely(class))
  12703. + if (likely(!IS_ERR_OR_NULL(class)))
  12704. goto out_set_class_cache;
  12705. /*
  12706. * Debug-check: all keys must be persistent!
  12707. - */
  12708. - if (!static_obj(lock->key)) {
  12709. + */
  12710. + if (IS_ERR(class)) {
  12711. debug_locks_off();
  12712. printk("INFO: trying to register non-static key.\n");
  12713. printk("the code is fine but needs lockdep annotation.\n");
  12714. printk("turning off the locking correctness validator.\n");
  12715. dump_stack();
  12716. -
  12717. return NULL;
  12718. }
  12719. @@ -3417,7 +3430,7 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
  12720. * Clearly if the lock hasn't been acquired _ever_, we're not
  12721. * holding it either, so report failure.
  12722. */
  12723. - if (!class)
  12724. + if (IS_ERR_OR_NULL(class))
  12725. return 0;
  12726. /*
  12727. @@ -3696,6 +3709,7 @@ static void check_flags(unsigned long flags)
  12728. }
  12729. }
  12730. +#ifndef CONFIG_PREEMPT_RT_FULL
  12731. /*
  12732. * We dont accurately track softirq state in e.g.
  12733. * hardirq contexts (such as on 4KSTACKS), so only
  12734. @@ -3710,6 +3724,7 @@ static void check_flags(unsigned long flags)
  12735. DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
  12736. }
  12737. }
  12738. +#endif
  12739. if (!debug_locks)
  12740. print_irqtrace_events(current);
  12741. @@ -4166,7 +4181,7 @@ void lockdep_reset_lock(struct lockdep_map *lock)
  12742. * If the class exists we look it up and zap it:
  12743. */
  12744. class = look_up_lock_class(lock, j);
  12745. - if (class)
  12746. + if (!IS_ERR_OR_NULL(class))
  12747. zap_class(class);
  12748. }
  12749. /*
  12750. diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
  12751. index d3de04b12f8c..0f49abeae337 100644
  12752. --- a/kernel/locking/locktorture.c
  12753. +++ b/kernel/locking/locktorture.c
  12754. @@ -26,7 +26,6 @@
  12755. #include <linux/kthread.h>
  12756. #include <linux/sched/rt.h>
  12757. #include <linux/spinlock.h>
  12758. -#include <linux/rwlock.h>
  12759. #include <linux/mutex.h>
  12760. #include <linux/rwsem.h>
  12761. #include <linux/smp.h>
  12762. diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
  12763. index ce182599cf2e..2ad3a1e8344c 100644
  12764. --- a/kernel/locking/percpu-rwsem.c
  12765. +++ b/kernel/locking/percpu-rwsem.c
  12766. @@ -18,7 +18,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
  12767. /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
  12768. rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
  12769. __init_rwsem(&sem->rw_sem, name, rwsem_key);
  12770. - init_waitqueue_head(&sem->writer);
  12771. + init_swait_queue_head(&sem->writer);
  12772. sem->readers_block = 0;
  12773. return 0;
  12774. }
  12775. @@ -103,7 +103,7 @@ void __percpu_up_read(struct percpu_rw_semaphore *sem)
  12776. __this_cpu_dec(*sem->read_count);
  12777. /* Prod writer to recheck readers_active */
  12778. - wake_up(&sem->writer);
  12779. + swake_up(&sem->writer);
  12780. }
  12781. EXPORT_SYMBOL_GPL(__percpu_up_read);
  12782. @@ -160,7 +160,7 @@ void percpu_down_write(struct percpu_rw_semaphore *sem)
  12783. */
  12784. /* Wait for all now active readers to complete. */
  12785. - wait_event(sem->writer, readers_active_check(sem));
  12786. + swait_event(sem->writer, readers_active_check(sem));
  12787. }
  12788. EXPORT_SYMBOL_GPL(percpu_down_write);
  12789. diff --git a/kernel/locking/rt.c b/kernel/locking/rt.c
  12790. new file mode 100644
  12791. index 000000000000..6284e3b15091
  12792. --- /dev/null
  12793. +++ b/kernel/locking/rt.c
  12794. @@ -0,0 +1,331 @@
  12795. +/*
  12796. + * kernel/rt.c
  12797. + *
  12798. + * Real-Time Preemption Support
  12799. + *
  12800. + * started by Ingo Molnar:
  12801. + *
  12802. + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  12803. + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  12804. + *
  12805. + * historic credit for proving that Linux spinlocks can be implemented via
  12806. + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow
  12807. + * and others) who prototyped it on 2.4 and did lots of comparative
  12808. + * research and analysis; TimeSys, for proving that you can implement a
  12809. + * fully preemptible kernel via the use of IRQ threading and mutexes;
  12810. + * Bill Huey for persuasively arguing on lkml that the mutex model is the
  12811. + * right one; and to MontaVista, who ported pmutexes to 2.6.
  12812. + *
  12813. + * This code is a from-scratch implementation and is not based on pmutexes,
  12814. + * but the idea of converting spinlocks to mutexes is used here too.
  12815. + *
  12816. + * lock debugging, locking tree, deadlock detection:
  12817. + *
  12818. + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
  12819. + * Released under the General Public License (GPL).
  12820. + *
  12821. + * Includes portions of the generic R/W semaphore implementation from:
  12822. + *
  12823. + * Copyright (c) 2001 David Howells (dhowells@redhat.com).
  12824. + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
  12825. + * - Derived also from comments by Linus
  12826. + *
  12827. + * Pending ownership of locks and ownership stealing:
  12828. + *
  12829. + * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt
  12830. + *
  12831. + * (also by Steven Rostedt)
  12832. + * - Converted single pi_lock to individual task locks.
  12833. + *
  12834. + * By Esben Nielsen:
  12835. + * Doing priority inheritance with help of the scheduler.
  12836. + *
  12837. + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  12838. + * - major rework based on Esben Nielsens initial patch
  12839. + * - replaced thread_info references by task_struct refs
  12840. + * - removed task->pending_owner dependency
  12841. + * - BKL drop/reacquire for semaphore style locks to avoid deadlocks
  12842. + * in the scheduler return path as discussed with Steven Rostedt
  12843. + *
  12844. + * Copyright (C) 2006, Kihon Technologies Inc.
  12845. + * Steven Rostedt <rostedt@goodmis.org>
  12846. + * - debugged and patched Thomas Gleixner's rework.
  12847. + * - added back the cmpxchg to the rework.
  12848. + * - turned atomic require back on for SMP.
  12849. + */
  12850. +
  12851. +#include <linux/spinlock.h>
  12852. +#include <linux/rtmutex.h>
  12853. +#include <linux/sched.h>
  12854. +#include <linux/delay.h>
  12855. +#include <linux/module.h>
  12856. +#include <linux/kallsyms.h>
  12857. +#include <linux/syscalls.h>
  12858. +#include <linux/interrupt.h>
  12859. +#include <linux/plist.h>
  12860. +#include <linux/fs.h>
  12861. +#include <linux/futex.h>
  12862. +#include <linux/hrtimer.h>
  12863. +
  12864. +#include "rtmutex_common.h"
  12865. +
  12866. +/*
  12867. + * struct mutex functions
  12868. + */
  12869. +void __mutex_do_init(struct mutex *mutex, const char *name,
  12870. + struct lock_class_key *key)
  12871. +{
  12872. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12873. + /*
  12874. + * Make sure we are not reinitializing a held lock:
  12875. + */
  12876. + debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
  12877. + lockdep_init_map(&mutex->dep_map, name, key, 0);
  12878. +#endif
  12879. + mutex->lock.save_state = 0;
  12880. +}
  12881. +EXPORT_SYMBOL(__mutex_do_init);
  12882. +
  12883. +void __lockfunc _mutex_lock(struct mutex *lock)
  12884. +{
  12885. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  12886. + rt_mutex_lock(&lock->lock);
  12887. +}
  12888. +EXPORT_SYMBOL(_mutex_lock);
  12889. +
  12890. +int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
  12891. +{
  12892. + int ret;
  12893. +
  12894. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  12895. + ret = rt_mutex_lock_interruptible(&lock->lock);
  12896. + if (ret)
  12897. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  12898. + return ret;
  12899. +}
  12900. +EXPORT_SYMBOL(_mutex_lock_interruptible);
  12901. +
  12902. +int __lockfunc _mutex_lock_killable(struct mutex *lock)
  12903. +{
  12904. + int ret;
  12905. +
  12906. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  12907. + ret = rt_mutex_lock_killable(&lock->lock);
  12908. + if (ret)
  12909. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  12910. + return ret;
  12911. +}
  12912. +EXPORT_SYMBOL(_mutex_lock_killable);
  12913. +
  12914. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12915. +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
  12916. +{
  12917. + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
  12918. + rt_mutex_lock(&lock->lock);
  12919. +}
  12920. +EXPORT_SYMBOL(_mutex_lock_nested);
  12921. +
  12922. +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
  12923. +{
  12924. + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_);
  12925. + rt_mutex_lock(&lock->lock);
  12926. +}
  12927. +EXPORT_SYMBOL(_mutex_lock_nest_lock);
  12928. +
  12929. +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass)
  12930. +{
  12931. + int ret;
  12932. +
  12933. + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
  12934. + ret = rt_mutex_lock_interruptible(&lock->lock);
  12935. + if (ret)
  12936. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  12937. + return ret;
  12938. +}
  12939. +EXPORT_SYMBOL(_mutex_lock_interruptible_nested);
  12940. +
  12941. +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass)
  12942. +{
  12943. + int ret;
  12944. +
  12945. + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
  12946. + ret = rt_mutex_lock_killable(&lock->lock);
  12947. + if (ret)
  12948. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  12949. + return ret;
  12950. +}
  12951. +EXPORT_SYMBOL(_mutex_lock_killable_nested);
  12952. +#endif
  12953. +
  12954. +int __lockfunc _mutex_trylock(struct mutex *lock)
  12955. +{
  12956. + int ret = rt_mutex_trylock(&lock->lock);
  12957. +
  12958. + if (ret)
  12959. + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  12960. +
  12961. + return ret;
  12962. +}
  12963. +EXPORT_SYMBOL(_mutex_trylock);
  12964. +
  12965. +void __lockfunc _mutex_unlock(struct mutex *lock)
  12966. +{
  12967. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  12968. + rt_mutex_unlock(&lock->lock);
  12969. +}
  12970. +EXPORT_SYMBOL(_mutex_unlock);
  12971. +
  12972. +/*
  12973. + * rwlock_t functions
  12974. + */
  12975. +int __lockfunc rt_write_trylock(rwlock_t *rwlock)
  12976. +{
  12977. + int ret;
  12978. +
  12979. + migrate_disable();
  12980. + ret = rt_mutex_trylock(&rwlock->lock);
  12981. + if (ret)
  12982. + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
  12983. + else
  12984. + migrate_enable();
  12985. +
  12986. + return ret;
  12987. +}
  12988. +EXPORT_SYMBOL(rt_write_trylock);
  12989. +
  12990. +int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags)
  12991. +{
  12992. + int ret;
  12993. +
  12994. + *flags = 0;
  12995. + ret = rt_write_trylock(rwlock);
  12996. + return ret;
  12997. +}
  12998. +EXPORT_SYMBOL(rt_write_trylock_irqsave);
  12999. +
  13000. +int __lockfunc rt_read_trylock(rwlock_t *rwlock)
  13001. +{
  13002. + struct rt_mutex *lock = &rwlock->lock;
  13003. + int ret = 1;
  13004. +
  13005. + /*
  13006. + * recursive read locks succeed when current owns the lock,
  13007. + * but not when read_depth == 0 which means that the lock is
  13008. + * write locked.
  13009. + */
  13010. + if (rt_mutex_owner(lock) != current) {
  13011. + migrate_disable();
  13012. + ret = rt_mutex_trylock(lock);
  13013. + if (ret)
  13014. + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
  13015. + else
  13016. + migrate_enable();
  13017. +
  13018. + } else if (!rwlock->read_depth) {
  13019. + ret = 0;
  13020. + }
  13021. +
  13022. + if (ret)
  13023. + rwlock->read_depth++;
  13024. +
  13025. + return ret;
  13026. +}
  13027. +EXPORT_SYMBOL(rt_read_trylock);
  13028. +
  13029. +void __lockfunc rt_write_lock(rwlock_t *rwlock)
  13030. +{
  13031. + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
  13032. + __rt_spin_lock(&rwlock->lock);
  13033. +}
  13034. +EXPORT_SYMBOL(rt_write_lock);
  13035. +
  13036. +void __lockfunc rt_read_lock(rwlock_t *rwlock)
  13037. +{
  13038. + struct rt_mutex *lock = &rwlock->lock;
  13039. +
  13040. +
  13041. + /*
  13042. + * recursive read locks succeed when current owns the lock
  13043. + */
  13044. + if (rt_mutex_owner(lock) != current) {
  13045. + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
  13046. + __rt_spin_lock(lock);
  13047. + }
  13048. + rwlock->read_depth++;
  13049. +}
  13050. +
  13051. +EXPORT_SYMBOL(rt_read_lock);
  13052. +
  13053. +void __lockfunc rt_write_unlock(rwlock_t *rwlock)
  13054. +{
  13055. + /* NOTE: we always pass in '1' for nested, for simplicity */
  13056. + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
  13057. + __rt_spin_unlock(&rwlock->lock);
  13058. + migrate_enable();
  13059. +}
  13060. +EXPORT_SYMBOL(rt_write_unlock);
  13061. +
  13062. +void __lockfunc rt_read_unlock(rwlock_t *rwlock)
  13063. +{
  13064. + /* Release the lock only when read_depth is down to 0 */
  13065. + if (--rwlock->read_depth == 0) {
  13066. + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
  13067. + __rt_spin_unlock(&rwlock->lock);
  13068. + migrate_enable();
  13069. + }
  13070. +}
  13071. +EXPORT_SYMBOL(rt_read_unlock);
  13072. +
  13073. +unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock)
  13074. +{
  13075. + rt_write_lock(rwlock);
  13076. +
  13077. + return 0;
  13078. +}
  13079. +EXPORT_SYMBOL(rt_write_lock_irqsave);
  13080. +
  13081. +unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock)
  13082. +{
  13083. + rt_read_lock(rwlock);
  13084. +
  13085. + return 0;
  13086. +}
  13087. +EXPORT_SYMBOL(rt_read_lock_irqsave);
  13088. +
  13089. +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
  13090. +{
  13091. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13092. + /*
  13093. + * Make sure we are not reinitializing a held lock:
  13094. + */
  13095. + debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
  13096. + lockdep_init_map(&rwlock->dep_map, name, key, 0);
  13097. +#endif
  13098. + rwlock->lock.save_state = 1;
  13099. + rwlock->read_depth = 0;
  13100. +}
  13101. +EXPORT_SYMBOL(__rt_rwlock_init);
  13102. +
  13103. +/**
  13104. + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
  13105. + * @cnt: the atomic which we are to dec
  13106. + * @lock: the mutex to return holding if we dec to 0
  13107. + *
  13108. + * return true and hold lock if we dec to 0, return false otherwise
  13109. + */
  13110. +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
  13111. +{
  13112. + /* dec if we can't possibly hit 0 */
  13113. + if (atomic_add_unless(cnt, -1, 1))
  13114. + return 0;
  13115. + /* we might hit 0, so take the lock */
  13116. + mutex_lock(lock);
  13117. + if (!atomic_dec_and_test(cnt)) {
  13118. + /* when we actually did the dec, we didn't hit 0 */
  13119. + mutex_unlock(lock);
  13120. + return 0;
  13121. + }
  13122. + /* we hit 0, and we hold the lock */
  13123. + return 1;
  13124. +}
  13125. +EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
  13126. diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c
  13127. index 62b6cee8ea7f..0613c4b1d059 100644
  13128. --- a/kernel/locking/rtmutex-debug.c
  13129. +++ b/kernel/locking/rtmutex-debug.c
  13130. @@ -173,12 +173,3 @@ void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
  13131. lock->name = name;
  13132. }
  13133. -void
  13134. -rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)
  13135. -{
  13136. -}
  13137. -
  13138. -void rt_mutex_deadlock_account_unlock(struct task_struct *task)
  13139. -{
  13140. -}
  13141. -
  13142. diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h
  13143. index d0519c3432b6..b585af9a1b50 100644
  13144. --- a/kernel/locking/rtmutex-debug.h
  13145. +++ b/kernel/locking/rtmutex-debug.h
  13146. @@ -9,9 +9,6 @@
  13147. * This file contains macros used solely by rtmutex.c. Debug version.
  13148. */
  13149. -extern void
  13150. -rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task);
  13151. -extern void rt_mutex_deadlock_account_unlock(struct task_struct *task);
  13152. extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
  13153. extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
  13154. extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
  13155. diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
  13156. index 2c49d76f96c3..3a8b5d44aaf8 100644
  13157. --- a/kernel/locking/rtmutex.c
  13158. +++ b/kernel/locking/rtmutex.c
  13159. @@ -7,6 +7,11 @@
  13160. * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  13161. * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
  13162. * Copyright (C) 2006 Esben Nielsen
  13163. + * Adaptive Spinlocks:
  13164. + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
  13165. + * and Peter Morreale,
  13166. + * Adaptive Spinlocks simplification:
  13167. + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
  13168. *
  13169. * See Documentation/locking/rt-mutex-design.txt for details.
  13170. */
  13171. @@ -16,6 +21,8 @@
  13172. #include <linux/sched/rt.h>
  13173. #include <linux/sched/deadline.h>
  13174. #include <linux/timer.h>
  13175. +#include <linux/ww_mutex.h>
  13176. +#include <linux/blkdev.h>
  13177. #include "rtmutex_common.h"
  13178. @@ -133,6 +140,12 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
  13179. WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
  13180. }
  13181. +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
  13182. +{
  13183. + return waiter && waiter != PI_WAKEUP_INPROGRESS &&
  13184. + waiter != PI_REQUEUE_INPROGRESS;
  13185. +}
  13186. +
  13187. /*
  13188. * We can speed up the acquire/release, if there's no debugging state to be
  13189. * set up.
  13190. @@ -222,6 +235,12 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
  13191. }
  13192. #endif
  13193. +/*
  13194. + * Only use with rt_mutex_waiter_{less,equal}()
  13195. + */
  13196. +#define task_to_waiter(p) &(struct rt_mutex_waiter) \
  13197. + { .prio = (p)->prio, .deadline = (p)->dl.deadline, .task = (p) }
  13198. +
  13199. static inline int
  13200. rt_mutex_waiter_less(struct rt_mutex_waiter *left,
  13201. struct rt_mutex_waiter *right)
  13202. @@ -236,12 +255,51 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left,
  13203. * then right waiter has a dl_prio() too.
  13204. */
  13205. if (dl_prio(left->prio))
  13206. - return dl_time_before(left->task->dl.deadline,
  13207. - right->task->dl.deadline);
  13208. + return dl_time_before(left->deadline, right->deadline);
  13209. return 0;
  13210. }
  13211. +static inline int
  13212. +rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
  13213. + struct rt_mutex_waiter *right)
  13214. +{
  13215. + if (left->prio != right->prio)
  13216. + return 0;
  13217. +
  13218. + /*
  13219. + * If both waiters have dl_prio(), we check the deadlines of the
  13220. + * associated tasks.
  13221. + * If left waiter has a dl_prio(), and we didn't return 0 above,
  13222. + * then right waiter has a dl_prio() too.
  13223. + */
  13224. + if (dl_prio(left->prio))
  13225. + return left->deadline == right->deadline;
  13226. +
  13227. + return 1;
  13228. +}
  13229. +
  13230. +#define STEAL_NORMAL 0
  13231. +#define STEAL_LATERAL 1
  13232. +
  13233. +static inline int
  13234. +rt_mutex_steal(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int mode)
  13235. +{
  13236. + struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
  13237. +
  13238. + if (waiter == top_waiter || rt_mutex_waiter_less(waiter, top_waiter))
  13239. + return 1;
  13240. +
  13241. + /*
  13242. + * Note that RT tasks are excluded from lateral-steals
  13243. + * to prevent the introduction of an unbounded latency.
  13244. + */
  13245. + if (mode == STEAL_NORMAL || rt_task(waiter->task))
  13246. + return 0;
  13247. +
  13248. + return rt_mutex_waiter_equal(waiter, top_waiter);
  13249. +}
  13250. +
  13251. static void
  13252. rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
  13253. {
  13254. @@ -320,72 +378,16 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
  13255. RB_CLEAR_NODE(&waiter->pi_tree_entry);
  13256. }
  13257. -/*
  13258. - * Calculate task priority from the waiter tree priority
  13259. - *
  13260. - * Return task->normal_prio when the waiter tree is empty or when
  13261. - * the waiter is not allowed to do priority boosting
  13262. - */
  13263. -int rt_mutex_getprio(struct task_struct *task)
  13264. -{
  13265. - if (likely(!task_has_pi_waiters(task)))
  13266. - return task->normal_prio;
  13267. -
  13268. - return min(task_top_pi_waiter(task)->prio,
  13269. - task->normal_prio);
  13270. -}
  13271. -
  13272. -struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
  13273. -{
  13274. - if (likely(!task_has_pi_waiters(task)))
  13275. - return NULL;
  13276. -
  13277. - return task_top_pi_waiter(task)->task;
  13278. -}
  13279. -
  13280. -/*
  13281. - * Called by sched_setscheduler() to get the priority which will be
  13282. - * effective after the change.
  13283. - */
  13284. -int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
  13285. -{
  13286. - if (!task_has_pi_waiters(task))
  13287. - return newprio;
  13288. -
  13289. - if (task_top_pi_waiter(task)->task->prio <= newprio)
  13290. - return task_top_pi_waiter(task)->task->prio;
  13291. - return newprio;
  13292. -}
  13293. -
  13294. -/*
  13295. - * Adjust the priority of a task, after its pi_waiters got modified.
  13296. - *
  13297. - * This can be both boosting and unboosting. task->pi_lock must be held.
  13298. - */
  13299. -static void __rt_mutex_adjust_prio(struct task_struct *task)
  13300. +static void rt_mutex_adjust_prio(struct task_struct *p)
  13301. {
  13302. - int prio = rt_mutex_getprio(task);
  13303. + struct task_struct *pi_task = NULL;
  13304. - if (task->prio != prio || dl_prio(prio))
  13305. - rt_mutex_setprio(task, prio);
  13306. -}
  13307. + lockdep_assert_held(&p->pi_lock);
  13308. -/*
  13309. - * Adjust task priority (undo boosting). Called from the exit path of
  13310. - * rt_mutex_slowunlock() and rt_mutex_slowlock().
  13311. - *
  13312. - * (Note: We do this outside of the protection of lock->wait_lock to
  13313. - * allow the lock to be taken while or before we readjust the priority
  13314. - * of task. We do not use the spin_xx_mutex() variants here as we are
  13315. - * outside of the debug path.)
  13316. - */
  13317. -void rt_mutex_adjust_prio(struct task_struct *task)
  13318. -{
  13319. - unsigned long flags;
  13320. + if (task_has_pi_waiters(p))
  13321. + pi_task = task_top_pi_waiter(p)->task;
  13322. - raw_spin_lock_irqsave(&task->pi_lock, flags);
  13323. - __rt_mutex_adjust_prio(task);
  13324. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  13325. + rt_mutex_setprio(p, pi_task);
  13326. }
  13327. /*
  13328. @@ -414,6 +416,14 @@ static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
  13329. return debug_rt_mutex_detect_deadlock(waiter, chwalk);
  13330. }
  13331. +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
  13332. +{
  13333. + if (waiter->savestate)
  13334. + wake_up_lock_sleeper(waiter->task);
  13335. + else
  13336. + wake_up_process(waiter->task);
  13337. +}
  13338. +
  13339. /*
  13340. * Max number of times we'll walk the boosting chain:
  13341. */
  13342. @@ -421,7 +431,8 @@ int max_lock_depth = 1024;
  13343. static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
  13344. {
  13345. - return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
  13346. + return rt_mutex_real_waiter(p->pi_blocked_on) ?
  13347. + p->pi_blocked_on->lock : NULL;
  13348. }
  13349. /*
  13350. @@ -557,7 +568,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  13351. * reached or the state of the chain has changed while we
  13352. * dropped the locks.
  13353. */
  13354. - if (!waiter)
  13355. + if (!rt_mutex_real_waiter(waiter))
  13356. goto out_unlock_pi;
  13357. /*
  13358. @@ -608,7 +619,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  13359. * enabled we continue, but stop the requeueing in the chain
  13360. * walk.
  13361. */
  13362. - if (waiter->prio == task->prio) {
  13363. + if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
  13364. if (!detect_deadlock)
  13365. goto out_unlock_pi;
  13366. else
  13367. @@ -704,7 +715,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  13368. /* [7] Requeue the waiter in the lock waiter tree. */
  13369. rt_mutex_dequeue(lock, waiter);
  13370. +
  13371. + /*
  13372. + * Update the waiter prio fields now that we're dequeued.
  13373. + *
  13374. + * These values can have changed through either:
  13375. + *
  13376. + * sys_sched_set_scheduler() / sys_sched_setattr()
  13377. + *
  13378. + * or
  13379. + *
  13380. + * DL CBS enforcement advancing the effective deadline.
  13381. + *
  13382. + * Even though pi_waiters also uses these fields, and that tree is only
  13383. + * updated in [11], we can do this here, since we hold [L], which
  13384. + * serializes all pi_waiters access and rb_erase() does not care about
  13385. + * the values of the node being removed.
  13386. + */
  13387. waiter->prio = task->prio;
  13388. + waiter->deadline = task->dl.deadline;
  13389. +
  13390. rt_mutex_enqueue(lock, waiter);
  13391. /* [8] Release the task */
  13392. @@ -719,13 +749,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  13393. * follow here. This is the end of the chain we are walking.
  13394. */
  13395. if (!rt_mutex_owner(lock)) {
  13396. + struct rt_mutex_waiter *lock_top_waiter;
  13397. +
  13398. /*
  13399. * If the requeue [7] above changed the top waiter,
  13400. * then we need to wake the new top waiter up to try
  13401. * to get the lock.
  13402. */
  13403. - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
  13404. - wake_up_process(rt_mutex_top_waiter(lock)->task);
  13405. + lock_top_waiter = rt_mutex_top_waiter(lock);
  13406. + if (prerequeue_top_waiter != lock_top_waiter)
  13407. + rt_mutex_wake_waiter(lock_top_waiter);
  13408. raw_spin_unlock_irq(&lock->wait_lock);
  13409. return 0;
  13410. }
  13411. @@ -745,7 +778,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  13412. */
  13413. rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
  13414. rt_mutex_enqueue_pi(task, waiter);
  13415. - __rt_mutex_adjust_prio(task);
  13416. + rt_mutex_adjust_prio(task);
  13417. } else if (prerequeue_top_waiter == waiter) {
  13418. /*
  13419. @@ -761,7 +794,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  13420. rt_mutex_dequeue_pi(task, waiter);
  13421. waiter = rt_mutex_top_waiter(lock);
  13422. rt_mutex_enqueue_pi(task, waiter);
  13423. - __rt_mutex_adjust_prio(task);
  13424. + rt_mutex_adjust_prio(task);
  13425. } else {
  13426. /*
  13427. * Nothing changed. No need to do any priority
  13428. @@ -818,6 +851,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  13429. return ret;
  13430. }
  13431. +
  13432. /*
  13433. * Try to take an rt-mutex
  13434. *
  13435. @@ -827,10 +861,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  13436. * @task: The task which wants to acquire the lock
  13437. * @waiter: The waiter that is queued to the lock's wait tree if the
  13438. * callsite called task_blocked_on_lock(), otherwise NULL
  13439. + * @mode: Lock steal mode (STEAL_NORMAL, STEAL_LATERAL)
  13440. */
  13441. -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  13442. - struct rt_mutex_waiter *waiter)
  13443. +static int __try_to_take_rt_mutex(struct rt_mutex *lock,
  13444. + struct task_struct *task,
  13445. + struct rt_mutex_waiter *waiter, int mode)
  13446. {
  13447. + lockdep_assert_held(&lock->wait_lock);
  13448. +
  13449. /*
  13450. * Before testing whether we can acquire @lock, we set the
  13451. * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
  13452. @@ -863,12 +901,11 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  13453. */
  13454. if (waiter) {
  13455. /*
  13456. - * If waiter is not the highest priority waiter of
  13457. - * @lock, give up.
  13458. + * If waiter is not the highest priority waiter of @lock,
  13459. + * or its peer when lateral steal is allowed, give up.
  13460. */
  13461. - if (waiter != rt_mutex_top_waiter(lock))
  13462. + if (!rt_mutex_steal(lock, waiter, mode))
  13463. return 0;
  13464. -
  13465. /*
  13466. * We can acquire the lock. Remove the waiter from the
  13467. * lock waiters tree.
  13468. @@ -886,13 +923,12 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  13469. */
  13470. if (rt_mutex_has_waiters(lock)) {
  13471. /*
  13472. - * If @task->prio is greater than or equal to
  13473. - * the top waiter priority (kernel view),
  13474. - * @task lost.
  13475. + * If @task->prio is greater than the top waiter
  13476. + * priority (kernel view), or equal to it when a
  13477. + * lateral steal is forbidden, @task lost.
  13478. */
  13479. - if (task->prio >= rt_mutex_top_waiter(lock)->prio)
  13480. + if (!rt_mutex_steal(lock, task_to_waiter(task), mode))
  13481. return 0;
  13482. -
  13483. /*
  13484. * The current top waiter stays enqueued. We
  13485. * don't have to change anything in the lock
  13486. @@ -936,177 +972,589 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  13487. */
  13488. rt_mutex_set_owner(lock, task);
  13489. - rt_mutex_deadlock_account_lock(lock, task);
  13490. -
  13491. return 1;
  13492. }
  13493. +#ifdef CONFIG_PREEMPT_RT_FULL
  13494. /*
  13495. - * Task blocks on lock.
  13496. - *
  13497. - * Prepare waiter and propagate pi chain
  13498. - *
  13499. - * This must be called with lock->wait_lock held and interrupts disabled
  13500. + * preemptible spin_lock functions:
  13501. */
  13502. -static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  13503. - struct rt_mutex_waiter *waiter,
  13504. - struct task_struct *task,
  13505. - enum rtmutex_chainwalk chwalk)
  13506. +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
  13507. + void (*slowfn)(struct rt_mutex *lock,
  13508. + bool mg_off),
  13509. + bool do_mig_dis)
  13510. {
  13511. - struct task_struct *owner = rt_mutex_owner(lock);
  13512. - struct rt_mutex_waiter *top_waiter = waiter;
  13513. - struct rt_mutex *next_lock;
  13514. - int chain_walk = 0, res;
  13515. + might_sleep_no_state_check();
  13516. - /*
  13517. - * Early deadlock detection. We really don't want the task to
  13518. - * enqueue on itself just to untangle the mess later. It's not
  13519. - * only an optimization. We drop the locks, so another waiter
  13520. - * can come in before the chain walk detects the deadlock. So
  13521. - * the other will detect the deadlock and return -EDEADLOCK,
  13522. - * which is wrong, as the other waiter is not in a deadlock
  13523. - * situation.
  13524. - */
  13525. - if (owner == task)
  13526. - return -EDEADLK;
  13527. + if (do_mig_dis)
  13528. + migrate_disable();
  13529. - raw_spin_lock(&task->pi_lock);
  13530. - __rt_mutex_adjust_prio(task);
  13531. - waiter->task = task;
  13532. - waiter->lock = lock;
  13533. - waiter->prio = task->prio;
  13534. + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
  13535. + return;
  13536. + else
  13537. + slowfn(lock, do_mig_dis);
  13538. +}
  13539. - /* Get the top priority waiter on the lock */
  13540. - if (rt_mutex_has_waiters(lock))
  13541. - top_waiter = rt_mutex_top_waiter(lock);
  13542. - rt_mutex_enqueue(lock, waiter);
  13543. +static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
  13544. + void (*slowfn)(struct rt_mutex *lock))
  13545. +{
  13546. + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
  13547. + return;
  13548. + else
  13549. + slowfn(lock);
  13550. +}
  13551. +#ifdef CONFIG_SMP
  13552. +/*
  13553. + * Note that owner is a speculative pointer and dereferencing relies
  13554. + * on rcu_read_lock() and the check against the lock owner.
  13555. + */
  13556. +static int adaptive_wait(struct rt_mutex *lock,
  13557. + struct task_struct *owner)
  13558. +{
  13559. + int res = 0;
  13560. - task->pi_blocked_on = waiter;
  13561. + rcu_read_lock();
  13562. + for (;;) {
  13563. + if (owner != rt_mutex_owner(lock))
  13564. + break;
  13565. + /*
  13566. + * Ensure that owner->on_cpu is dereferenced _after_
  13567. + * checking the above to be valid.
  13568. + */
  13569. + barrier();
  13570. + if (!owner->on_cpu) {
  13571. + res = 1;
  13572. + break;
  13573. + }
  13574. + cpu_relax();
  13575. + }
  13576. + rcu_read_unlock();
  13577. + return res;
  13578. +}
  13579. +#else
  13580. +static int adaptive_wait(struct rt_mutex *lock,
  13581. + struct task_struct *orig_owner)
  13582. +{
  13583. + return 1;
  13584. +}
  13585. +#endif
  13586. - raw_spin_unlock(&task->pi_lock);
  13587. +static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  13588. + struct rt_mutex_waiter *waiter,
  13589. + struct task_struct *task,
  13590. + enum rtmutex_chainwalk chwalk);
  13591. +/*
  13592. + * Slow path lock function spin_lock style: this variant is very
  13593. + * careful not to miss any non-lock wakeups.
  13594. + *
  13595. + * We store the current state under p->pi_lock in p->saved_state and
  13596. + * the try_to_wake_up() code handles this accordingly.
  13597. + */
  13598. +static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock,
  13599. + bool mg_off)
  13600. +{
  13601. + struct task_struct *lock_owner, *self = current;
  13602. + struct rt_mutex_waiter waiter, *top_waiter;
  13603. + unsigned long flags;
  13604. + int ret;
  13605. - if (!owner)
  13606. - return 0;
  13607. + rt_mutex_init_waiter(&waiter, true);
  13608. - raw_spin_lock(&owner->pi_lock);
  13609. - if (waiter == rt_mutex_top_waiter(lock)) {
  13610. - rt_mutex_dequeue_pi(owner, top_waiter);
  13611. - rt_mutex_enqueue_pi(owner, waiter);
  13612. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  13613. - __rt_mutex_adjust_prio(owner);
  13614. - if (owner->pi_blocked_on)
  13615. - chain_walk = 1;
  13616. - } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
  13617. - chain_walk = 1;
  13618. + if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
  13619. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  13620. + return;
  13621. }
  13622. - /* Store the lock on which owner is blocked or NULL */
  13623. - next_lock = task_blocked_on_lock(owner);
  13624. + BUG_ON(rt_mutex_owner(lock) == self);
  13625. - raw_spin_unlock(&owner->pi_lock);
  13626. /*
  13627. - * Even if full deadlock detection is on, if the owner is not
  13628. - * blocked itself, we can avoid finding this out in the chain
  13629. - * walk.
  13630. + * We save whatever state the task is in and we'll restore it
  13631. + * after acquiring the lock taking real wakeups into account
  13632. + * as well. We are serialized via pi_lock against wakeups. See
  13633. + * try_to_wake_up().
  13634. */
  13635. - if (!chain_walk || !next_lock)
  13636. - return 0;
  13637. + raw_spin_lock(&self->pi_lock);
  13638. + self->saved_state = self->state;
  13639. + __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
  13640. + raw_spin_unlock(&self->pi_lock);
  13641. - /*
  13642. - * The owner can't disappear while holding a lock,
  13643. - * so the owner struct is protected by wait_lock.
  13644. - * Gets dropped in rt_mutex_adjust_prio_chain()!
  13645. - */
  13646. - get_task_struct(owner);
  13647. + ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK);
  13648. + BUG_ON(ret);
  13649. - raw_spin_unlock_irq(&lock->wait_lock);
  13650. + for (;;) {
  13651. + /* Try to acquire the lock again. */
  13652. + if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
  13653. + break;
  13654. - res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
  13655. - next_lock, waiter, task);
  13656. + top_waiter = rt_mutex_top_waiter(lock);
  13657. + lock_owner = rt_mutex_owner(lock);
  13658. - raw_spin_lock_irq(&lock->wait_lock);
  13659. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  13660. - return res;
  13661. -}
  13662. + debug_rt_mutex_print_deadlock(&waiter);
  13663. -/*
  13664. - * Remove the top waiter from the current tasks pi waiter tree and
  13665. - * queue it up.
  13666. - *
  13667. - * Called with lock->wait_lock held and interrupts disabled.
  13668. - */
  13669. -static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
  13670. - struct rt_mutex *lock)
  13671. -{
  13672. - struct rt_mutex_waiter *waiter;
  13673. + if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) {
  13674. + if (mg_off)
  13675. + migrate_enable();
  13676. + schedule();
  13677. + if (mg_off)
  13678. + migrate_disable();
  13679. + }
  13680. - raw_spin_lock(&current->pi_lock);
  13681. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  13682. - waiter = rt_mutex_top_waiter(lock);
  13683. + raw_spin_lock(&self->pi_lock);
  13684. + __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
  13685. + raw_spin_unlock(&self->pi_lock);
  13686. + }
  13687. /*
  13688. - * Remove it from current->pi_waiters. We do not adjust a
  13689. - * possible priority boost right now. We execute wakeup in the
  13690. - * boosted mode and go back to normal after releasing
  13691. - * lock->wait_lock.
  13692. + * Restore the task state to current->saved_state. We set it
  13693. + * to the original state above and the try_to_wake_up() code
  13694. + * has possibly updated it when a real (non-rtmutex) wakeup
  13695. + * happened while we were blocked. Clear saved_state so
  13696. + * try_to_wake_up() does not get confused.
  13697. */
  13698. - rt_mutex_dequeue_pi(current, waiter);
  13699. + raw_spin_lock(&self->pi_lock);
  13700. + __set_current_state_no_track(self->saved_state);
  13701. + self->saved_state = TASK_RUNNING;
  13702. + raw_spin_unlock(&self->pi_lock);
  13703. /*
  13704. - * As we are waking up the top waiter, and the waiter stays
  13705. - * queued on the lock until it gets the lock, this lock
  13706. - * obviously has waiters. Just set the bit here and this has
  13707. - * the added benefit of forcing all new tasks into the
  13708. - * slow path making sure no task of lower priority than
  13709. - * the top waiter can steal this lock.
  13710. + * try_to_take_rt_mutex() sets the waiter bit
  13711. + * unconditionally. We might have to fix that up:
  13712. */
  13713. - lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
  13714. + fixup_rt_mutex_waiters(lock);
  13715. - raw_spin_unlock(&current->pi_lock);
  13716. + BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
  13717. + BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry));
  13718. - wake_q_add(wake_q, waiter->task);
  13719. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  13720. +
  13721. + debug_rt_mutex_free_waiter(&waiter);
  13722. }
  13723. +static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock,
  13724. + struct wake_q_head *wake_q,
  13725. + struct wake_q_head *wq_sleeper);
  13726. /*
  13727. - * Remove a waiter from a lock and give up
  13728. - *
  13729. - * Must be called with lock->wait_lock held and interrupts disabled. I must
  13730. - * have just failed to try_to_take_rt_mutex().
  13731. + * Slow path to release a rt_mutex spin_lock style
  13732. */
  13733. -static void remove_waiter(struct rt_mutex *lock,
  13734. - struct rt_mutex_waiter *waiter)
  13735. +static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
  13736. {
  13737. - bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
  13738. - struct task_struct *owner = rt_mutex_owner(lock);
  13739. - struct rt_mutex *next_lock;
  13740. + unsigned long flags;
  13741. + WAKE_Q(wake_q);
  13742. + WAKE_Q(wake_sleeper_q);
  13743. + bool postunlock;
  13744. - raw_spin_lock(&current->pi_lock);
  13745. - rt_mutex_dequeue(lock, waiter);
  13746. - current->pi_blocked_on = NULL;
  13747. - raw_spin_unlock(&current->pi_lock);
  13748. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  13749. + postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q);
  13750. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  13751. - /*
  13752. - * Only update priority if the waiter was the highest priority
  13753. - * waiter of the lock and there is an owner to update.
  13754. - */
  13755. - if (!owner || !is_top_waiter)
  13756. - return;
  13757. + if (postunlock)
  13758. + rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
  13759. +}
  13760. - raw_spin_lock(&owner->pi_lock);
  13761. +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
  13762. +{
  13763. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false);
  13764. + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  13765. +}
  13766. +EXPORT_SYMBOL(rt_spin_lock__no_mg);
  13767. - rt_mutex_dequeue_pi(owner, waiter);
  13768. +void __lockfunc rt_spin_lock(spinlock_t *lock)
  13769. +{
  13770. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
  13771. + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  13772. +}
  13773. +EXPORT_SYMBOL(rt_spin_lock);
  13774. - if (rt_mutex_has_waiters(lock))
  13775. - rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
  13776. +void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
  13777. +{
  13778. + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true);
  13779. +}
  13780. +EXPORT_SYMBOL(__rt_spin_lock);
  13781. - __rt_mutex_adjust_prio(owner);
  13782. +void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock)
  13783. +{
  13784. + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false);
  13785. +}
  13786. +EXPORT_SYMBOL(__rt_spin_lock__no_mg);
  13787. - /* Store the lock on which owner is blocked or NULL */
  13788. - next_lock = task_blocked_on_lock(owner);
  13789. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13790. +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
  13791. +{
  13792. + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
  13793. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
  13794. +}
  13795. +EXPORT_SYMBOL(rt_spin_lock_nested);
  13796. +#endif
  13797. - raw_spin_unlock(&owner->pi_lock);
  13798. +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock)
  13799. +{
  13800. + /* NOTE: we always pass in '1' for nested, for simplicity */
  13801. + spin_release(&lock->dep_map, 1, _RET_IP_);
  13802. + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
  13803. +}
  13804. +EXPORT_SYMBOL(rt_spin_unlock__no_mg);
  13805. - /*
  13806. +void __lockfunc rt_spin_unlock(spinlock_t *lock)
  13807. +{
  13808. + /* NOTE: we always pass in '1' for nested, for simplicity */
  13809. + spin_release(&lock->dep_map, 1, _RET_IP_);
  13810. + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
  13811. + migrate_enable();
  13812. +}
  13813. +EXPORT_SYMBOL(rt_spin_unlock);
  13814. +
  13815. +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
  13816. +{
  13817. + rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
  13818. +}
  13819. +EXPORT_SYMBOL(__rt_spin_unlock);
  13820. +
  13821. +/*
  13822. + * Wait for the lock to get unlocked: instead of polling for an unlock
  13823. + * (like raw spinlocks do), we lock and unlock, to force the kernel to
  13824. + * schedule if there's contention:
  13825. + */
  13826. +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
  13827. +{
  13828. + spin_lock(lock);
  13829. + spin_unlock(lock);
  13830. +}
  13831. +EXPORT_SYMBOL(rt_spin_unlock_wait);
  13832. +
  13833. +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock)
  13834. +{
  13835. + int ret;
  13836. +
  13837. + ret = rt_mutex_trylock(&lock->lock);
  13838. + if (ret)
  13839. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  13840. + return ret;
  13841. +}
  13842. +EXPORT_SYMBOL(rt_spin_trylock__no_mg);
  13843. +
  13844. +int __lockfunc rt_spin_trylock(spinlock_t *lock)
  13845. +{
  13846. + int ret;
  13847. +
  13848. + migrate_disable();
  13849. + ret = rt_mutex_trylock(&lock->lock);
  13850. + if (ret)
  13851. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  13852. + else
  13853. + migrate_enable();
  13854. + return ret;
  13855. +}
  13856. +EXPORT_SYMBOL(rt_spin_trylock);
  13857. +
  13858. +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
  13859. +{
  13860. + int ret;
  13861. +
  13862. + local_bh_disable();
  13863. + ret = rt_mutex_trylock(&lock->lock);
  13864. + if (ret) {
  13865. + migrate_disable();
  13866. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  13867. + } else
  13868. + local_bh_enable();
  13869. + return ret;
  13870. +}
  13871. +EXPORT_SYMBOL(rt_spin_trylock_bh);
  13872. +
  13873. +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
  13874. +{
  13875. + int ret;
  13876. +
  13877. + *flags = 0;
  13878. + ret = rt_mutex_trylock(&lock->lock);
  13879. + if (ret) {
  13880. + migrate_disable();
  13881. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  13882. + }
  13883. + return ret;
  13884. +}
  13885. +EXPORT_SYMBOL(rt_spin_trylock_irqsave);
  13886. +
  13887. +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
  13888. +{
  13889. + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
  13890. + if (atomic_add_unless(atomic, -1, 1))
  13891. + return 0;
  13892. + rt_spin_lock(lock);
  13893. + if (atomic_dec_and_test(atomic))
  13894. + return 1;
  13895. + rt_spin_unlock(lock);
  13896. + return 0;
  13897. +}
  13898. +EXPORT_SYMBOL(atomic_dec_and_spin_lock);
  13899. +
  13900. + void
  13901. +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
  13902. +{
  13903. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13904. + /*
  13905. + * Make sure we are not reinitializing a held lock:
  13906. + */
  13907. + debug_check_no_locks_freed((void *)lock, sizeof(*lock));
  13908. + lockdep_init_map(&lock->dep_map, name, key, 0);
  13909. +#endif
  13910. +}
  13911. +EXPORT_SYMBOL(__rt_spin_lock_init);
  13912. +
  13913. +#endif /* PREEMPT_RT_FULL */
  13914. +
  13915. +#ifdef CONFIG_PREEMPT_RT_FULL
  13916. + static inline int __sched
  13917. +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
  13918. +{
  13919. + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
  13920. + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
  13921. +
  13922. + if (!hold_ctx)
  13923. + return 0;
  13924. +
  13925. + if (unlikely(ctx == hold_ctx))
  13926. + return -EALREADY;
  13927. +
  13928. + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
  13929. + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
  13930. +#ifdef CONFIG_DEBUG_MUTEXES
  13931. + DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
  13932. + ctx->contending_lock = ww;
  13933. +#endif
  13934. + return -EDEADLK;
  13935. + }
  13936. +
  13937. + return 0;
  13938. +}
  13939. +#else
  13940. + static inline int __sched
  13941. +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
  13942. +{
  13943. + BUG();
  13944. + return 0;
  13945. +}
  13946. +
  13947. +#endif
  13948. +
  13949. +static inline int
  13950. +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  13951. + struct rt_mutex_waiter *waiter)
  13952. +{
  13953. + return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
  13954. +}
  13955. +
  13956. +/*
  13957. + * Task blocks on lock.
  13958. + *
  13959. + * Prepare waiter and propagate pi chain
  13960. + *
  13961. + * This must be called with lock->wait_lock held and interrupts disabled
  13962. + */
  13963. +static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  13964. + struct rt_mutex_waiter *waiter,
  13965. + struct task_struct *task,
  13966. + enum rtmutex_chainwalk chwalk)
  13967. +{
  13968. + struct task_struct *owner = rt_mutex_owner(lock);
  13969. + struct rt_mutex_waiter *top_waiter = waiter;
  13970. + struct rt_mutex *next_lock;
  13971. + int chain_walk = 0, res;
  13972. +
  13973. + lockdep_assert_held(&lock->wait_lock);
  13974. +
  13975. + /*
  13976. + * Early deadlock detection. We really don't want the task to
  13977. + * enqueue on itself just to untangle the mess later. It's not
  13978. + * only an optimization. We drop the locks, so another waiter
  13979. + * can come in before the chain walk detects the deadlock. So
  13980. + * the other will detect the deadlock and return -EDEADLOCK,
  13981. + * which is wrong, as the other waiter is not in a deadlock
  13982. + * situation.
  13983. + */
  13984. + if (owner == task)
  13985. + return -EDEADLK;
  13986. +
  13987. + raw_spin_lock(&task->pi_lock);
  13988. +
  13989. + /*
  13990. + * In the case of futex requeue PI, this will be a proxy
  13991. + * lock. The task will wake unaware that it is enqueued on
  13992. + * this lock. Avoid blocking on two locks and corrupting
  13993. + * pi_blocked_on via the PI_WAKEUP_INPROGRESS
  13994. + * flag. futex_wait_requeue_pi() sets this when it wakes up
  13995. + * before requeue (due to a signal or timeout). Do not enqueue
  13996. + * the task if PI_WAKEUP_INPROGRESS is set.
  13997. + */
  13998. + if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
  13999. + raw_spin_unlock(&task->pi_lock);
  14000. + return -EAGAIN;
  14001. + }
  14002. +
  14003. + BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
  14004. +
  14005. + rt_mutex_adjust_prio(task);
  14006. + waiter->task = task;
  14007. + waiter->lock = lock;
  14008. + waiter->prio = task->prio;
  14009. + waiter->deadline = task->dl.deadline;
  14010. +
  14011. + /* Get the top priority waiter on the lock */
  14012. + if (rt_mutex_has_waiters(lock))
  14013. + top_waiter = rt_mutex_top_waiter(lock);
  14014. + rt_mutex_enqueue(lock, waiter);
  14015. +
  14016. + task->pi_blocked_on = waiter;
  14017. +
  14018. + raw_spin_unlock(&task->pi_lock);
  14019. +
  14020. + if (!owner)
  14021. + return 0;
  14022. +
  14023. + raw_spin_lock(&owner->pi_lock);
  14024. + if (waiter == rt_mutex_top_waiter(lock)) {
  14025. + rt_mutex_dequeue_pi(owner, top_waiter);
  14026. + rt_mutex_enqueue_pi(owner, waiter);
  14027. +
  14028. + rt_mutex_adjust_prio(owner);
  14029. + if (rt_mutex_real_waiter(owner->pi_blocked_on))
  14030. + chain_walk = 1;
  14031. + } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
  14032. + chain_walk = 1;
  14033. + }
  14034. +
  14035. + /* Store the lock on which owner is blocked or NULL */
  14036. + next_lock = task_blocked_on_lock(owner);
  14037. +
  14038. + raw_spin_unlock(&owner->pi_lock);
  14039. + /*
  14040. + * Even if full deadlock detection is on, if the owner is not
  14041. + * blocked itself, we can avoid finding this out in the chain
  14042. + * walk.
  14043. + */
  14044. + if (!chain_walk || !next_lock)
  14045. + return 0;
  14046. +
  14047. + /*
  14048. + * The owner can't disappear while holding a lock,
  14049. + * so the owner struct is protected by wait_lock.
  14050. + * Gets dropped in rt_mutex_adjust_prio_chain()!
  14051. + */
  14052. + get_task_struct(owner);
  14053. +
  14054. + raw_spin_unlock_irq(&lock->wait_lock);
  14055. +
  14056. + res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
  14057. + next_lock, waiter, task);
  14058. +
  14059. + raw_spin_lock_irq(&lock->wait_lock);
  14060. +
  14061. + return res;
  14062. +}
  14063. +
  14064. +/*
  14065. + * Remove the top waiter from the current tasks pi waiter tree and
  14066. + * queue it up.
  14067. + *
  14068. + * Called with lock->wait_lock held and interrupts disabled.
  14069. + */
  14070. +static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
  14071. + struct wake_q_head *wake_sleeper_q,
  14072. + struct rt_mutex *lock)
  14073. +{
  14074. + struct rt_mutex_waiter *waiter;
  14075. +
  14076. + raw_spin_lock(&current->pi_lock);
  14077. +
  14078. + waiter = rt_mutex_top_waiter(lock);
  14079. +
  14080. + /*
  14081. + * Remove it from current->pi_waiters and deboost.
  14082. + *
  14083. + * We must in fact deboost here in order to ensure we call
  14084. + * rt_mutex_setprio() to update p->pi_top_task before the
  14085. + * task unblocks.
  14086. + */
  14087. + rt_mutex_dequeue_pi(current, waiter);
  14088. + rt_mutex_adjust_prio(current);
  14089. +
  14090. + /*
  14091. + * As we are waking up the top waiter, and the waiter stays
  14092. + * queued on the lock until it gets the lock, this lock
  14093. + * obviously has waiters. Just set the bit here and this has
  14094. + * the added benefit of forcing all new tasks into the
  14095. + * slow path making sure no task of lower priority than
  14096. + * the top waiter can steal this lock.
  14097. + */
  14098. + lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
  14099. +
  14100. + /*
  14101. + * We deboosted before waking the top waiter task such that we don't
  14102. + * run two tasks with the 'same' priority (and ensure the
  14103. + * p->pi_top_task pointer points to a blocked task). This however can
  14104. + * lead to priority inversion if we would get preempted after the
  14105. + * deboost but before waking our donor task, hence the preempt_disable()
  14106. + * before unlock.
  14107. + *
  14108. + * Pairs with preempt_enable() in rt_mutex_postunlock();
  14109. + */
  14110. + preempt_disable();
  14111. + if (waiter->savestate)
  14112. + wake_q_add_sleeper(wake_sleeper_q, waiter->task);
  14113. + else
  14114. + wake_q_add(wake_q, waiter->task);
  14115. + raw_spin_unlock(&current->pi_lock);
  14116. +}
  14117. +
  14118. +/*
  14119. + * Remove a waiter from a lock and give up
  14120. + *
  14121. + * Must be called with lock->wait_lock held and interrupts disabled. It must
  14122. + * have just failed to try_to_take_rt_mutex().
  14123. + */
  14124. +static void remove_waiter(struct rt_mutex *lock,
  14125. + struct rt_mutex_waiter *waiter)
  14126. +{
  14127. + bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
  14128. + struct task_struct *owner = rt_mutex_owner(lock);
  14129. + struct rt_mutex *next_lock = NULL;
  14130. +
  14131. + lockdep_assert_held(&lock->wait_lock);
  14132. +
  14133. + raw_spin_lock(&current->pi_lock);
  14134. + rt_mutex_dequeue(lock, waiter);
  14135. + current->pi_blocked_on = NULL;
  14136. + raw_spin_unlock(&current->pi_lock);
  14137. +
  14138. + /*
  14139. + * Only update priority if the waiter was the highest priority
  14140. + * waiter of the lock and there is an owner to update.
  14141. + */
  14142. + if (!owner || !is_top_waiter)
  14143. + return;
  14144. +
  14145. + raw_spin_lock(&owner->pi_lock);
  14146. +
  14147. + rt_mutex_dequeue_pi(owner, waiter);
  14148. +
  14149. + if (rt_mutex_has_waiters(lock))
  14150. + rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
  14151. +
  14152. + rt_mutex_adjust_prio(owner);
  14153. +
  14154. + /* Store the lock on which owner is blocked or NULL */
  14155. + if (rt_mutex_real_waiter(owner->pi_blocked_on))
  14156. + next_lock = task_blocked_on_lock(owner);
  14157. +
  14158. + raw_spin_unlock(&owner->pi_lock);
  14159. +
  14160. + /*
  14161. * Don't walk the chain, if the owner task is not blocked
  14162. * itself.
  14163. */
  14164. @@ -1138,21 +1586,30 @@ void rt_mutex_adjust_pi(struct task_struct *task)
  14165. raw_spin_lock_irqsave(&task->pi_lock, flags);
  14166. waiter = task->pi_blocked_on;
  14167. - if (!waiter || (waiter->prio == task->prio &&
  14168. - !dl_prio(task->prio))) {
  14169. + if (!rt_mutex_real_waiter(waiter) ||
  14170. + rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
  14171. raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  14172. return;
  14173. }
  14174. next_lock = waiter->lock;
  14175. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  14176. /* gets dropped in rt_mutex_adjust_prio_chain()! */
  14177. get_task_struct(task);
  14178. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  14179. rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
  14180. next_lock, NULL, task);
  14181. }
  14182. +void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
  14183. +{
  14184. + debug_rt_mutex_init_waiter(waiter);
  14185. + RB_CLEAR_NODE(&waiter->pi_tree_entry);
  14186. + RB_CLEAR_NODE(&waiter->tree_entry);
  14187. + waiter->task = NULL;
  14188. + waiter->savestate = savestate;
  14189. +}
  14190. +
  14191. /**
  14192. * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
  14193. * @lock: the rt_mutex to take
  14194. @@ -1166,7 +1623,8 @@ void rt_mutex_adjust_pi(struct task_struct *task)
  14195. static int __sched
  14196. __rt_mutex_slowlock(struct rt_mutex *lock, int state,
  14197. struct hrtimer_sleeper *timeout,
  14198. - struct rt_mutex_waiter *waiter)
  14199. + struct rt_mutex_waiter *waiter,
  14200. + struct ww_acquire_ctx *ww_ctx)
  14201. {
  14202. int ret = 0;
  14203. @@ -1175,16 +1633,17 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
  14204. if (try_to_take_rt_mutex(lock, current, waiter))
  14205. break;
  14206. - /*
  14207. - * TASK_INTERRUPTIBLE checks for signals and
  14208. - * timeout. Ignored otherwise.
  14209. - */
  14210. - if (unlikely(state == TASK_INTERRUPTIBLE)) {
  14211. - /* Signal pending? */
  14212. - if (signal_pending(current))
  14213. - ret = -EINTR;
  14214. - if (timeout && !timeout->task)
  14215. - ret = -ETIMEDOUT;
  14216. + if (timeout && !timeout->task) {
  14217. + ret = -ETIMEDOUT;
  14218. + break;
  14219. + }
  14220. + if (signal_pending_state(state, current)) {
  14221. + ret = -EINTR;
  14222. + break;
  14223. + }
  14224. +
  14225. + if (ww_ctx && ww_ctx->acquired > 0) {
  14226. + ret = __mutex_lock_check_stamp(lock, ww_ctx);
  14227. if (ret)
  14228. break;
  14229. }
  14230. @@ -1223,35 +1682,94 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
  14231. }
  14232. }
  14233. -/*
  14234. - * Slow path lock function:
  14235. - */
  14236. -static int __sched
  14237. -rt_mutex_slowlock(struct rt_mutex *lock, int state,
  14238. - struct hrtimer_sleeper *timeout,
  14239. - enum rtmutex_chainwalk chwalk)
  14240. +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
  14241. + struct ww_acquire_ctx *ww_ctx)
  14242. +{
  14243. +#ifdef CONFIG_DEBUG_MUTEXES
  14244. + /*
  14245. + * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
  14246. + * but released with a normal mutex_unlock in this call.
  14247. + *
  14248. + * This should never happen, always use ww_mutex_unlock.
  14249. + */
  14250. + DEBUG_LOCKS_WARN_ON(ww->ctx);
  14251. +
  14252. + /*
  14253. + * Not quite done after calling ww_acquire_done() ?
  14254. + */
  14255. + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
  14256. +
  14257. + if (ww_ctx->contending_lock) {
  14258. + /*
  14259. + * After -EDEADLK you tried to
  14260. + * acquire a different ww_mutex? Bad!
  14261. + */
  14262. + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
  14263. +
  14264. + /*
  14265. + * You called ww_mutex_lock after receiving -EDEADLK,
  14266. + * but 'forgot' to unlock everything else first?
  14267. + */
  14268. + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
  14269. + ww_ctx->contending_lock = NULL;
  14270. + }
  14271. +
  14272. + /*
  14273. + * Naughty, using a different class will lead to undefined behavior!
  14274. + */
  14275. + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
  14276. +#endif
  14277. + ww_ctx->acquired++;
  14278. +}
  14279. +
  14280. +#ifdef CONFIG_PREEMPT_RT_FULL
  14281. +static void ww_mutex_account_lock(struct rt_mutex *lock,
  14282. + struct ww_acquire_ctx *ww_ctx)
  14283. {
  14284. - struct rt_mutex_waiter waiter;
  14285. - unsigned long flags;
  14286. - int ret = 0;
  14287. + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
  14288. + struct rt_mutex_waiter *waiter, *n;
  14289. - debug_rt_mutex_init_waiter(&waiter);
  14290. - RB_CLEAR_NODE(&waiter.pi_tree_entry);
  14291. - RB_CLEAR_NODE(&waiter.tree_entry);
  14292. + /*
  14293. + * This branch gets optimized out for the common case,
  14294. + * and is only important for ww_mutex_lock.
  14295. + */
  14296. + ww_mutex_lock_acquired(ww, ww_ctx);
  14297. + ww->ctx = ww_ctx;
  14298. /*
  14299. - * Technically we could use raw_spin_[un]lock_irq() here, but this can
  14300. - * be called in early boot if the cmpxchg() fast path is disabled
  14301. - * (debug, no architecture support). In this case we will acquire the
  14302. - * rtmutex with lock->wait_lock held. But we cannot unconditionally
  14303. - * enable interrupts in that early boot case. So we need to use the
  14304. - * irqsave/restore variants.
  14305. + * Give any possible sleeping processes the chance to wake up,
  14306. + * so they can recheck if they have to back off.
  14307. */
  14308. - raw_spin_lock_irqsave(&lock->wait_lock, flags);
  14309. + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters,
  14310. + tree_entry) {
  14311. + /* XXX debug rt mutex waiter wakeup */
  14312. +
  14313. + BUG_ON(waiter->lock != lock);
  14314. + rt_mutex_wake_waiter(waiter);
  14315. + }
  14316. +}
  14317. +
  14318. +#else
  14319. +
  14320. +static void ww_mutex_account_lock(struct rt_mutex *lock,
  14321. + struct ww_acquire_ctx *ww_ctx)
  14322. +{
  14323. + BUG();
  14324. +}
  14325. +#endif
  14326. +
  14327. +int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state,
  14328. + struct hrtimer_sleeper *timeout,
  14329. + enum rtmutex_chainwalk chwalk,
  14330. + struct ww_acquire_ctx *ww_ctx,
  14331. + struct rt_mutex_waiter *waiter)
  14332. +{
  14333. + int ret;
  14334. /* Try to acquire the lock again: */
  14335. if (try_to_take_rt_mutex(lock, current, NULL)) {
  14336. - raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  14337. + if (ww_ctx)
  14338. + ww_mutex_account_lock(lock, ww_ctx);
  14339. return 0;
  14340. }
  14341. @@ -1261,17 +1779,27 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
  14342. if (unlikely(timeout))
  14343. hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
  14344. - ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
  14345. + ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk);
  14346. - if (likely(!ret))
  14347. + if (likely(!ret)) {
  14348. /* sleep on the mutex */
  14349. - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
  14350. + ret = __rt_mutex_slowlock(lock, state, timeout, waiter,
  14351. + ww_ctx);
  14352. + } else if (ww_ctx) {
  14353. + /* ww_mutex received EDEADLK, let it become EALREADY */
  14354. + ret = __mutex_lock_check_stamp(lock, ww_ctx);
  14355. + BUG_ON(!ret);
  14356. + }
  14357. if (unlikely(ret)) {
  14358. __set_current_state(TASK_RUNNING);
  14359. if (rt_mutex_has_waiters(lock))
  14360. - remove_waiter(lock, &waiter);
  14361. - rt_mutex_handle_deadlock(ret, chwalk, &waiter);
  14362. + remove_waiter(lock, waiter);
  14363. + /* ww_mutex want to report EDEADLK/EALREADY, let them */
  14364. + if (!ww_ctx)
  14365. + rt_mutex_handle_deadlock(ret, chwalk, waiter);
  14366. + } else if (ww_ctx) {
  14367. + ww_mutex_account_lock(lock, ww_ctx);
  14368. }
  14369. /*
  14370. @@ -1279,6 +1807,36 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
  14371. * unconditionally. We might have to fix that up.
  14372. */
  14373. fixup_rt_mutex_waiters(lock);
  14374. + return ret;
  14375. +}
  14376. +
  14377. +/*
  14378. + * Slow path lock function:
  14379. + */
  14380. +static int __sched
  14381. +rt_mutex_slowlock(struct rt_mutex *lock, int state,
  14382. + struct hrtimer_sleeper *timeout,
  14383. + enum rtmutex_chainwalk chwalk,
  14384. + struct ww_acquire_ctx *ww_ctx)
  14385. +{
  14386. + struct rt_mutex_waiter waiter;
  14387. + unsigned long flags;
  14388. + int ret = 0;
  14389. +
  14390. + rt_mutex_init_waiter(&waiter, false);
  14391. +
  14392. + /*
  14393. + * Technically we could use raw_spin_[un]lock_irq() here, but this can
  14394. + * be called in early boot if the cmpxchg() fast path is disabled
  14395. + * (debug, no architecture support). In this case we will acquire the
  14396. + * rtmutex with lock->wait_lock held. But we cannot unconditionally
  14397. + * enable interrupts in that early boot case. So we need to use the
  14398. + * irqsave/restore variants.
  14399. + */
  14400. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  14401. +
  14402. + ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, ww_ctx,
  14403. + &waiter);
  14404. raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  14405. @@ -1328,10 +1886,12 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
  14406. /*
  14407. * Slow path to release a rt-mutex.
  14408. - * Return whether the current task needs to undo a potential priority boosting.
  14409. + *
  14410. + * Return whether the current task needs to call rt_mutex_postunlock().
  14411. */
  14412. static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
  14413. - struct wake_q_head *wake_q)
  14414. + struct wake_q_head *wake_q,
  14415. + struct wake_q_head *wake_sleeper_q)
  14416. {
  14417. unsigned long flags;
  14418. @@ -1340,8 +1900,6 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
  14419. debug_rt_mutex_unlock(lock);
  14420. - rt_mutex_deadlock_account_unlock(current);
  14421. -
  14422. /*
  14423. * We must be careful here if the fast path is enabled. If we
  14424. * have no waiters queued we cannot set owner to NULL here
  14425. @@ -1387,12 +1945,10 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
  14426. *
  14427. * Queue the next waiter for wakeup once we release the wait_lock.
  14428. */
  14429. - mark_wakeup_next_waiter(wake_q, lock);
  14430. -
  14431. + mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock);
  14432. raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  14433. - /* check PI boosting */
  14434. - return true;
  14435. + return true; /* call rt_mutex_postunlock() */
  14436. }
  14437. /*
  14438. @@ -1403,63 +1959,97 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
  14439. */
  14440. static inline int
  14441. rt_mutex_fastlock(struct rt_mutex *lock, int state,
  14442. + struct ww_acquire_ctx *ww_ctx,
  14443. int (*slowfn)(struct rt_mutex *lock, int state,
  14444. struct hrtimer_sleeper *timeout,
  14445. - enum rtmutex_chainwalk chwalk))
  14446. + enum rtmutex_chainwalk chwalk,
  14447. + struct ww_acquire_ctx *ww_ctx))
  14448. {
  14449. - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
  14450. - rt_mutex_deadlock_account_lock(lock, current);
  14451. + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
  14452. return 0;
  14453. - } else
  14454. - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
  14455. +
  14456. + /*
  14457. + * If rt_mutex blocks, the function sched_submit_work will not call
  14458. + * blk_schedule_flush_plug (because tsk_is_pi_blocked would be true).
  14459. + * We must call blk_schedule_flush_plug here, if we don't call it,
  14460. + * a deadlock in device mapper may happen.
  14461. + */
  14462. + if (unlikely(blk_needs_flush_plug(current)))
  14463. + blk_schedule_flush_plug(current);
  14464. +
  14465. + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, ww_ctx);
  14466. }
  14467. static inline int
  14468. rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
  14469. struct hrtimer_sleeper *timeout,
  14470. enum rtmutex_chainwalk chwalk,
  14471. + struct ww_acquire_ctx *ww_ctx,
  14472. int (*slowfn)(struct rt_mutex *lock, int state,
  14473. struct hrtimer_sleeper *timeout,
  14474. - enum rtmutex_chainwalk chwalk))
  14475. + enum rtmutex_chainwalk chwalk,
  14476. + struct ww_acquire_ctx *ww_ctx))
  14477. {
  14478. if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
  14479. - likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
  14480. - rt_mutex_deadlock_account_lock(lock, current);
  14481. + likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
  14482. return 0;
  14483. - } else
  14484. - return slowfn(lock, state, timeout, chwalk);
  14485. +
  14486. + if (unlikely(blk_needs_flush_plug(current)))
  14487. + blk_schedule_flush_plug(current);
  14488. +
  14489. + return slowfn(lock, state, timeout, chwalk, ww_ctx);
  14490. }
  14491. static inline int
  14492. rt_mutex_fasttrylock(struct rt_mutex *lock,
  14493. int (*slowfn)(struct rt_mutex *lock))
  14494. {
  14495. - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
  14496. - rt_mutex_deadlock_account_lock(lock, current);
  14497. + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
  14498. return 1;
  14499. - }
  14500. +
  14501. return slowfn(lock);
  14502. }
  14503. +/*
  14504. + * Performs the wakeup of the the top-waiter and re-enables preemption.
  14505. + */
  14506. +void rt_mutex_postunlock(struct wake_q_head *wake_q,
  14507. + struct wake_q_head *wq_sleeper)
  14508. +{
  14509. + wake_up_q(wake_q);
  14510. + wake_up_q_sleeper(wq_sleeper);
  14511. +
  14512. + /* Pairs with preempt_disable() in rt_mutex_slowunlock() */
  14513. + preempt_enable();
  14514. +}
  14515. +
  14516. static inline void
  14517. rt_mutex_fastunlock(struct rt_mutex *lock,
  14518. bool (*slowfn)(struct rt_mutex *lock,
  14519. - struct wake_q_head *wqh))
  14520. + struct wake_q_head *wqh,
  14521. + struct wake_q_head *wq_sleeper))
  14522. {
  14523. WAKE_Q(wake_q);
  14524. + WAKE_Q(wake_sleeper_q);
  14525. - if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
  14526. - rt_mutex_deadlock_account_unlock(current);
  14527. + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
  14528. + return;
  14529. - } else {
  14530. - bool deboost = slowfn(lock, &wake_q);
  14531. + if (slowfn(lock, &wake_q, &wake_sleeper_q))
  14532. + rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
  14533. +}
  14534. - wake_up_q(&wake_q);
  14535. +/**
  14536. + * rt_mutex_lock_state - lock a rt_mutex with a given state
  14537. + *
  14538. + * @lock: The rt_mutex to be locked
  14539. + * @state: The state to set when blocking on the rt_mutex
  14540. + */
  14541. +int __sched rt_mutex_lock_state(struct rt_mutex *lock, int state)
  14542. +{
  14543. + might_sleep();
  14544. - /* Undo pi boosting if necessary: */
  14545. - if (deboost)
  14546. - rt_mutex_adjust_prio(current);
  14547. - }
  14548. + return rt_mutex_fastlock(lock, state, NULL, rt_mutex_slowlock);
  14549. }
  14550. /**
  14551. @@ -1469,15 +2059,13 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
  14552. */
  14553. void __sched rt_mutex_lock(struct rt_mutex *lock)
  14554. {
  14555. - might_sleep();
  14556. -
  14557. - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
  14558. + rt_mutex_lock_state(lock, TASK_UNINTERRUPTIBLE);
  14559. }
  14560. EXPORT_SYMBOL_GPL(rt_mutex_lock);
  14561. /**
  14562. * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
  14563. - *
  14564. + **
  14565. * @lock: the rt_mutex to be locked
  14566. *
  14567. * Returns:
  14568. @@ -1486,23 +2074,32 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
  14569. */
  14570. int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
  14571. {
  14572. - might_sleep();
  14573. -
  14574. - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
  14575. + return rt_mutex_lock_state(lock, TASK_INTERRUPTIBLE);
  14576. }
  14577. EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
  14578. -/*
  14579. - * Futex variant with full deadlock detection.
  14580. +/**
  14581. + * rt_mutex_lock_killable - lock a rt_mutex killable
  14582. + *
  14583. + * @lock: the rt_mutex to be locked
  14584. + * @detect_deadlock: deadlock detection on/off
  14585. + *
  14586. + * Returns:
  14587. + * 0 on success
  14588. + * -EINTR when interrupted by a signal
  14589. */
  14590. -int rt_mutex_timed_futex_lock(struct rt_mutex *lock,
  14591. - struct hrtimer_sleeper *timeout)
  14592. +int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
  14593. {
  14594. - might_sleep();
  14595. + return rt_mutex_lock_state(lock, TASK_KILLABLE);
  14596. +}
  14597. +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
  14598. - return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
  14599. - RT_MUTEX_FULL_CHAINWALK,
  14600. - rt_mutex_slowlock);
  14601. +/*
  14602. + * Futex variant, must not use fastpath.
  14603. + */
  14604. +int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
  14605. +{
  14606. + return rt_mutex_slowtrylock(lock);
  14607. }
  14608. /**
  14609. @@ -1525,6 +2122,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
  14610. return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
  14611. RT_MUTEX_MIN_CHAINWALK,
  14612. + NULL,
  14613. rt_mutex_slowlock);
  14614. }
  14615. EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
  14616. @@ -1542,7 +2140,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
  14617. */
  14618. int __sched rt_mutex_trylock(struct rt_mutex *lock)
  14619. {
  14620. +#ifdef CONFIG_PREEMPT_RT_FULL
  14621. + if (WARN_ON_ONCE(in_irq() || in_nmi()))
  14622. +#else
  14623. if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
  14624. +#endif
  14625. return 0;
  14626. return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
  14627. @@ -1560,21 +2162,53 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock)
  14628. }
  14629. EXPORT_SYMBOL_GPL(rt_mutex_unlock);
  14630. +static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock,
  14631. + struct wake_q_head *wake_q,
  14632. + struct wake_q_head *wq_sleeper)
  14633. +{
  14634. + lockdep_assert_held(&lock->wait_lock);
  14635. +
  14636. + debug_rt_mutex_unlock(lock);
  14637. +
  14638. + if (!rt_mutex_has_waiters(lock)) {
  14639. + lock->owner = NULL;
  14640. + return false; /* done */
  14641. + }
  14642. +
  14643. + /*
  14644. + * We've already deboosted, mark_wakeup_next_waiter() will
  14645. + * retain preempt_disabled when we drop the wait_lock, to
  14646. + * avoid inversion prior to the wakeup. preempt_disable()
  14647. + * therein pairs with rt_mutex_postunlock().
  14648. + */
  14649. + mark_wakeup_next_waiter(wake_q, wq_sleeper, lock);
  14650. +
  14651. + return true; /* call postunlock() */
  14652. +}
  14653. +
  14654. /**
  14655. - * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock
  14656. - * @lock: the rt_mutex to be unlocked
  14657. - *
  14658. - * Returns: true/false indicating whether priority adjustment is
  14659. - * required or not.
  14660. + * Futex variant, that since futex variants do not use the fast-path, can be
  14661. + * simple and will not need to retry.
  14662. */
  14663. -bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
  14664. - struct wake_q_head *wqh)
  14665. +bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
  14666. + struct wake_q_head *wake_q,
  14667. + struct wake_q_head *wq_sleeper)
  14668. {
  14669. - if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
  14670. - rt_mutex_deadlock_account_unlock(current);
  14671. - return false;
  14672. - }
  14673. - return rt_mutex_slowunlock(lock, wqh);
  14674. + return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper);
  14675. +}
  14676. +
  14677. +void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
  14678. +{
  14679. + WAKE_Q(wake_q);
  14680. + WAKE_Q(wake_sleeper_q);
  14681. + bool postunlock;
  14682. +
  14683. + raw_spin_lock_irq(&lock->wait_lock);
  14684. + postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q);
  14685. + raw_spin_unlock_irq(&lock->wait_lock);
  14686. +
  14687. + if (postunlock)
  14688. + rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
  14689. }
  14690. /**
  14691. @@ -1607,13 +2241,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
  14692. void __rt_mutex_init(struct rt_mutex *lock, const char *name)
  14693. {
  14694. lock->owner = NULL;
  14695. - raw_spin_lock_init(&lock->wait_lock);
  14696. lock->waiters = RB_ROOT;
  14697. lock->waiters_leftmost = NULL;
  14698. debug_rt_mutex_init(lock, name);
  14699. }
  14700. -EXPORT_SYMBOL_GPL(__rt_mutex_init);
  14701. +EXPORT_SYMBOL(__rt_mutex_init);
  14702. /**
  14703. * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
  14704. @@ -1628,10 +2261,9 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
  14705. void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  14706. struct task_struct *proxy_owner)
  14707. {
  14708. - __rt_mutex_init(lock, NULL);
  14709. + rt_mutex_init(lock);
  14710. debug_rt_mutex_proxy_lock(lock, proxy_owner);
  14711. rt_mutex_set_owner(lock, proxy_owner);
  14712. - rt_mutex_deadlock_account_lock(lock, proxy_owner);
  14713. }
  14714. /**
  14715. @@ -1647,34 +2279,44 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
  14716. {
  14717. debug_rt_mutex_proxy_unlock(lock);
  14718. rt_mutex_set_owner(lock, NULL);
  14719. - rt_mutex_deadlock_account_unlock(proxy_owner);
  14720. }
  14721. -/**
  14722. - * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
  14723. - * @lock: the rt_mutex to take
  14724. - * @waiter: the pre-initialized rt_mutex_waiter
  14725. - * @task: the task to prepare
  14726. - *
  14727. - * Returns:
  14728. - * 0 - task blocked on lock
  14729. - * 1 - acquired the lock for task, caller should wake it up
  14730. - * <0 - error
  14731. - *
  14732. - * Special API call for FUTEX_REQUEUE_PI support.
  14733. - */
  14734. -int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
  14735. +int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
  14736. struct rt_mutex_waiter *waiter,
  14737. struct task_struct *task)
  14738. {
  14739. int ret;
  14740. - raw_spin_lock_irq(&lock->wait_lock);
  14741. -
  14742. - if (try_to_take_rt_mutex(lock, task, NULL)) {
  14743. - raw_spin_unlock_irq(&lock->wait_lock);
  14744. + if (try_to_take_rt_mutex(lock, task, NULL))
  14745. return 1;
  14746. +
  14747. +#ifdef CONFIG_PREEMPT_RT_FULL
  14748. + /*
  14749. + * In PREEMPT_RT there's an added race.
  14750. + * If the task, that we are about to requeue, times out,
  14751. + * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
  14752. + * to skip this task. But right after the task sets
  14753. + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
  14754. + * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
  14755. + * This will replace the PI_WAKEUP_INPROGRESS with the actual
  14756. + * lock that it blocks on. We *must not* place this task
  14757. + * on this proxy lock in that case.
  14758. + *
  14759. + * To prevent this race, we first take the task's pi_lock
  14760. + * and check if it has updated its pi_blocked_on. If it has,
  14761. + * we assume that it woke up and we return -EAGAIN.
  14762. + * Otherwise, we set the task's pi_blocked_on to
  14763. + * PI_REQUEUE_INPROGRESS, so that if the task is waking up
  14764. + * it will know that we are in the process of requeuing it.
  14765. + */
  14766. + raw_spin_lock(&task->pi_lock);
  14767. + if (task->pi_blocked_on) {
  14768. + raw_spin_unlock(&task->pi_lock);
  14769. + return -EAGAIN;
  14770. }
  14771. + task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
  14772. + raw_spin_unlock(&task->pi_lock);
  14773. +#endif
  14774. /* We enforce deadlock detection for futexes */
  14775. ret = task_blocks_on_rt_mutex(lock, waiter, task,
  14776. @@ -1690,16 +2332,40 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
  14777. ret = 0;
  14778. }
  14779. - if (unlikely(ret))
  14780. + if (ret && rt_mutex_has_waiters(lock))
  14781. remove_waiter(lock, waiter);
  14782. - raw_spin_unlock_irq(&lock->wait_lock);
  14783. -
  14784. debug_rt_mutex_print_deadlock(waiter);
  14785. return ret;
  14786. }
  14787. +/**
  14788. + * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
  14789. + * @lock: the rt_mutex to take
  14790. + * @waiter: the pre-initialized rt_mutex_waiter
  14791. + * @task: the task to prepare
  14792. + *
  14793. + * Returns:
  14794. + * 0 - task blocked on lock
  14795. + * 1 - acquired the lock for task, caller should wake it up
  14796. + * <0 - error
  14797. + *
  14798. + * Special API call for FUTEX_REQUEUE_PI support.
  14799. + */
  14800. +int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
  14801. + struct rt_mutex_waiter *waiter,
  14802. + struct task_struct *task)
  14803. +{
  14804. + int ret;
  14805. +
  14806. + raw_spin_lock_irq(&lock->wait_lock);
  14807. + ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
  14808. + raw_spin_unlock_irq(&lock->wait_lock);
  14809. +
  14810. + return ret;
  14811. +}
  14812. +
  14813. /**
  14814. * rt_mutex_next_owner - return the next owner of the lock
  14815. *
  14816. @@ -1721,36 +2387,106 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
  14817. }
  14818. /**
  14819. - * rt_mutex_finish_proxy_lock() - Complete lock acquisition
  14820. + * rt_mutex_wait_proxy_lock() - Wait for lock acquisition
  14821. * @lock: the rt_mutex we were woken on
  14822. * @to: the timeout, null if none. hrtimer should already have
  14823. * been started.
  14824. * @waiter: the pre-initialized rt_mutex_waiter
  14825. *
  14826. - * Complete the lock acquisition started our behalf by another thread.
  14827. + * Wait for the the lock acquisition started on our behalf by
  14828. + * rt_mutex_start_proxy_lock(). Upon failure, the caller must call
  14829. + * rt_mutex_cleanup_proxy_lock().
  14830. *
  14831. * Returns:
  14832. * 0 - success
  14833. * <0 - error, one of -EINTR, -ETIMEDOUT
  14834. *
  14835. - * Special API call for PI-futex requeue support
  14836. + * Special API call for PI-futex support
  14837. */
  14838. -int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
  14839. +int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
  14840. struct hrtimer_sleeper *to,
  14841. struct rt_mutex_waiter *waiter)
  14842. {
  14843. + struct task_struct *tsk = current;
  14844. int ret;
  14845. raw_spin_lock_irq(&lock->wait_lock);
  14846. -
  14847. + /* sleep on the mutex */
  14848. set_current_state(TASK_INTERRUPTIBLE);
  14849. + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
  14850. + /*
  14851. + * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
  14852. + * have to fix that up.
  14853. + */
  14854. + fixup_rt_mutex_waiters(lock);
  14855. - /* sleep on the mutex */
  14856. - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
  14857. + /*
  14858. + * RT has a problem here when the wait got interrupted by a timeout
  14859. + * or a signal. task->pi_blocked_on is still set. The task must
  14860. + * acquire the hash bucket lock when returning from this function.
  14861. + *
  14862. + * If the hash bucket lock is contended then the
  14863. + * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in
  14864. + * task_blocks_on_rt_mutex() will trigger. This can be avoided by
  14865. + * clearing task->pi_blocked_on which removes the task from the
  14866. + * boosting chain of the rtmutex. That's correct because the task
  14867. + * is not longer blocked on it.
  14868. + */
  14869. + if (ret) {
  14870. + raw_spin_lock(&tsk->pi_lock);
  14871. + tsk->pi_blocked_on = NULL;
  14872. + raw_spin_unlock(&tsk->pi_lock);
  14873. + }
  14874. + raw_spin_unlock_irq(&lock->wait_lock);
  14875. - if (unlikely(ret))
  14876. - remove_waiter(lock, waiter);
  14877. + return ret;
  14878. +}
  14879. +
  14880. +/**
  14881. + * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition
  14882. + * @lock: the rt_mutex we were woken on
  14883. + * @waiter: the pre-initialized rt_mutex_waiter
  14884. + *
  14885. + * Attempt to clean up after a failed rt_mutex_wait_proxy_lock().
  14886. + *
  14887. + * Unless we acquired the lock; we're still enqueued on the wait-list and can
  14888. + * in fact still be granted ownership until we're removed. Therefore we can
  14889. + * find we are in fact the owner and must disregard the
  14890. + * rt_mutex_wait_proxy_lock() failure.
  14891. + *
  14892. + * Returns:
  14893. + * true - did the cleanup, we done.
  14894. + * false - we acquired the lock after rt_mutex_wait_proxy_lock() returned,
  14895. + * caller should disregards its return value.
  14896. + *
  14897. + * Special API call for PI-futex support
  14898. + */
  14899. +bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
  14900. + struct rt_mutex_waiter *waiter)
  14901. +{
  14902. + bool cleanup = false;
  14903. + raw_spin_lock_irq(&lock->wait_lock);
  14904. + /*
  14905. + * Do an unconditional try-lock, this deals with the lock stealing
  14906. + * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
  14907. + * sets a NULL owner.
  14908. + *
  14909. + * We're not interested in the return value, because the subsequent
  14910. + * test on rt_mutex_owner() will infer that. If the trylock succeeded,
  14911. + * we will own the lock and it will have removed the waiter. If we
  14912. + * failed the trylock, we're still not owner and we need to remove
  14913. + * ourselves.
  14914. + */
  14915. + try_to_take_rt_mutex(lock, current, waiter);
  14916. + /*
  14917. + * Unless we're the owner; we're still enqueued on the wait_list.
  14918. + * So check if we became owner, if not, take us off the wait_list.
  14919. + */
  14920. + if (rt_mutex_owner(lock) != current) {
  14921. + remove_waiter(lock, waiter);
  14922. + cleanup = true;
  14923. + }
  14924. /*
  14925. * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
  14926. * have to fix that up.
  14927. @@ -1759,5 +2495,91 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
  14928. raw_spin_unlock_irq(&lock->wait_lock);
  14929. + return cleanup;
  14930. +}
  14931. +
  14932. +static inline int
  14933. +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
  14934. +{
  14935. +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
  14936. + unsigned tmp;
  14937. +
  14938. + if (ctx->deadlock_inject_countdown-- == 0) {
  14939. + tmp = ctx->deadlock_inject_interval;
  14940. + if (tmp > UINT_MAX/4)
  14941. + tmp = UINT_MAX;
  14942. + else
  14943. + tmp = tmp*2 + tmp + tmp/2;
  14944. +
  14945. + ctx->deadlock_inject_interval = tmp;
  14946. + ctx->deadlock_inject_countdown = tmp;
  14947. + ctx->contending_lock = lock;
  14948. +
  14949. + ww_mutex_unlock(lock);
  14950. +
  14951. + return -EDEADLK;
  14952. + }
  14953. +#endif
  14954. +
  14955. + return 0;
  14956. +}
  14957. +
  14958. +#ifdef CONFIG_PREEMPT_RT_FULL
  14959. +int __sched
  14960. +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
  14961. +{
  14962. + int ret;
  14963. +
  14964. + might_sleep();
  14965. +
  14966. + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
  14967. + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx);
  14968. + if (ret)
  14969. + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
  14970. + else if (!ret && ww_ctx->acquired > 1)
  14971. + return ww_mutex_deadlock_injection(lock, ww_ctx);
  14972. +
  14973. + return ret;
  14974. +}
  14975. +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
  14976. +
  14977. +int __sched
  14978. +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
  14979. +{
  14980. + int ret;
  14981. +
  14982. + might_sleep();
  14983. +
  14984. + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
  14985. + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx);
  14986. + if (ret)
  14987. + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
  14988. + else if (!ret && ww_ctx->acquired > 1)
  14989. + return ww_mutex_deadlock_injection(lock, ww_ctx);
  14990. +
  14991. return ret;
  14992. }
  14993. +EXPORT_SYMBOL_GPL(__ww_mutex_lock);
  14994. +
  14995. +void __sched ww_mutex_unlock(struct ww_mutex *lock)
  14996. +{
  14997. + int nest = !!lock->ctx;
  14998. +
  14999. + /*
  15000. + * The unlocking fastpath is the 0->1 transition from 'locked'
  15001. + * into 'unlocked' state:
  15002. + */
  15003. + if (nest) {
  15004. +#ifdef CONFIG_DEBUG_MUTEXES
  15005. + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
  15006. +#endif
  15007. + if (lock->ctx->acquired > 0)
  15008. + lock->ctx->acquired--;
  15009. + lock->ctx = NULL;
  15010. + }
  15011. +
  15012. + mutex_release(&lock->base.dep_map, nest, _RET_IP_);
  15013. + rt_mutex_unlock(&lock->base.lock);
  15014. +}
  15015. +EXPORT_SYMBOL(ww_mutex_unlock);
  15016. +#endif
  15017. diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h
  15018. index c4060584c407..6607802efa8b 100644
  15019. --- a/kernel/locking/rtmutex.h
  15020. +++ b/kernel/locking/rtmutex.h
  15021. @@ -11,8 +11,6 @@
  15022. */
  15023. #define rt_mutex_deadlock_check(l) (0)
  15024. -#define rt_mutex_deadlock_account_lock(m, t) do { } while (0)
  15025. -#define rt_mutex_deadlock_account_unlock(l) do { } while (0)
  15026. #define debug_rt_mutex_init_waiter(w) do { } while (0)
  15027. #define debug_rt_mutex_free_waiter(w) do { } while (0)
  15028. #define debug_rt_mutex_lock(l) do { } while (0)
  15029. diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
  15030. index e317e1cbb3eb..64d89d780059 100644
  15031. --- a/kernel/locking/rtmutex_common.h
  15032. +++ b/kernel/locking/rtmutex_common.h
  15033. @@ -27,12 +27,14 @@ struct rt_mutex_waiter {
  15034. struct rb_node pi_tree_entry;
  15035. struct task_struct *task;
  15036. struct rt_mutex *lock;
  15037. + bool savestate;
  15038. #ifdef CONFIG_DEBUG_RT_MUTEXES
  15039. unsigned long ip;
  15040. struct pid *deadlock_task_pid;
  15041. struct rt_mutex *deadlock_lock;
  15042. #endif
  15043. int prio;
  15044. + u64 deadline;
  15045. };
  15046. /*
  15047. @@ -98,21 +100,45 @@ enum rtmutex_chainwalk {
  15048. /*
  15049. * PI-futex support (proxy locking functions, etc.):
  15050. */
  15051. +#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
  15052. +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)
  15053. +
  15054. extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
  15055. extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  15056. struct task_struct *proxy_owner);
  15057. extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
  15058. struct task_struct *proxy_owner);
  15059. +extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate);
  15060. +extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
  15061. + struct rt_mutex_waiter *waiter,
  15062. + struct task_struct *task);
  15063. extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
  15064. struct rt_mutex_waiter *waiter,
  15065. struct task_struct *task);
  15066. -extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
  15067. - struct hrtimer_sleeper *to,
  15068. - struct rt_mutex_waiter *waiter);
  15069. -extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
  15070. -extern bool rt_mutex_futex_unlock(struct rt_mutex *lock,
  15071. - struct wake_q_head *wqh);
  15072. -extern void rt_mutex_adjust_prio(struct task_struct *task);
  15073. +extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
  15074. + struct hrtimer_sleeper *to,
  15075. + struct rt_mutex_waiter *waiter);
  15076. +extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
  15077. + struct rt_mutex_waiter *waiter);
  15078. +
  15079. +extern int rt_mutex_futex_trylock(struct rt_mutex *l);
  15080. +
  15081. +extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
  15082. +extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
  15083. + struct wake_q_head *wqh,
  15084. + struct wake_q_head *wq_sleeper);
  15085. +
  15086. +extern void rt_mutex_postunlock(struct wake_q_head *wake_q,
  15087. + struct wake_q_head *wq_sleeper);
  15088. +
  15089. +/* RW semaphore special interface */
  15090. +struct ww_acquire_ctx;
  15091. +
  15092. +int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state,
  15093. + struct hrtimer_sleeper *timeout,
  15094. + enum rtmutex_chainwalk chwalk,
  15095. + struct ww_acquire_ctx *ww_ctx,
  15096. + struct rt_mutex_waiter *waiter);
  15097. #ifdef CONFIG_DEBUG_RT_MUTEXES
  15098. # include "rtmutex-debug.h"
  15099. diff --git a/kernel/locking/rwsem-rt.c b/kernel/locking/rwsem-rt.c
  15100. new file mode 100644
  15101. index 000000000000..4a708ffcded6
  15102. --- /dev/null
  15103. +++ b/kernel/locking/rwsem-rt.c
  15104. @@ -0,0 +1,268 @@
  15105. +/*
  15106. + */
  15107. +#include <linux/rwsem.h>
  15108. +#include <linux/sched.h>
  15109. +#include <linux/export.h>
  15110. +
  15111. +#include "rtmutex_common.h"
  15112. +
  15113. +/*
  15114. + * RT-specific reader/writer semaphores
  15115. + *
  15116. + * down_write()
  15117. + * 1) Lock sem->rtmutex
  15118. + * 2) Remove the reader BIAS to force readers into the slow path
  15119. + * 3) Wait until all readers have left the critical region
  15120. + * 4) Mark it write locked
  15121. + *
  15122. + * up_write()
  15123. + * 1) Remove the write locked marker
  15124. + * 2) Set the reader BIAS so readers can use the fast path again
  15125. + * 3) Unlock sem->rtmutex to release blocked readers
  15126. + *
  15127. + * down_read()
  15128. + * 1) Try fast path acquisition (reader BIAS is set)
  15129. + * 2) Take sem->rtmutex.wait_lock which protects the writelocked flag
  15130. + * 3) If !writelocked, acquire it for read
  15131. + * 4) If writelocked, block on sem->rtmutex
  15132. + * 5) unlock sem->rtmutex, goto 1)
  15133. + *
  15134. + * up_read()
  15135. + * 1) Try fast path release (reader count != 1)
  15136. + * 2) Wake the writer waiting in down_write()#3
  15137. + *
  15138. + * down_read()#3 has the consequence, that rw semaphores on RT are not writer
  15139. + * fair, but writers, which should be avoided in RT tasks (think mmap_sem),
  15140. + * are subject to the rtmutex priority/DL inheritance mechanism.
  15141. + *
  15142. + * It's possible to make the rw semaphores writer fair by keeping a list of
  15143. + * active readers. A blocked writer would force all newly incoming readers to
  15144. + * block on the rtmutex, but the rtmutex would have to be proxy locked for one
  15145. + * reader after the other. We can't use multi-reader inheritance because there
  15146. + * is no way to support that with SCHED_DEADLINE. Implementing the one by one
  15147. + * reader boosting/handover mechanism is a major surgery for a very dubious
  15148. + * value.
  15149. + *
  15150. + * The risk of writer starvation is there, but the pathological use cases
  15151. + * which trigger it are not necessarily the typical RT workloads.
  15152. + */
  15153. +
  15154. +void __rwsem_init(struct rw_semaphore *sem, const char *name,
  15155. + struct lock_class_key *key)
  15156. +{
  15157. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  15158. + /*
  15159. + * Make sure we are not reinitializing a held semaphore:
  15160. + */
  15161. + debug_check_no_locks_freed((void *)sem, sizeof(*sem));
  15162. + lockdep_init_map(&sem->dep_map, name, key, 0);
  15163. +#endif
  15164. + atomic_set(&sem->readers, READER_BIAS);
  15165. +}
  15166. +EXPORT_SYMBOL(__rwsem_init);
  15167. +
  15168. +int __down_read_trylock(struct rw_semaphore *sem)
  15169. +{
  15170. + int r, old;
  15171. +
  15172. + /*
  15173. + * Increment reader count, if sem->readers < 0, i.e. READER_BIAS is
  15174. + * set.
  15175. + */
  15176. + for (r = atomic_read(&sem->readers); r < 0;) {
  15177. + old = atomic_cmpxchg(&sem->readers, r, r + 1);
  15178. + if (likely(old == r))
  15179. + return 1;
  15180. + r = old;
  15181. + }
  15182. + return 0;
  15183. +}
  15184. +
  15185. +void __sched __down_read(struct rw_semaphore *sem)
  15186. +{
  15187. + struct rt_mutex *m = &sem->rtmutex;
  15188. + struct rt_mutex_waiter waiter;
  15189. +
  15190. + if (__down_read_trylock(sem))
  15191. + return;
  15192. +
  15193. + might_sleep();
  15194. + raw_spin_lock_irq(&m->wait_lock);
  15195. + /*
  15196. + * Allow readers as long as the writer has not completely
  15197. + * acquired the semaphore for write.
  15198. + */
  15199. + if (atomic_read(&sem->readers) != WRITER_BIAS) {
  15200. + atomic_inc(&sem->readers);
  15201. + raw_spin_unlock_irq(&m->wait_lock);
  15202. + return;
  15203. + }
  15204. +
  15205. + /*
  15206. + * Call into the slow lock path with the rtmutex->wait_lock
  15207. + * held, so this can't result in the following race:
  15208. + *
  15209. + * Reader1 Reader2 Writer
  15210. + * down_read()
  15211. + * down_write()
  15212. + * rtmutex_lock(m)
  15213. + * swait()
  15214. + * down_read()
  15215. + * unlock(m->wait_lock)
  15216. + * up_read()
  15217. + * swake()
  15218. + * lock(m->wait_lock)
  15219. + * sem->writelocked=true
  15220. + * unlock(m->wait_lock)
  15221. + *
  15222. + * up_write()
  15223. + * sem->writelocked=false
  15224. + * rtmutex_unlock(m)
  15225. + * down_read()
  15226. + * down_write()
  15227. + * rtmutex_lock(m)
  15228. + * swait()
  15229. + * rtmutex_lock(m)
  15230. + *
  15231. + * That would put Reader1 behind the writer waiting on
  15232. + * Reader2 to call up_read() which might be unbound.
  15233. + */
  15234. + rt_mutex_init_waiter(&waiter, false);
  15235. + rt_mutex_slowlock_locked(m, TASK_UNINTERRUPTIBLE, NULL,
  15236. + RT_MUTEX_MIN_CHAINWALK, NULL,
  15237. + &waiter);
  15238. + /*
  15239. + * The slowlock() above is guaranteed to return with the rtmutex is
  15240. + * now held, so there can't be a writer active. Increment the reader
  15241. + * count and immediately drop the rtmutex again.
  15242. + */
  15243. + atomic_inc(&sem->readers);
  15244. + raw_spin_unlock_irq(&m->wait_lock);
  15245. + rt_mutex_unlock(m);
  15246. +
  15247. + debug_rt_mutex_free_waiter(&waiter);
  15248. +}
  15249. +
  15250. +void __up_read(struct rw_semaphore *sem)
  15251. +{
  15252. + struct rt_mutex *m = &sem->rtmutex;
  15253. + struct task_struct *tsk;
  15254. +
  15255. + /*
  15256. + * sem->readers can only hit 0 when a writer is waiting for the
  15257. + * active readers to leave the critical region.
  15258. + */
  15259. + if (!atomic_dec_and_test(&sem->readers))
  15260. + return;
  15261. +
  15262. + might_sleep();
  15263. + raw_spin_lock_irq(&m->wait_lock);
  15264. + /*
  15265. + * Wake the writer, i.e. the rtmutex owner. It might release the
  15266. + * rtmutex concurrently in the fast path (due to a signal), but to
  15267. + * clean up the rwsem it needs to acquire m->wait_lock. The worst
  15268. + * case which can happen is a spurious wakeup.
  15269. + */
  15270. + tsk = rt_mutex_owner(m);
  15271. + if (tsk)
  15272. + wake_up_process(tsk);
  15273. +
  15274. + raw_spin_unlock_irq(&m->wait_lock);
  15275. +}
  15276. +
  15277. +static void __up_write_unlock(struct rw_semaphore *sem, int bias,
  15278. + unsigned long flags)
  15279. +{
  15280. + struct rt_mutex *m = &sem->rtmutex;
  15281. +
  15282. + atomic_add(READER_BIAS - bias, &sem->readers);
  15283. + raw_spin_unlock_irqrestore(&m->wait_lock, flags);
  15284. + rt_mutex_unlock(m);
  15285. +}
  15286. +
  15287. +static int __sched __down_write_common(struct rw_semaphore *sem, int state)
  15288. +{
  15289. + struct rt_mutex *m = &sem->rtmutex;
  15290. + unsigned long flags;
  15291. +
  15292. + /* Take the rtmutex as a first step */
  15293. + if (rt_mutex_lock_state(m, state))
  15294. + return -EINTR;
  15295. +
  15296. + /* Force readers into slow path */
  15297. + atomic_sub(READER_BIAS, &sem->readers);
  15298. + might_sleep();
  15299. +
  15300. + set_current_state(state);
  15301. + for (;;) {
  15302. + raw_spin_lock_irqsave(&m->wait_lock, flags);
  15303. + /* Have all readers left the critical region? */
  15304. + if (!atomic_read(&sem->readers)) {
  15305. + atomic_set(&sem->readers, WRITER_BIAS);
  15306. + __set_current_state(TASK_RUNNING);
  15307. + raw_spin_unlock_irqrestore(&m->wait_lock, flags);
  15308. + return 0;
  15309. + }
  15310. +
  15311. + if (signal_pending_state(state, current)) {
  15312. + __set_current_state(TASK_RUNNING);
  15313. + __up_write_unlock(sem, 0, flags);
  15314. + return -EINTR;
  15315. + }
  15316. + raw_spin_unlock_irqrestore(&m->wait_lock, flags);
  15317. +
  15318. + if (atomic_read(&sem->readers) != 0) {
  15319. + schedule();
  15320. + set_current_state(state);
  15321. + }
  15322. + }
  15323. +}
  15324. +
  15325. +void __sched __down_write(struct rw_semaphore *sem)
  15326. +{
  15327. + __down_write_common(sem, TASK_UNINTERRUPTIBLE);
  15328. +}
  15329. +
  15330. +int __sched __down_write_killable(struct rw_semaphore *sem)
  15331. +{
  15332. + return __down_write_common(sem, TASK_KILLABLE);
  15333. +}
  15334. +
  15335. +int __down_write_trylock(struct rw_semaphore *sem)
  15336. +{
  15337. + struct rt_mutex *m = &sem->rtmutex;
  15338. + unsigned long flags;
  15339. +
  15340. + if (!rt_mutex_trylock(m))
  15341. + return 0;
  15342. +
  15343. + atomic_sub(READER_BIAS, &sem->readers);
  15344. +
  15345. + raw_spin_lock_irqsave(&m->wait_lock, flags);
  15346. + if (!atomic_read(&sem->readers)) {
  15347. + atomic_set(&sem->readers, WRITER_BIAS);
  15348. + raw_spin_unlock_irqrestore(&m->wait_lock, flags);
  15349. + return 1;
  15350. + }
  15351. + __up_write_unlock(sem, 0, flags);
  15352. + return 0;
  15353. +}
  15354. +
  15355. +void __up_write(struct rw_semaphore *sem)
  15356. +{
  15357. + struct rt_mutex *m = &sem->rtmutex;
  15358. + unsigned long flags;
  15359. +
  15360. + raw_spin_lock_irqsave(&m->wait_lock, flags);
  15361. + __up_write_unlock(sem, WRITER_BIAS, flags);
  15362. +}
  15363. +
  15364. +void __downgrade_write(struct rw_semaphore *sem)
  15365. +{
  15366. + struct rt_mutex *m = &sem->rtmutex;
  15367. + unsigned long flags;
  15368. +
  15369. + raw_spin_lock_irqsave(&m->wait_lock, flags);
  15370. + /* Release it and account current as reader */
  15371. + __up_write_unlock(sem, WRITER_BIAS - 1, flags);
  15372. +}
  15373. diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
  15374. index db3ccb1dd614..909779647bd1 100644
  15375. --- a/kernel/locking/spinlock.c
  15376. +++ b/kernel/locking/spinlock.c
  15377. @@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
  15378. * __[spin|read|write]_lock_bh()
  15379. */
  15380. BUILD_LOCK_OPS(spin, raw_spinlock);
  15381. +
  15382. +#ifndef CONFIG_PREEMPT_RT_FULL
  15383. BUILD_LOCK_OPS(read, rwlock);
  15384. BUILD_LOCK_OPS(write, rwlock);
  15385. +#endif
  15386. #endif
  15387. @@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
  15388. EXPORT_SYMBOL(_raw_spin_unlock_bh);
  15389. #endif
  15390. +#ifndef CONFIG_PREEMPT_RT_FULL
  15391. +
  15392. #ifndef CONFIG_INLINE_READ_TRYLOCK
  15393. int __lockfunc _raw_read_trylock(rwlock_t *lock)
  15394. {
  15395. @@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
  15396. EXPORT_SYMBOL(_raw_write_unlock_bh);
  15397. #endif
  15398. +#endif /* !PREEMPT_RT_FULL */
  15399. +
  15400. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  15401. void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
  15402. diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
  15403. index 9aa0fccd5d43..76d0b40d9193 100644
  15404. --- a/kernel/locking/spinlock_debug.c
  15405. +++ b/kernel/locking/spinlock_debug.c
  15406. @@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
  15407. EXPORT_SYMBOL(__raw_spin_lock_init);
  15408. +#ifndef CONFIG_PREEMPT_RT_FULL
  15409. void __rwlock_init(rwlock_t *lock, const char *name,
  15410. struct lock_class_key *key)
  15411. {
  15412. @@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name,
  15413. }
  15414. EXPORT_SYMBOL(__rwlock_init);
  15415. +#endif
  15416. static void spin_dump(raw_spinlock_t *lock, const char *msg)
  15417. {
  15418. @@ -135,6 +137,7 @@ void do_raw_spin_unlock(raw_spinlock_t *lock)
  15419. arch_spin_unlock(&lock->raw_lock);
  15420. }
  15421. +#ifndef CONFIG_PREEMPT_RT_FULL
  15422. static void rwlock_bug(rwlock_t *lock, const char *msg)
  15423. {
  15424. if (!debug_locks_off())
  15425. @@ -224,3 +227,5 @@ void do_raw_write_unlock(rwlock_t *lock)
  15426. debug_write_unlock(lock);
  15427. arch_write_unlock(&lock->raw_lock);
  15428. }
  15429. +
  15430. +#endif
  15431. diff --git a/kernel/module.c b/kernel/module.c
  15432. index 0e54d5bf0097..f27764fbfa24 100644
  15433. --- a/kernel/module.c
  15434. +++ b/kernel/module.c
  15435. @@ -660,16 +660,7 @@ static void percpu_modcopy(struct module *mod,
  15436. memcpy(per_cpu_ptr(mod->percpu, cpu), from, size);
  15437. }
  15438. -/**
  15439. - * is_module_percpu_address - test whether address is from module static percpu
  15440. - * @addr: address to test
  15441. - *
  15442. - * Test whether @addr belongs to module static percpu area.
  15443. - *
  15444. - * RETURNS:
  15445. - * %true if @addr is from module static percpu area
  15446. - */
  15447. -bool is_module_percpu_address(unsigned long addr)
  15448. +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
  15449. {
  15450. struct module *mod;
  15451. unsigned int cpu;
  15452. @@ -683,9 +674,15 @@ bool is_module_percpu_address(unsigned long addr)
  15453. continue;
  15454. for_each_possible_cpu(cpu) {
  15455. void *start = per_cpu_ptr(mod->percpu, cpu);
  15456. -
  15457. - if ((void *)addr >= start &&
  15458. - (void *)addr < start + mod->percpu_size) {
  15459. + void *va = (void *)addr;
  15460. +
  15461. + if (va >= start && va < start + mod->percpu_size) {
  15462. + if (can_addr) {
  15463. + *can_addr = (unsigned long) (va - start);
  15464. + *can_addr += (unsigned long)
  15465. + per_cpu_ptr(mod->percpu,
  15466. + get_boot_cpu_id());
  15467. + }
  15468. preempt_enable();
  15469. return true;
  15470. }
  15471. @@ -696,6 +693,20 @@ bool is_module_percpu_address(unsigned long addr)
  15472. return false;
  15473. }
  15474. +/**
  15475. + * is_module_percpu_address - test whether address is from module static percpu
  15476. + * @addr: address to test
  15477. + *
  15478. + * Test whether @addr belongs to module static percpu area.
  15479. + *
  15480. + * RETURNS:
  15481. + * %true if @addr is from module static percpu area
  15482. + */
  15483. +bool is_module_percpu_address(unsigned long addr)
  15484. +{
  15485. + return __is_module_percpu_address(addr, NULL);
  15486. +}
  15487. +
  15488. #else /* ... !CONFIG_SMP */
  15489. static inline void __percpu *mod_percpu(struct module *mod)
  15490. @@ -727,6 +738,11 @@ bool is_module_percpu_address(unsigned long addr)
  15491. return false;
  15492. }
  15493. +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
  15494. +{
  15495. + return false;
  15496. +}
  15497. +
  15498. #endif /* CONFIG_SMP */
  15499. #define MODINFO_ATTR(field) \
  15500. diff --git a/kernel/panic.c b/kernel/panic.c
  15501. index dbec387099b1..b67a4803ff2b 100644
  15502. --- a/kernel/panic.c
  15503. +++ b/kernel/panic.c
  15504. @@ -482,9 +482,11 @@ static u64 oops_id;
  15505. static int init_oops_id(void)
  15506. {
  15507. +#ifndef CONFIG_PREEMPT_RT_FULL
  15508. if (!oops_id)
  15509. get_random_bytes(&oops_id, sizeof(oops_id));
  15510. else
  15511. +#endif
  15512. oops_id++;
  15513. return 0;
  15514. diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
  15515. index b26dbc48c75b..968255f27a33 100644
  15516. --- a/kernel/power/hibernate.c
  15517. +++ b/kernel/power/hibernate.c
  15518. @@ -286,6 +286,8 @@ static int create_image(int platform_mode)
  15519. local_irq_disable();
  15520. + system_state = SYSTEM_SUSPEND;
  15521. +
  15522. error = syscore_suspend();
  15523. if (error) {
  15524. printk(KERN_ERR "PM: Some system devices failed to power down, "
  15525. @@ -317,6 +319,7 @@ static int create_image(int platform_mode)
  15526. syscore_resume();
  15527. Enable_irqs:
  15528. + system_state = SYSTEM_RUNNING;
  15529. local_irq_enable();
  15530. Enable_cpus:
  15531. @@ -446,6 +449,7 @@ static int resume_target_kernel(bool platform_mode)
  15532. goto Enable_cpus;
  15533. local_irq_disable();
  15534. + system_state = SYSTEM_SUSPEND;
  15535. error = syscore_suspend();
  15536. if (error)
  15537. @@ -479,6 +483,7 @@ static int resume_target_kernel(bool platform_mode)
  15538. syscore_resume();
  15539. Enable_irqs:
  15540. + system_state = SYSTEM_RUNNING;
  15541. local_irq_enable();
  15542. Enable_cpus:
  15543. @@ -564,6 +569,7 @@ int hibernation_platform_enter(void)
  15544. goto Enable_cpus;
  15545. local_irq_disable();
  15546. + system_state = SYSTEM_SUSPEND;
  15547. syscore_suspend();
  15548. if (pm_wakeup_pending()) {
  15549. error = -EAGAIN;
  15550. @@ -576,6 +582,7 @@ int hibernation_platform_enter(void)
  15551. Power_up:
  15552. syscore_resume();
  15553. + system_state = SYSTEM_RUNNING;
  15554. local_irq_enable();
  15555. Enable_cpus:
  15556. @@ -676,6 +683,10 @@ static int load_image_and_restore(void)
  15557. return error;
  15558. }
  15559. +#ifndef CONFIG_SUSPEND
  15560. +bool pm_in_action;
  15561. +#endif
  15562. +
  15563. /**
  15564. * hibernate - Carry out system hibernation, including saving the image.
  15565. */
  15566. @@ -689,6 +700,8 @@ int hibernate(void)
  15567. return -EPERM;
  15568. }
  15569. + pm_in_action = true;
  15570. +
  15571. lock_system_sleep();
  15572. /* The snapshot device should not be opened while we're running */
  15573. if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
  15574. @@ -766,6 +779,7 @@ int hibernate(void)
  15575. atomic_inc(&snapshot_device_available);
  15576. Unlock:
  15577. unlock_system_sleep();
  15578. + pm_in_action = false;
  15579. return error;
  15580. }
  15581. diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
  15582. index 6ccb08f57fcb..c8cbb5ed2fe3 100644
  15583. --- a/kernel/power/suspend.c
  15584. +++ b/kernel/power/suspend.c
  15585. @@ -369,6 +369,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
  15586. arch_suspend_disable_irqs();
  15587. BUG_ON(!irqs_disabled());
  15588. + system_state = SYSTEM_SUSPEND;
  15589. +
  15590. error = syscore_suspend();
  15591. if (!error) {
  15592. *wakeup = pm_wakeup_pending();
  15593. @@ -385,6 +387,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
  15594. syscore_resume();
  15595. }
  15596. + system_state = SYSTEM_RUNNING;
  15597. +
  15598. arch_suspend_enable_irqs();
  15599. BUG_ON(irqs_disabled());
  15600. @@ -527,6 +531,8 @@ static int enter_state(suspend_state_t state)
  15601. return error;
  15602. }
  15603. +bool pm_in_action;
  15604. +
  15605. /**
  15606. * pm_suspend - Externally visible function for suspending the system.
  15607. * @state: System sleep state to enter.
  15608. @@ -541,6 +547,8 @@ int pm_suspend(suspend_state_t state)
  15609. if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
  15610. return -EINVAL;
  15611. + pm_in_action = true;
  15612. +
  15613. error = enter_state(state);
  15614. if (error) {
  15615. suspend_stats.fail++;
  15616. @@ -548,6 +556,7 @@ int pm_suspend(suspend_state_t state)
  15617. } else {
  15618. suspend_stats.success++;
  15619. }
  15620. + pm_in_action = false;
  15621. return error;
  15622. }
  15623. EXPORT_SYMBOL(pm_suspend);
  15624. diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
  15625. index 9c5b231684d0..cf15bdb6855b 100644
  15626. --- a/kernel/printk/printk.c
  15627. +++ b/kernel/printk/printk.c
  15628. @@ -351,6 +351,65 @@ __packed __aligned(4)
  15629. */
  15630. DEFINE_RAW_SPINLOCK(logbuf_lock);
  15631. +#ifdef CONFIG_EARLY_PRINTK
  15632. +struct console *early_console;
  15633. +
  15634. +static void early_vprintk(const char *fmt, va_list ap)
  15635. +{
  15636. + if (early_console) {
  15637. + char buf[512];
  15638. + int n = vscnprintf(buf, sizeof(buf), fmt, ap);
  15639. +
  15640. + early_console->write(early_console, buf, n);
  15641. + }
  15642. +}
  15643. +
  15644. +asmlinkage void early_printk(const char *fmt, ...)
  15645. +{
  15646. + va_list ap;
  15647. +
  15648. + va_start(ap, fmt);
  15649. + early_vprintk(fmt, ap);
  15650. + va_end(ap);
  15651. +}
  15652. +
  15653. +/*
  15654. + * This is independent of any log levels - a global
  15655. + * kill switch that turns off all of printk.
  15656. + *
  15657. + * Used by the NMI watchdog if early-printk is enabled.
  15658. + */
  15659. +static bool __read_mostly printk_killswitch;
  15660. +
  15661. +static int __init force_early_printk_setup(char *str)
  15662. +{
  15663. + printk_killswitch = true;
  15664. + return 0;
  15665. +}
  15666. +early_param("force_early_printk", force_early_printk_setup);
  15667. +
  15668. +void printk_kill(void)
  15669. +{
  15670. + printk_killswitch = true;
  15671. +}
  15672. +
  15673. +#ifdef CONFIG_PRINTK
  15674. +static int forced_early_printk(const char *fmt, va_list ap)
  15675. +{
  15676. + if (!printk_killswitch)
  15677. + return 0;
  15678. + early_vprintk(fmt, ap);
  15679. + return 1;
  15680. +}
  15681. +#endif
  15682. +
  15683. +#else
  15684. +static inline int forced_early_printk(const char *fmt, va_list ap)
  15685. +{
  15686. + return 0;
  15687. +}
  15688. +#endif
  15689. +
  15690. #ifdef CONFIG_PRINTK
  15691. DECLARE_WAIT_QUEUE_HEAD(log_wait);
  15692. /* the next printk record to read by syslog(READ) or /proc/kmsg */
  15693. @@ -1337,6 +1396,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
  15694. {
  15695. char *text;
  15696. int len = 0;
  15697. + int attempts = 0;
  15698. text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
  15699. if (!text)
  15700. @@ -1348,6 +1408,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
  15701. u64 seq;
  15702. u32 idx;
  15703. enum log_flags prev;
  15704. + int num_msg;
  15705. +try_again:
  15706. + attempts++;
  15707. + if (attempts > 10) {
  15708. + len = -EBUSY;
  15709. + goto out;
  15710. + }
  15711. + num_msg = 0;
  15712. /*
  15713. * Find first record that fits, including all following records,
  15714. @@ -1363,6 +1431,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
  15715. prev = msg->flags;
  15716. idx = log_next(idx);
  15717. seq++;
  15718. + num_msg++;
  15719. + if (num_msg > 5) {
  15720. + num_msg = 0;
  15721. + raw_spin_unlock_irq(&logbuf_lock);
  15722. + raw_spin_lock_irq(&logbuf_lock);
  15723. + if (clear_seq < log_first_seq)
  15724. + goto try_again;
  15725. + }
  15726. }
  15727. /* move first record forward until length fits into the buffer */
  15728. @@ -1376,6 +1452,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
  15729. prev = msg->flags;
  15730. idx = log_next(idx);
  15731. seq++;
  15732. + num_msg++;
  15733. + if (num_msg > 5) {
  15734. + num_msg = 0;
  15735. + raw_spin_unlock_irq(&logbuf_lock);
  15736. + raw_spin_lock_irq(&logbuf_lock);
  15737. + if (clear_seq < log_first_seq)
  15738. + goto try_again;
  15739. + }
  15740. }
  15741. /* last message fitting into this dump */
  15742. @@ -1416,6 +1500,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
  15743. clear_seq = log_next_seq;
  15744. clear_idx = log_next_idx;
  15745. }
  15746. +out:
  15747. raw_spin_unlock_irq(&logbuf_lock);
  15748. kfree(text);
  15749. @@ -1569,6 +1654,12 @@ static void call_console_drivers(int level,
  15750. if (!console_drivers)
  15751. return;
  15752. + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
  15753. + if (in_irq() || in_nmi())
  15754. + return;
  15755. + }
  15756. +
  15757. + migrate_disable();
  15758. for_each_console(con) {
  15759. if (exclusive_console && con != exclusive_console)
  15760. continue;
  15761. @@ -1584,6 +1675,7 @@ static void call_console_drivers(int level,
  15762. else
  15763. con->write(con, text, len);
  15764. }
  15765. + migrate_enable();
  15766. }
  15767. /*
  15768. @@ -1781,6 +1873,13 @@ asmlinkage int vprintk_emit(int facility, int level,
  15769. /* cpu currently holding logbuf_lock in this function */
  15770. static unsigned int logbuf_cpu = UINT_MAX;
  15771. + /*
  15772. + * Fall back to early_printk if a debugging subsystem has
  15773. + * killed printk output
  15774. + */
  15775. + if (unlikely(forced_early_printk(fmt, args)))
  15776. + return 1;
  15777. +
  15778. if (level == LOGLEVEL_SCHED) {
  15779. level = LOGLEVEL_DEFAULT;
  15780. in_sched = true;
  15781. @@ -1885,13 +1984,23 @@ asmlinkage int vprintk_emit(int facility, int level,
  15782. /* If called from the scheduler, we can not call up(). */
  15783. if (!in_sched) {
  15784. + int may_trylock = 1;
  15785. +
  15786. lockdep_off();
  15787. +#ifdef CONFIG_PREEMPT_RT_FULL
  15788. + /*
  15790. + * we can't take a sleeping lock with IRQs or preemption disabled
  15790. + * so we can't print in these contexts
  15791. + */
  15792. + if (!(preempt_count() == 0 && !irqs_disabled()))
  15793. + may_trylock = 0;
  15794. +#endif
  15795. /*
  15796. * Try to acquire and then immediately release the console
  15797. * semaphore. The release will print out buffers and wake up
  15798. * /dev/kmsg and syslog() users.
  15799. */
  15800. - if (console_trylock())
  15801. + if (may_trylock && console_trylock())
  15802. console_unlock();
  15803. lockdep_on();
  15804. }
  15805. @@ -2014,26 +2123,6 @@ DEFINE_PER_CPU(printk_func_t, printk_func);
  15806. #endif /* CONFIG_PRINTK */
  15807. -#ifdef CONFIG_EARLY_PRINTK
  15808. -struct console *early_console;
  15809. -
  15810. -asmlinkage __visible void early_printk(const char *fmt, ...)
  15811. -{
  15812. - va_list ap;
  15813. - char buf[512];
  15814. - int n;
  15815. -
  15816. - if (!early_console)
  15817. - return;
  15818. -
  15819. - va_start(ap, fmt);
  15820. - n = vscnprintf(buf, sizeof(buf), fmt, ap);
  15821. - va_end(ap);
  15822. -
  15823. - early_console->write(early_console, buf, n);
  15824. -}
  15825. -#endif
  15826. -
  15827. static int __add_preferred_console(char *name, int idx, char *options,
  15828. char *brl_options)
  15829. {
  15830. @@ -2303,11 +2392,16 @@ static void console_cont_flush(char *text, size_t size)
  15831. goto out;
  15832. len = cont_print_text(text, size);
  15833. +#ifdef CONFIG_PREEMPT_RT_FULL
  15834. + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  15835. + call_console_drivers(cont.level, NULL, 0, text, len);
  15836. +#else
  15837. raw_spin_unlock(&logbuf_lock);
  15838. stop_critical_timings();
  15839. call_console_drivers(cont.level, NULL, 0, text, len);
  15840. start_critical_timings();
  15841. local_irq_restore(flags);
  15842. +#endif
  15843. return;
  15844. out:
  15845. raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  15846. @@ -2431,13 +2525,17 @@ void console_unlock(void)
  15847. console_idx = log_next(console_idx);
  15848. console_seq++;
  15849. console_prev = msg->flags;
  15850. +#ifdef CONFIG_PREEMPT_RT_FULL
  15851. + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  15852. + call_console_drivers(level, ext_text, ext_len, text, len);
  15853. +#else
  15854. raw_spin_unlock(&logbuf_lock);
  15855. stop_critical_timings(); /* don't trace print latency */
  15856. call_console_drivers(level, ext_text, ext_len, text, len);
  15857. start_critical_timings();
  15858. local_irq_restore(flags);
  15859. -
  15860. +#endif
  15861. if (do_cond_resched)
  15862. cond_resched();
  15863. }
  15864. @@ -2489,6 +2587,11 @@ void console_unblank(void)
  15865. {
  15866. struct console *c;
  15867. + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
  15868. + if (in_irq() || in_nmi())
  15869. + return;
  15870. + }
  15871. +
  15872. /*
  15873. * console_unblank can no longer be called in interrupt context unless
  15874. * oops_in_progress is set to 1..
  15875. diff --git a/kernel/ptrace.c b/kernel/ptrace.c
  15876. index f39a7be98fc1..583ce3aad891 100644
  15877. --- a/kernel/ptrace.c
  15878. +++ b/kernel/ptrace.c
  15879. @@ -172,7 +172,14 @@ static bool ptrace_freeze_traced(struct task_struct *task)
  15880. spin_lock_irq(&task->sighand->siglock);
  15881. if (task_is_traced(task) && !__fatal_signal_pending(task)) {
  15882. - task->state = __TASK_TRACED;
  15883. + unsigned long flags;
  15884. +
  15885. + raw_spin_lock_irqsave(&task->pi_lock, flags);
  15886. + if (task->state & __TASK_TRACED)
  15887. + task->state = __TASK_TRACED;
  15888. + else
  15889. + task->saved_state = __TASK_TRACED;
  15890. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  15891. ret = true;
  15892. }
  15893. spin_unlock_irq(&task->sighand->siglock);
  15894. diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
  15895. index bf08fee53dc7..eeb8ce4ad7b6 100644
  15896. --- a/kernel/rcu/rcutorture.c
  15897. +++ b/kernel/rcu/rcutorture.c
  15898. @@ -404,6 +404,7 @@ static struct rcu_torture_ops rcu_ops = {
  15899. .name = "rcu"
  15900. };
  15901. +#ifndef CONFIG_PREEMPT_RT_FULL
  15902. /*
  15903. * Definitions for rcu_bh torture testing.
  15904. */
  15905. @@ -443,6 +444,12 @@ static struct rcu_torture_ops rcu_bh_ops = {
  15906. .name = "rcu_bh"
  15907. };
  15908. +#else
  15909. +static struct rcu_torture_ops rcu_bh_ops = {
  15910. + .ttype = INVALID_RCU_FLAVOR,
  15911. +};
  15912. +#endif
  15913. +
  15914. /*
  15915. * Don't even think about trying any of these in real life!!!
  15916. * The names includes "busted", and they really means it!
  15917. diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
  15918. index d1a02877a42c..a7b11a29e03a 100644
  15919. --- a/kernel/rcu/tree.c
  15920. +++ b/kernel/rcu/tree.c
  15921. @@ -55,6 +55,11 @@
  15922. #include <linux/random.h>
  15923. #include <linux/trace_events.h>
  15924. #include <linux/suspend.h>
  15925. +#include <linux/delay.h>
  15926. +#include <linux/gfp.h>
  15927. +#include <linux/oom.h>
  15928. +#include <linux/smpboot.h>
  15929. +#include "../time/tick-internal.h"
  15930. #include "tree.h"
  15931. #include "rcu.h"
  15932. @@ -260,6 +265,19 @@ void rcu_sched_qs(void)
  15933. this_cpu_ptr(&rcu_sched_data), true);
  15934. }
  15935. +#ifdef CONFIG_PREEMPT_RT_FULL
  15936. +static void rcu_preempt_qs(void);
  15937. +
  15938. +void rcu_bh_qs(void)
  15939. +{
  15940. + unsigned long flags;
  15941. +
  15942. + /* Callers to this function, rcu_preempt_qs(), must disable irqs. */
  15943. + local_irq_save(flags);
  15944. + rcu_preempt_qs();
  15945. + local_irq_restore(flags);
  15946. +}
  15947. +#else
  15948. void rcu_bh_qs(void)
  15949. {
  15950. if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {
  15951. @@ -269,6 +287,7 @@ void rcu_bh_qs(void)
  15952. __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false);
  15953. }
  15954. }
  15955. +#endif
  15956. static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
  15957. @@ -449,11 +468,13 @@ EXPORT_SYMBOL_GPL(rcu_batches_started_sched);
  15958. /*
  15959. * Return the number of RCU BH batches started thus far for debug & stats.
  15960. */
  15961. +#ifndef CONFIG_PREEMPT_RT_FULL
  15962. unsigned long rcu_batches_started_bh(void)
  15963. {
  15964. return rcu_bh_state.gpnum;
  15965. }
  15966. EXPORT_SYMBOL_GPL(rcu_batches_started_bh);
  15967. +#endif
  15968. /*
  15969. * Return the number of RCU batches completed thus far for debug & stats.
  15970. @@ -473,6 +494,7 @@ unsigned long rcu_batches_completed_sched(void)
  15971. }
  15972. EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
  15973. +#ifndef CONFIG_PREEMPT_RT_FULL
  15974. /*
  15975. * Return the number of RCU BH batches completed thus far for debug & stats.
  15976. */
  15977. @@ -481,6 +503,7 @@ unsigned long rcu_batches_completed_bh(void)
  15978. return rcu_bh_state.completed;
  15979. }
  15980. EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
  15981. +#endif
  15982. /*
  15983. * Return the number of RCU expedited batches completed thus far for
  15984. @@ -504,6 +527,7 @@ unsigned long rcu_exp_batches_completed_sched(void)
  15985. }
  15986. EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched);
  15987. +#ifndef CONFIG_PREEMPT_RT_FULL
  15988. /*
  15989. * Force a quiescent state.
  15990. */
  15991. @@ -522,6 +546,13 @@ void rcu_bh_force_quiescent_state(void)
  15992. }
  15993. EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
  15994. +#else
  15995. +void rcu_force_quiescent_state(void)
  15996. +{
  15997. +}
  15998. +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  15999. +#endif
  16000. +
  16001. /*
  16002. * Force a quiescent state for RCU-sched.
  16003. */
  16004. @@ -572,9 +603,11 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
  16005. case RCU_FLAVOR:
  16006. rsp = rcu_state_p;
  16007. break;
  16008. +#ifndef CONFIG_PREEMPT_RT_FULL
  16009. case RCU_BH_FLAVOR:
  16010. rsp = &rcu_bh_state;
  16011. break;
  16012. +#endif
  16013. case RCU_SCHED_FLAVOR:
  16014. rsp = &rcu_sched_state;
  16015. break;
  16016. @@ -3026,18 +3059,17 @@ __rcu_process_callbacks(struct rcu_state *rsp)
  16017. /*
  16018. * Do RCU core processing for the current CPU.
  16019. */
  16020. -static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
  16021. +static __latent_entropy void rcu_process_callbacks(void)
  16022. {
  16023. struct rcu_state *rsp;
  16024. if (cpu_is_offline(smp_processor_id()))
  16025. return;
  16026. - trace_rcu_utilization(TPS("Start RCU core"));
  16027. for_each_rcu_flavor(rsp)
  16028. __rcu_process_callbacks(rsp);
  16029. - trace_rcu_utilization(TPS("End RCU core"));
  16030. }
  16031. +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
  16032. /*
  16033. * Schedule RCU callback invocation. If the specified type of RCU
  16034. * does not support RCU priority boosting, just do a direct call,
  16035. @@ -3049,19 +3081,106 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
  16036. {
  16037. if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
  16038. return;
  16039. - if (likely(!rsp->boost)) {
  16040. - rcu_do_batch(rsp, rdp);
  16041. - return;
  16042. - }
  16043. - invoke_rcu_callbacks_kthread();
  16044. + rcu_do_batch(rsp, rdp);
  16045. +}
  16046. +
  16047. +static void rcu_wake_cond(struct task_struct *t, int status)
  16048. +{
  16049. + /*
  16050. + * If the thread is yielding, only wake it when this
  16051. + * is invoked from idle
  16052. + */
  16053. + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
  16054. + wake_up_process(t);
  16055. }
  16056. +/*
  16057. + * Wake up this CPU's rcuc kthread to do RCU core processing.
  16058. + */
  16059. static void invoke_rcu_core(void)
  16060. {
  16061. - if (cpu_online(smp_processor_id()))
  16062. - raise_softirq(RCU_SOFTIRQ);
  16063. + unsigned long flags;
  16064. + struct task_struct *t;
  16065. +
  16066. + if (!cpu_online(smp_processor_id()))
  16067. + return;
  16068. + local_irq_save(flags);
  16069. + __this_cpu_write(rcu_cpu_has_work, 1);
  16070. + t = __this_cpu_read(rcu_cpu_kthread_task);
  16071. + if (t != NULL && current != t)
  16072. + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
  16073. + local_irq_restore(flags);
  16074. +}
  16075. +
  16076. +static void rcu_cpu_kthread_park(unsigned int cpu)
  16077. +{
  16078. + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
  16079. +}
  16080. +
  16081. +static int rcu_cpu_kthread_should_run(unsigned int cpu)
  16082. +{
  16083. + return __this_cpu_read(rcu_cpu_has_work);
  16084. }
  16085. +/*
  16086. + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
  16087. + * RCU softirq used in flavors and configurations of RCU that do not
  16088. + * support RCU priority boosting.
  16089. + */
  16090. +static void rcu_cpu_kthread(unsigned int cpu)
  16091. +{
  16092. + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
  16093. + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
  16094. + int spincnt;
  16095. +
  16096. + for (spincnt = 0; spincnt < 10; spincnt++) {
  16097. + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
  16098. + local_bh_disable();
  16099. + *statusp = RCU_KTHREAD_RUNNING;
  16100. + this_cpu_inc(rcu_cpu_kthread_loops);
  16101. + local_irq_disable();
  16102. + work = *workp;
  16103. + *workp = 0;
  16104. + local_irq_enable();
  16105. + if (work)
  16106. + rcu_process_callbacks();
  16107. + local_bh_enable();
  16108. + if (*workp == 0) {
  16109. + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
  16110. + *statusp = RCU_KTHREAD_WAITING;
  16111. + return;
  16112. + }
  16113. + }
  16114. + *statusp = RCU_KTHREAD_YIELDING;
  16115. + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
  16116. + schedule_timeout_interruptible(2);
  16117. + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
  16118. + *statusp = RCU_KTHREAD_WAITING;
  16119. +}
  16120. +
  16121. +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
  16122. + .store = &rcu_cpu_kthread_task,
  16123. + .thread_should_run = rcu_cpu_kthread_should_run,
  16124. + .thread_fn = rcu_cpu_kthread,
  16125. + .thread_comm = "rcuc/%u",
  16126. + .setup = rcu_cpu_kthread_setup,
  16127. + .park = rcu_cpu_kthread_park,
  16128. +};
  16129. +
  16130. +/*
  16131. + * Spawn per-CPU RCU core processing kthreads.
  16132. + */
  16133. +static int __init rcu_spawn_core_kthreads(void)
  16134. +{
  16135. + int cpu;
  16136. +
  16137. + for_each_possible_cpu(cpu)
  16138. + per_cpu(rcu_cpu_has_work, cpu) = 0;
  16139. + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
  16140. + return 0;
  16141. +}
  16142. +early_initcall(rcu_spawn_core_kthreads);
  16143. +
  16144. /*
  16145. * Handle any core-RCU processing required by a call_rcu() invocation.
  16146. */
  16147. @@ -3205,6 +3324,7 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
  16148. }
  16149. EXPORT_SYMBOL_GPL(call_rcu_sched);
  16150. +#ifndef CONFIG_PREEMPT_RT_FULL
  16151. /*
  16152. * Queue an RCU callback for invocation after a quicker grace period.
  16153. */
  16154. @@ -3213,6 +3333,7 @@ void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
  16155. __call_rcu(head, func, &rcu_bh_state, -1, 0);
  16156. }
  16157. EXPORT_SYMBOL_GPL(call_rcu_bh);
  16158. +#endif
  16159. /*
  16160. * Queue an RCU callback for lazy invocation after a grace period.
  16161. @@ -3304,6 +3425,7 @@ void synchronize_sched(void)
  16162. }
  16163. EXPORT_SYMBOL_GPL(synchronize_sched);
  16164. +#ifndef CONFIG_PREEMPT_RT_FULL
  16165. /**
  16166. * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
  16167. *
  16168. @@ -3330,6 +3452,7 @@ void synchronize_rcu_bh(void)
  16169. wait_rcu_gp(call_rcu_bh);
  16170. }
  16171. EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
  16172. +#endif
  16173. /**
  16174. * get_state_synchronize_rcu - Snapshot current RCU state
  16175. @@ -3708,6 +3831,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
  16176. mutex_unlock(&rsp->barrier_mutex);
  16177. }
  16178. +#ifndef CONFIG_PREEMPT_RT_FULL
  16179. /**
  16180. * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
  16181. */
  16182. @@ -3716,6 +3840,7 @@ void rcu_barrier_bh(void)
  16183. _rcu_barrier(&rcu_bh_state);
  16184. }
  16185. EXPORT_SYMBOL_GPL(rcu_barrier_bh);
  16186. +#endif
  16187. /**
  16188. * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
  16189. @@ -4237,12 +4362,13 @@ void __init rcu_init(void)
  16190. rcu_bootup_announce();
  16191. rcu_init_geometry();
  16192. +#ifndef CONFIG_PREEMPT_RT_FULL
  16193. rcu_init_one(&rcu_bh_state);
  16194. +#endif
  16195. rcu_init_one(&rcu_sched_state);
  16196. if (dump_tree)
  16197. rcu_dump_rcu_node_tree(&rcu_sched_state);
  16198. __rcu_init_preempt();
  16199. - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
  16200. /*
  16201. * We don't need protection against CPU-hotplug here because
  16202. diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
  16203. index e99a5234d9ed..958ac107062c 100644
  16204. --- a/kernel/rcu/tree.h
  16205. +++ b/kernel/rcu/tree.h
  16206. @@ -588,18 +588,18 @@ extern struct list_head rcu_struct_flavors;
  16207. */
  16208. extern struct rcu_state rcu_sched_state;
  16209. +#ifndef CONFIG_PREEMPT_RT_FULL
  16210. extern struct rcu_state rcu_bh_state;
  16211. +#endif
  16212. #ifdef CONFIG_PREEMPT_RCU
  16213. extern struct rcu_state rcu_preempt_state;
  16214. #endif /* #ifdef CONFIG_PREEMPT_RCU */
  16215. -#ifdef CONFIG_RCU_BOOST
  16216. DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  16217. DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
  16218. DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  16219. DECLARE_PER_CPU(char, rcu_cpu_has_work);
  16220. -#endif /* #ifdef CONFIG_RCU_BOOST */
  16221. #ifndef RCU_TREE_NONCORE
  16222. @@ -619,10 +619,9 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
  16223. static void __init __rcu_init_preempt(void);
  16224. static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
  16225. static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
  16226. -static void invoke_rcu_callbacks_kthread(void);
  16227. static bool rcu_is_callbacks_kthread(void);
  16228. +static void rcu_cpu_kthread_setup(unsigned int cpu);
  16229. #ifdef CONFIG_RCU_BOOST
  16230. -static void rcu_preempt_do_callbacks(void);
  16231. static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
  16232. struct rcu_node *rnp);
  16233. #endif /* #ifdef CONFIG_RCU_BOOST */
  16234. diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
  16235. index e3944c4b072d..be12d1aac840 100644
  16236. --- a/kernel/rcu/tree_plugin.h
  16237. +++ b/kernel/rcu/tree_plugin.h
  16238. @@ -24,25 +24,10 @@
  16239. * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  16240. */
  16241. -#include <linux/delay.h>
  16242. -#include <linux/gfp.h>
  16243. -#include <linux/oom.h>
  16244. -#include <linux/smpboot.h>
  16245. -#include "../time/tick-internal.h"
  16246. -
  16247. #ifdef CONFIG_RCU_BOOST
  16248. #include "../locking/rtmutex_common.h"
  16249. -/*
  16250. - * Control variables for per-CPU and per-rcu_node kthreads. These
  16251. - * handle all flavors of RCU.
  16252. - */
  16253. -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
  16254. -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  16255. -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  16256. -DEFINE_PER_CPU(char, rcu_cpu_has_work);
  16257. -
  16258. #else /* #ifdef CONFIG_RCU_BOOST */
  16259. /*
  16260. @@ -55,6 +40,14 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work);
  16261. #endif /* #else #ifdef CONFIG_RCU_BOOST */
  16262. +/*
  16263. + * Control variables for per-CPU and per-rcu_node kthreads. These
  16264. + * handle all flavors of RCU.
  16265. + */
  16266. +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  16267. +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  16268. +DEFINE_PER_CPU(char, rcu_cpu_has_work);
  16269. +
  16270. #ifdef CONFIG_RCU_NOCB_CPU
  16271. static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
  16272. static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */
  16273. @@ -426,7 +419,7 @@ void rcu_read_unlock_special(struct task_struct *t)
  16274. }
  16275. /* Hardware IRQ handlers cannot block, complain if they get here. */
  16276. - if (in_irq() || in_serving_softirq()) {
  16277. + if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
  16278. lockdep_rcu_suspicious(__FILE__, __LINE__,
  16279. "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
  16280. pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n",
  16281. @@ -632,15 +625,6 @@ static void rcu_preempt_check_callbacks(void)
  16282. t->rcu_read_unlock_special.b.need_qs = true;
  16283. }
  16284. -#ifdef CONFIG_RCU_BOOST
  16285. -
  16286. -static void rcu_preempt_do_callbacks(void)
  16287. -{
  16288. - rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
  16289. -}
  16290. -
  16291. -#endif /* #ifdef CONFIG_RCU_BOOST */
  16292. -
  16293. /*
  16294. * Queue a preemptible-RCU callback for invocation after a grace period.
  16295. */
  16296. @@ -829,6 +813,19 @@ void exit_rcu(void)
  16297. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  16298. +/*
  16299. + * If boosting, set rcuc kthreads to realtime priority.
  16300. + */
  16301. +static void rcu_cpu_kthread_setup(unsigned int cpu)
  16302. +{
  16303. +#ifdef CONFIG_RCU_BOOST
  16304. + struct sched_param sp;
  16305. +
  16306. + sp.sched_priority = kthread_prio;
  16307. + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
  16308. +#endif /* #ifdef CONFIG_RCU_BOOST */
  16309. +}
  16310. +
  16311. #ifdef CONFIG_RCU_BOOST
  16312. #include "../locking/rtmutex_common.h"
  16313. @@ -860,16 +857,6 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp)
  16314. #endif /* #else #ifdef CONFIG_RCU_TRACE */
  16315. -static void rcu_wake_cond(struct task_struct *t, int status)
  16316. -{
  16317. - /*
  16318. - * If the thread is yielding, only wake it when this
  16319. - * is invoked from idle
  16320. - */
  16321. - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
  16322. - wake_up_process(t);
  16323. -}
  16324. -
  16325. /*
  16326. * Carry out RCU priority boosting on the task indicated by ->exp_tasks
  16327. * or ->boost_tasks, advancing the pointer to the next task in the
  16328. @@ -1012,23 +999,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
  16329. }
  16330. }
  16331. -/*
  16332. - * Wake up the per-CPU kthread to invoke RCU callbacks.
  16333. - */
  16334. -static void invoke_rcu_callbacks_kthread(void)
  16335. -{
  16336. - unsigned long flags;
  16337. -
  16338. - local_irq_save(flags);
  16339. - __this_cpu_write(rcu_cpu_has_work, 1);
  16340. - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
  16341. - current != __this_cpu_read(rcu_cpu_kthread_task)) {
  16342. - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
  16343. - __this_cpu_read(rcu_cpu_kthread_status));
  16344. - }
  16345. - local_irq_restore(flags);
  16346. -}
  16347. -
  16348. /*
  16349. * Is the current CPU running the RCU-callbacks kthread?
  16350. * Caller must have preemption disabled.
  16351. @@ -1083,67 +1053,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
  16352. return 0;
  16353. }
  16354. -static void rcu_kthread_do_work(void)
  16355. -{
  16356. - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
  16357. - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
  16358. - rcu_preempt_do_callbacks();
  16359. -}
  16360. -
  16361. -static void rcu_cpu_kthread_setup(unsigned int cpu)
  16362. -{
  16363. - struct sched_param sp;
  16364. -
  16365. - sp.sched_priority = kthread_prio;
  16366. - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
  16367. -}
  16368. -
  16369. -static void rcu_cpu_kthread_park(unsigned int cpu)
  16370. -{
  16371. - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
  16372. -}
  16373. -
  16374. -static int rcu_cpu_kthread_should_run(unsigned int cpu)
  16375. -{
  16376. - return __this_cpu_read(rcu_cpu_has_work);
  16377. -}
  16378. -
  16379. -/*
  16380. - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
  16381. - * RCU softirq used in flavors and configurations of RCU that do not
  16382. - * support RCU priority boosting.
  16383. - */
  16384. -static void rcu_cpu_kthread(unsigned int cpu)
  16385. -{
  16386. - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
  16387. - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
  16388. - int spincnt;
  16389. -
  16390. - for (spincnt = 0; spincnt < 10; spincnt++) {
  16391. - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
  16392. - local_bh_disable();
  16393. - *statusp = RCU_KTHREAD_RUNNING;
  16394. - this_cpu_inc(rcu_cpu_kthread_loops);
  16395. - local_irq_disable();
  16396. - work = *workp;
  16397. - *workp = 0;
  16398. - local_irq_enable();
  16399. - if (work)
  16400. - rcu_kthread_do_work();
  16401. - local_bh_enable();
  16402. - if (*workp == 0) {
  16403. - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
  16404. - *statusp = RCU_KTHREAD_WAITING;
  16405. - return;
  16406. - }
  16407. - }
  16408. - *statusp = RCU_KTHREAD_YIELDING;
  16409. - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
  16410. - schedule_timeout_interruptible(2);
  16411. - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
  16412. - *statusp = RCU_KTHREAD_WAITING;
  16413. -}
  16414. -
  16415. /*
  16416. * Set the per-rcu_node kthread's affinity to cover all CPUs that are
  16417. * served by the rcu_node in question. The CPU hotplug lock is still
  16418. @@ -1174,26 +1083,12 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
  16419. free_cpumask_var(cm);
  16420. }
  16421. -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
  16422. - .store = &rcu_cpu_kthread_task,
  16423. - .thread_should_run = rcu_cpu_kthread_should_run,
  16424. - .thread_fn = rcu_cpu_kthread,
  16425. - .thread_comm = "rcuc/%u",
  16426. - .setup = rcu_cpu_kthread_setup,
  16427. - .park = rcu_cpu_kthread_park,
  16428. -};
  16429. -
  16430. /*
  16431. * Spawn boost kthreads -- called as soon as the scheduler is running.
  16432. */
  16433. static void __init rcu_spawn_boost_kthreads(void)
  16434. {
  16435. struct rcu_node *rnp;
  16436. - int cpu;
  16437. -
  16438. - for_each_possible_cpu(cpu)
  16439. - per_cpu(rcu_cpu_has_work, cpu) = 0;
  16440. - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
  16441. rcu_for_each_leaf_node(rcu_state_p, rnp)
  16442. (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
  16443. }
  16444. @@ -1216,11 +1111,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
  16445. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  16446. }
  16447. -static void invoke_rcu_callbacks_kthread(void)
  16448. -{
  16449. - WARN_ON_ONCE(1);
  16450. -}
  16451. -
  16452. static bool rcu_is_callbacks_kthread(void)
  16453. {
  16454. return false;
  16455. @@ -1244,7 +1134,7 @@ static void rcu_prepare_kthreads(int cpu)
  16456. #endif /* #else #ifdef CONFIG_RCU_BOOST */
  16457. -#if !defined(CONFIG_RCU_FAST_NO_HZ)
  16458. +#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL)
  16459. /*
  16460. * Check to see if any future RCU-related work will need to be done
  16461. @@ -1261,7 +1151,9 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt)
  16462. return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)
  16463. ? 0 : rcu_cpu_has_callbacks(NULL);
  16464. }
  16465. +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */
  16466. +#if !defined(CONFIG_RCU_FAST_NO_HZ)
  16467. /*
  16468. * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
  16469. * after it.
  16470. @@ -1357,6 +1249,8 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
  16471. return cbs_ready;
  16472. }
  16473. +#ifndef CONFIG_PREEMPT_RT_FULL
  16474. +
  16475. /*
  16476. * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
  16477. * to invoke. If the CPU has callbacks, try to advance them. Tell the
  16478. @@ -1402,6 +1296,7 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt)
  16479. *nextevt = basemono + dj * TICK_NSEC;
  16480. return 0;
  16481. }
  16482. +#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */
  16483. /*
  16484. * Prepare a CPU for idle from an RCU perspective. The first major task
  16485. diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
  16486. index 4f6db7e6a117..ee02e1e1b3e5 100644
  16487. --- a/kernel/rcu/update.c
  16488. +++ b/kernel/rcu/update.c
  16489. @@ -62,7 +62,7 @@
  16490. #ifndef CONFIG_TINY_RCU
  16491. module_param(rcu_expedited, int, 0);
  16492. module_param(rcu_normal, int, 0);
  16493. -static int rcu_normal_after_boot;
  16494. +static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
  16495. module_param(rcu_normal_after_boot, int, 0);
  16496. #endif /* #ifndef CONFIG_TINY_RCU */
  16497. @@ -132,8 +132,7 @@ bool rcu_gp_is_normal(void)
  16498. }
  16499. EXPORT_SYMBOL_GPL(rcu_gp_is_normal);
  16500. -static atomic_t rcu_expedited_nesting =
  16501. - ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0);
  16502. +static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1);
  16503. /*
  16504. * Should normal grace-period primitives be expedited? Intended for
  16505. @@ -182,8 +181,7 @@ EXPORT_SYMBOL_GPL(rcu_unexpedite_gp);
  16506. */
  16507. void rcu_end_inkernel_boot(void)
  16508. {
  16509. - if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT))
  16510. - rcu_unexpedite_gp();
  16511. + rcu_unexpedite_gp();
  16512. if (rcu_normal_after_boot)
  16513. WRITE_ONCE(rcu_normal, 1);
  16514. }
  16515. @@ -298,6 +296,7 @@ int rcu_read_lock_held(void)
  16516. }
  16517. EXPORT_SYMBOL_GPL(rcu_read_lock_held);
  16518. +#ifndef CONFIG_PREEMPT_RT_FULL
  16519. /**
  16520. * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
  16521. *
  16522. @@ -324,6 +323,7 @@ int rcu_read_lock_bh_held(void)
  16523. return in_softirq() || irqs_disabled();
  16524. }
  16525. EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
  16526. +#endif
  16527. #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
  16528. diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
  16529. index 5e59b832ae2b..7337a7f60e3f 100644
  16530. --- a/kernel/sched/Makefile
  16531. +++ b/kernel/sched/Makefile
  16532. @@ -17,7 +17,7 @@ endif
  16533. obj-y += core.o loadavg.o clock.o cputime.o
  16534. obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
  16535. -obj-y += wait.o swait.o completion.o idle.o
  16536. +obj-y += wait.o swait.o swork.o completion.o idle.o
  16537. obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
  16538. obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
  16539. obj-$(CONFIG_SCHEDSTATS) += stats.o
  16540. diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
  16541. index 8d0f35debf35..b62cf6400fe0 100644
  16542. --- a/kernel/sched/completion.c
  16543. +++ b/kernel/sched/completion.c
  16544. @@ -30,10 +30,10 @@ void complete(struct completion *x)
  16545. {
  16546. unsigned long flags;
  16547. - spin_lock_irqsave(&x->wait.lock, flags);
  16548. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  16549. x->done++;
  16550. - __wake_up_locked(&x->wait, TASK_NORMAL, 1);
  16551. - spin_unlock_irqrestore(&x->wait.lock, flags);
  16552. + swake_up_locked(&x->wait);
  16553. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  16554. }
  16555. EXPORT_SYMBOL(complete);
  16556. @@ -50,10 +50,10 @@ void complete_all(struct completion *x)
  16557. {
  16558. unsigned long flags;
  16559. - spin_lock_irqsave(&x->wait.lock, flags);
  16560. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  16561. x->done += UINT_MAX/2;
  16562. - __wake_up_locked(&x->wait, TASK_NORMAL, 0);
  16563. - spin_unlock_irqrestore(&x->wait.lock, flags);
  16564. + swake_up_all_locked(&x->wait);
  16565. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  16566. }
  16567. EXPORT_SYMBOL(complete_all);
  16568. @@ -62,20 +62,20 @@ do_wait_for_common(struct completion *x,
  16569. long (*action)(long), long timeout, int state)
  16570. {
  16571. if (!x->done) {
  16572. - DECLARE_WAITQUEUE(wait, current);
  16573. + DECLARE_SWAITQUEUE(wait);
  16574. - __add_wait_queue_tail_exclusive(&x->wait, &wait);
  16575. + __prepare_to_swait(&x->wait, &wait);
  16576. do {
  16577. if (signal_pending_state(state, current)) {
  16578. timeout = -ERESTARTSYS;
  16579. break;
  16580. }
  16581. __set_current_state(state);
  16582. - spin_unlock_irq(&x->wait.lock);
  16583. + raw_spin_unlock_irq(&x->wait.lock);
  16584. timeout = action(timeout);
  16585. - spin_lock_irq(&x->wait.lock);
  16586. + raw_spin_lock_irq(&x->wait.lock);
  16587. } while (!x->done && timeout);
  16588. - __remove_wait_queue(&x->wait, &wait);
  16589. + __finish_swait(&x->wait, &wait);
  16590. if (!x->done)
  16591. return timeout;
  16592. }
  16593. @@ -89,9 +89,9 @@ __wait_for_common(struct completion *x,
  16594. {
  16595. might_sleep();
  16596. - spin_lock_irq(&x->wait.lock);
  16597. + raw_spin_lock_irq(&x->wait.lock);
  16598. timeout = do_wait_for_common(x, action, timeout, state);
  16599. - spin_unlock_irq(&x->wait.lock);
  16600. + raw_spin_unlock_irq(&x->wait.lock);
  16601. return timeout;
  16602. }
  16603. @@ -277,12 +277,12 @@ bool try_wait_for_completion(struct completion *x)
  16604. if (!READ_ONCE(x->done))
  16605. return 0;
  16606. - spin_lock_irqsave(&x->wait.lock, flags);
  16607. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  16608. if (!x->done)
  16609. ret = 0;
  16610. else
  16611. x->done--;
  16612. - spin_unlock_irqrestore(&x->wait.lock, flags);
  16613. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  16614. return ret;
  16615. }
  16616. EXPORT_SYMBOL(try_wait_for_completion);
  16617. @@ -311,7 +311,7 @@ bool completion_done(struct completion *x)
  16618. * after it's acquired the lock.
  16619. */
  16620. smp_rmb();
  16621. - spin_unlock_wait(&x->wait.lock);
  16622. + raw_spin_unlock_wait(&x->wait.lock);
  16623. return true;
  16624. }
  16625. EXPORT_SYMBOL(completion_done);
  16626. diff --git a/kernel/sched/core.c b/kernel/sched/core.c
  16627. index e5066955cc3a..ed1ebcc2ff3d 100644
  16628. --- a/kernel/sched/core.c
  16629. +++ b/kernel/sched/core.c
  16630. @@ -129,7 +129,11 @@ const_debug unsigned int sysctl_sched_features =
  16631. * Number of tasks to iterate in a single balance run.
  16632. * Limited because this is done with IRQs disabled.
  16633. */
  16634. +#ifndef CONFIG_PREEMPT_RT_FULL
  16635. const_debug unsigned int sysctl_sched_nr_migrate = 32;
  16636. +#else
  16637. +const_debug unsigned int sysctl_sched_nr_migrate = 8;
  16638. +#endif
  16639. /*
  16640. * period over which we average the RT time consumption, measured
  16641. @@ -345,6 +349,7 @@ static void init_rq_hrtick(struct rq *rq)
  16642. hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  16643. rq->hrtick_timer.function = hrtick;
  16644. + rq->hrtick_timer.irqsafe = 1;
  16645. }
  16646. #else /* CONFIG_SCHED_HRTICK */
  16647. static inline void hrtick_clear(struct rq *rq)
  16648. @@ -425,9 +430,15 @@ static bool set_nr_if_polling(struct task_struct *p)
  16649. #endif
  16650. #endif
  16651. -void wake_q_add(struct wake_q_head *head, struct task_struct *task)
  16652. +void __wake_q_add(struct wake_q_head *head, struct task_struct *task,
  16653. + bool sleeper)
  16654. {
  16655. - struct wake_q_node *node = &task->wake_q;
  16656. + struct wake_q_node *node;
  16657. +
  16658. + if (sleeper)
  16659. + node = &task->wake_q_sleeper;
  16660. + else
  16661. + node = &task->wake_q;
  16662. /*
  16663. * Atomically grab the task, if ->wake_q is !nil already it means
  16664. @@ -449,24 +460,33 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
  16665. head->lastp = &node->next;
  16666. }
  16667. -void wake_up_q(struct wake_q_head *head)
  16668. +void __wake_up_q(struct wake_q_head *head, bool sleeper)
  16669. {
  16670. struct wake_q_node *node = head->first;
  16671. while (node != WAKE_Q_TAIL) {
  16672. struct task_struct *task;
  16673. - task = container_of(node, struct task_struct, wake_q);
  16674. + if (sleeper)
  16675. + task = container_of(node, struct task_struct, wake_q_sleeper);
  16676. + else
  16677. + task = container_of(node, struct task_struct, wake_q);
  16678. BUG_ON(!task);
  16679. /* task can safely be re-inserted now */
  16680. node = node->next;
  16681. - task->wake_q.next = NULL;
  16682. + if (sleeper)
  16683. + task->wake_q_sleeper.next = NULL;
  16684. + else
  16685. + task->wake_q.next = NULL;
  16686. /*
  16687. * wake_up_process() implies a wmb() to pair with the queueing
  16688. * in wake_q_add() so as not to miss wakeups.
  16689. */
  16690. - wake_up_process(task);
  16691. + if (sleeper)
  16692. + wake_up_lock_sleeper(task);
  16693. + else
  16694. + wake_up_process(task);
  16695. put_task_struct(task);
  16696. }
  16697. }
  16698. @@ -502,6 +522,38 @@ void resched_curr(struct rq *rq)
  16699. trace_sched_wake_idle_without_ipi(cpu);
  16700. }
  16701. +#ifdef CONFIG_PREEMPT_LAZY
  16702. +void resched_curr_lazy(struct rq *rq)
  16703. +{
  16704. + struct task_struct *curr = rq->curr;
  16705. + int cpu;
  16706. +
  16707. + if (!sched_feat(PREEMPT_LAZY)) {
  16708. + resched_curr(rq);
  16709. + return;
  16710. + }
  16711. +
  16712. + lockdep_assert_held(&rq->lock);
  16713. +
  16714. + if (test_tsk_need_resched(curr))
  16715. + return;
  16716. +
  16717. + if (test_tsk_need_resched_lazy(curr))
  16718. + return;
  16719. +
  16720. + set_tsk_need_resched_lazy(curr);
  16721. +
  16722. + cpu = cpu_of(rq);
  16723. + if (cpu == smp_processor_id())
  16724. + return;
  16725. +
  16726. + /* NEED_RESCHED_LAZY must be visible before we test polling */
  16727. + smp_mb();
  16728. + if (!tsk_is_polling(curr))
  16729. + smp_send_reschedule(cpu);
  16730. +}
  16731. +#endif
  16732. +
  16733. void resched_cpu(int cpu)
  16734. {
  16735. struct rq *rq = cpu_rq(cpu);
  16736. @@ -524,11 +576,14 @@ void resched_cpu(int cpu)
  16737. */
  16738. int get_nohz_timer_target(void)
  16739. {
  16740. - int i, cpu = smp_processor_id();
  16741. + int i, cpu;
  16742. struct sched_domain *sd;
  16743. + preempt_disable_rt();
  16744. + cpu = smp_processor_id();
  16745. +
  16746. if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu))
  16747. - return cpu;
  16748. + goto preempt_en_rt;
  16749. rcu_read_lock();
  16750. for_each_domain(cpu, sd) {
  16751. @@ -547,6 +602,8 @@ int get_nohz_timer_target(void)
  16752. cpu = housekeeping_any_cpu();
  16753. unlock:
  16754. rcu_read_unlock();
  16755. +preempt_en_rt:
  16756. + preempt_enable_rt();
  16757. return cpu;
  16758. }
  16759. /*
  16760. @@ -1092,7 +1149,8 @@ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_ma
  16761. p->nr_cpus_allowed = cpumask_weight(new_mask);
  16762. }
  16763. -void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  16764. +static void __do_set_cpus_allowed_tail(struct task_struct *p,
  16765. + const struct cpumask *new_mask)
  16766. {
  16767. struct rq *rq = task_rq(p);
  16768. bool queued, running;
  16769. @@ -1121,6 +1179,98 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  16770. set_curr_task(rq, p);
  16771. }
  16772. +void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  16773. +{
  16774. + if (__migrate_disabled(p)) {
  16775. + lockdep_assert_held(&p->pi_lock);
  16776. +
  16777. + cpumask_copy(&p->cpus_allowed, new_mask);
  16778. +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
  16779. + p->migrate_disable_update = 1;
  16780. +#endif
  16781. + return;
  16782. + }
  16783. + __do_set_cpus_allowed_tail(p, new_mask);
  16784. +}
  16785. +
  16786. +static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
  16787. +static DEFINE_MUTEX(sched_down_mutex);
  16788. +static cpumask_t sched_down_cpumask;
  16789. +
  16790. +void tell_sched_cpu_down_begin(int cpu)
  16791. +{
  16792. + mutex_lock(&sched_down_mutex);
  16793. + cpumask_set_cpu(cpu, &sched_down_cpumask);
  16794. + mutex_unlock(&sched_down_mutex);
  16795. +}
  16796. +
  16797. +void tell_sched_cpu_down_done(int cpu)
  16798. +{
  16799. + mutex_lock(&sched_down_mutex);
  16800. + cpumask_clear_cpu(cpu, &sched_down_cpumask);
  16801. + mutex_unlock(&sched_down_mutex);
  16802. +}
  16803. +
  16804. +/**
  16805. + * migrate_me - try to move the current task off this cpu
  16806. + *
  16807. + * Used by the pin_current_cpu() code to try to get tasks
  16808. + * to move off the current CPU as it is going down.
  16809. + * It will only move the task if the task isn't pinned to
  16810. + * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
  16811. + * and the task has to be in a RUNNING state. Otherwise the
  16812. + * movement of the task will wake it up (change its state
  16813. + * to running) when the task did not expect it.
  16814. + *
  16815. + * Returns 1 if it succeeded in moving the current task
  16816. + * 0 otherwise.
  16817. + */
  16818. +int migrate_me(void)
  16819. +{
  16820. + struct task_struct *p = current;
  16821. + struct migration_arg arg;
  16822. + struct cpumask *cpumask;
  16823. + struct cpumask *mask;
  16824. + unsigned int dest_cpu;
  16825. + struct rq_flags rf;
  16826. + struct rq *rq;
  16827. +
  16828. + /*
  16829. + * We can not migrate tasks bounded to a CPU or tasks not
  16830. + * running. The movement of the task will wake it up.
  16831. + */
  16832. + if (p->flags & PF_NO_SETAFFINITY || p->state)
  16833. + return 0;
  16834. +
  16835. + mutex_lock(&sched_down_mutex);
  16836. + rq = task_rq_lock(p, &rf);
  16837. +
  16838. + cpumask = this_cpu_ptr(&sched_cpumasks);
  16839. + mask = &p->cpus_allowed;
  16840. +
  16841. + cpumask_andnot(cpumask, mask, &sched_down_cpumask);
  16842. +
  16843. + if (!cpumask_weight(cpumask)) {
  16844. + /* It's only on this CPU? */
  16845. + task_rq_unlock(rq, p, &rf);
  16846. + mutex_unlock(&sched_down_mutex);
  16847. + return 0;
  16848. + }
  16849. +
  16850. + dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
  16851. +
  16852. + arg.task = p;
  16853. + arg.dest_cpu = dest_cpu;
  16854. +
  16855. + task_rq_unlock(rq, p, &rf);
  16856. +
  16857. + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
  16858. + tlb_migrate_finish(p->mm);
  16859. + mutex_unlock(&sched_down_mutex);
  16860. +
  16861. + return 1;
  16862. +}
  16863. +
  16864. /*
  16865. * Change a given task's CPU affinity. Migrate the thread to a
  16866. * proper CPU and schedule it away if the CPU it's executing on
  16867. @@ -1179,7 +1329,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
  16868. }
  16869. /* Can the task run on the task's current CPU? If so, we're done */
  16870. - if (cpumask_test_cpu(task_cpu(p), new_mask))
  16871. + if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
  16872. goto out;
  16873. dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
  16874. @@ -1366,6 +1516,18 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
  16875. return ret;
  16876. }
  16877. +static bool check_task_state(struct task_struct *p, long match_state)
  16878. +{
  16879. + bool match = false;
  16880. +
  16881. + raw_spin_lock_irq(&p->pi_lock);
  16882. + if (p->state == match_state || p->saved_state == match_state)
  16883. + match = true;
  16884. + raw_spin_unlock_irq(&p->pi_lock);
  16885. +
  16886. + return match;
  16887. +}
  16888. +
  16889. /*
  16890. * wait_task_inactive - wait for a thread to unschedule.
  16891. *
  16892. @@ -1410,7 +1572,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
  16893. * is actually now running somewhere else!
  16894. */
  16895. while (task_running(rq, p)) {
  16896. - if (match_state && unlikely(p->state != match_state))
  16897. + if (match_state && !check_task_state(p, match_state))
  16898. return 0;
  16899. cpu_relax();
  16900. }
  16901. @@ -1425,7 +1587,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
  16902. running = task_running(rq, p);
  16903. queued = task_on_rq_queued(p);
  16904. ncsw = 0;
  16905. - if (!match_state || p->state == match_state)
  16906. + if (!match_state || p->state == match_state ||
  16907. + p->saved_state == match_state)
  16908. ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
  16909. task_rq_unlock(rq, p, &rf);
  16910. @@ -1680,10 +1843,6 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl
  16911. {
  16912. activate_task(rq, p, en_flags);
  16913. p->on_rq = TASK_ON_RQ_QUEUED;
  16914. -
  16915. - /* if a worker is waking up, notify workqueue */
  16916. - if (p->flags & PF_WQ_WORKER)
  16917. - wq_worker_waking_up(p, cpu_of(rq));
  16918. }
  16919. /*
  16920. @@ -2018,8 +2177,27 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
  16921. */
  16922. smp_mb__before_spinlock();
  16923. raw_spin_lock_irqsave(&p->pi_lock, flags);
  16924. - if (!(p->state & state))
  16925. + if (!(p->state & state)) {
  16926. + /*
  16927. + * The task might be running due to a spinlock sleeper
  16928. + * wakeup. Check the saved state and set it to running
  16929. + * if the wakeup condition is true.
  16930. + */
  16931. + if (!(wake_flags & WF_LOCK_SLEEPER)) {
  16932. + if (p->saved_state & state) {
  16933. + p->saved_state = TASK_RUNNING;
  16934. + success = 1;
  16935. + }
  16936. + }
  16937. goto out;
  16938. + }
  16939. +
  16940. + /*
  16941. + * If this is a regular wakeup, then we can unconditionally
  16942. + * clear the saved state of a "lock sleeper".
  16943. + */
  16944. + if (!(wake_flags & WF_LOCK_SLEEPER))
  16945. + p->saved_state = TASK_RUNNING;
  16946. trace_sched_waking(p);
  16947. @@ -2101,53 +2279,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
  16948. return success;
  16949. }
  16950. -/**
  16951. - * try_to_wake_up_local - try to wake up a local task with rq lock held
  16952. - * @p: the thread to be awakened
  16953. - * @cookie: context's cookie for pinning
  16954. - *
  16955. - * Put @p on the run-queue if it's not already there. The caller must
  16956. - * ensure that this_rq() is locked, @p is bound to this_rq() and not
  16957. - * the current task.
  16958. - */
  16959. -static void try_to_wake_up_local(struct task_struct *p, struct pin_cookie cookie)
  16960. -{
  16961. - struct rq *rq = task_rq(p);
  16962. -
  16963. - if (WARN_ON_ONCE(rq != this_rq()) ||
  16964. - WARN_ON_ONCE(p == current))
  16965. - return;
  16966. -
  16967. - lockdep_assert_held(&rq->lock);
  16968. -
  16969. - if (!raw_spin_trylock(&p->pi_lock)) {
  16970. - /*
  16971. - * This is OK, because current is on_cpu, which avoids it being
  16972. - * picked for load-balance and preemption/IRQs are still
  16973. - * disabled avoiding further scheduler activity on it and we've
  16974. - * not yet picked a replacement task.
  16975. - */
  16976. - lockdep_unpin_lock(&rq->lock, cookie);
  16977. - raw_spin_unlock(&rq->lock);
  16978. - raw_spin_lock(&p->pi_lock);
  16979. - raw_spin_lock(&rq->lock);
  16980. - lockdep_repin_lock(&rq->lock, cookie);
  16981. - }
  16982. -
  16983. - if (!(p->state & TASK_NORMAL))
  16984. - goto out;
  16985. -
  16986. - trace_sched_waking(p);
  16987. -
  16988. - if (!task_on_rq_queued(p))
  16989. - ttwu_activate(rq, p, ENQUEUE_WAKEUP);
  16990. -
  16991. - ttwu_do_wakeup(rq, p, 0, cookie);
  16992. - ttwu_stat(p, smp_processor_id(), 0);
  16993. -out:
  16994. - raw_spin_unlock(&p->pi_lock);
  16995. -}
  16996. -
  16997. /**
  16998. * wake_up_process - Wake up a specific process
  16999. * @p: The process to be woken up.
  17000. @@ -2166,6 +2297,18 @@ int wake_up_process(struct task_struct *p)
  17001. }
  17002. EXPORT_SYMBOL(wake_up_process);
  17003. +/**
  17004. + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
  17005. + * @p: The process to be woken up.
  17006. + *
  17007. + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
  17008. + * the nature of the wakeup.
  17009. + */
  17010. +int wake_up_lock_sleeper(struct task_struct *p)
  17011. +{
  17012. + return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER);
  17013. +}
  17014. +
  17015. int wake_up_state(struct task_struct *p, unsigned int state)
  17016. {
  17017. return try_to_wake_up(p, state, 0);
  17018. @@ -2442,6 +2585,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
  17019. p->on_cpu = 0;
  17020. #endif
  17021. init_task_preempt_count(p);
  17022. +#ifdef CONFIG_HAVE_PREEMPT_LAZY
  17023. + task_thread_info(p)->preempt_lazy_count = 0;
  17024. +#endif
  17025. #ifdef CONFIG_SMP
  17026. plist_node_init(&p->pushable_tasks, MAX_PRIO);
  17027. RB_CLEAR_NODE(&p->pushable_dl_tasks);
  17028. @@ -2770,21 +2916,16 @@ static struct rq *finish_task_switch(struct task_struct *prev)
  17029. finish_arch_post_lock_switch();
  17030. fire_sched_in_preempt_notifiers(current);
  17031. + /*
  17032. + * We use mmdrop_delayed() here so we don't have to do the
  17033. + * full __mmdrop() when we are the last user.
  17034. + */
  17035. if (mm)
  17036. - mmdrop(mm);
  17037. + mmdrop_delayed(mm);
  17038. if (unlikely(prev_state == TASK_DEAD)) {
  17039. if (prev->sched_class->task_dead)
  17040. prev->sched_class->task_dead(prev);
  17041. - /*
  17042. - * Remove function-return probe instances associated with this
  17043. - * task and put them back on the free list.
  17044. - */
  17045. - kprobe_flush_task(prev);
  17046. -
  17047. - /* Task is done with its stack. */
  17048. - put_task_stack(prev);
  17049. -
  17050. put_task_struct(prev);
  17051. }
  17052. @@ -3252,6 +3393,114 @@ static inline void schedule_debug(struct task_struct *prev)
  17053. schedstat_inc(this_rq()->sched_count);
  17054. }
  17055. +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
  17056. +
  17057. +void migrate_disable(void)
  17058. +{
  17059. + struct task_struct *p = current;
  17060. +
  17061. + if (in_atomic() || irqs_disabled()) {
  17062. +#ifdef CONFIG_SCHED_DEBUG
  17063. + p->migrate_disable_atomic++;
  17064. +#endif
  17065. + return;
  17066. + }
  17067. +
  17068. +#ifdef CONFIG_SCHED_DEBUG
  17069. + if (unlikely(p->migrate_disable_atomic)) {
  17070. + tracing_off();
  17071. + WARN_ON_ONCE(1);
  17072. + }
  17073. +#endif
  17074. +
  17075. + if (p->migrate_disable) {
  17076. + p->migrate_disable++;
  17077. + return;
  17078. + }
  17079. +
  17080. + preempt_disable();
  17081. + preempt_lazy_disable();
  17082. + pin_current_cpu();
  17083. + p->migrate_disable = 1;
  17084. + preempt_enable();
  17085. +}
  17086. +EXPORT_SYMBOL(migrate_disable);
  17087. +
  17088. +void migrate_enable(void)
  17089. +{
  17090. + struct task_struct *p = current;
  17091. +
  17092. + if (in_atomic() || irqs_disabled()) {
  17093. +#ifdef CONFIG_SCHED_DEBUG
  17094. + p->migrate_disable_atomic--;
  17095. +#endif
  17096. + return;
  17097. + }
  17098. +
  17099. +#ifdef CONFIG_SCHED_DEBUG
  17100. + if (unlikely(p->migrate_disable_atomic)) {
  17101. + tracing_off();
  17102. + WARN_ON_ONCE(1);
  17103. + }
  17104. +#endif
  17105. + WARN_ON_ONCE(p->migrate_disable <= 0);
  17106. +
  17107. + if (p->migrate_disable > 1) {
  17108. + p->migrate_disable--;
  17109. + return;
  17110. + }
  17111. +
  17112. + preempt_disable();
  17113. + /*
  17114. + * Clearing migrate_disable causes tsk_cpus_allowed to
  17115. + * show the tasks original cpu affinity.
  17116. + */
  17117. + p->migrate_disable = 0;
  17118. +
  17119. + if (p->migrate_disable_update) {
  17120. + struct rq *rq;
  17121. + struct rq_flags rf;
  17122. +
  17123. + rq = task_rq_lock(p, &rf);
  17124. + update_rq_clock(rq);
  17125. +
  17126. + __do_set_cpus_allowed_tail(p, &p->cpus_allowed);
  17127. + task_rq_unlock(rq, p, &rf);
  17128. +
  17129. + p->migrate_disable_update = 0;
  17130. +
  17131. + WARN_ON(smp_processor_id() != task_cpu(p));
  17132. + if (!cpumask_test_cpu(task_cpu(p), &p->cpus_allowed)) {
  17133. + const struct cpumask *cpu_valid_mask = cpu_active_mask;
  17134. + struct migration_arg arg;
  17135. + unsigned int dest_cpu;
  17136. +
  17137. + if (p->flags & PF_KTHREAD) {
  17138. + /*
  17139. + * Kernel threads are allowed on online && !active CPUs
  17140. + */
  17141. + cpu_valid_mask = cpu_online_mask;
  17142. + }
  17143. + dest_cpu = cpumask_any_and(cpu_valid_mask, &p->cpus_allowed);
  17144. + arg.task = p;
  17145. + arg.dest_cpu = dest_cpu;
  17146. +
  17147. + unpin_current_cpu();
  17148. + preempt_lazy_enable();
  17149. + preempt_enable();
  17150. + stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
  17151. + tlb_migrate_finish(p->mm);
  17152. + return;
  17153. + }
  17154. + }
  17155. +
  17156. + unpin_current_cpu();
  17157. + preempt_enable();
  17158. + preempt_lazy_enable();
  17159. +}
  17160. +EXPORT_SYMBOL(migrate_enable);
  17161. +#endif
  17162. +
  17163. /*
  17164. * Pick up the highest-prio task:
  17165. */
  17166. @@ -3368,19 +3617,6 @@ static void __sched notrace __schedule(bool preempt)
  17167. } else {
  17168. deactivate_task(rq, prev, DEQUEUE_SLEEP);
  17169. prev->on_rq = 0;
  17170. -
  17171. - /*
  17172. - * If a worker went to sleep, notify and ask workqueue
  17173. - * whether it wants to wake up a task to maintain
  17174. - * concurrency.
  17175. - */
  17176. - if (prev->flags & PF_WQ_WORKER) {
  17177. - struct task_struct *to_wakeup;
  17178. -
  17179. - to_wakeup = wq_worker_sleeping(prev);
  17180. - if (to_wakeup)
  17181. - try_to_wake_up_local(to_wakeup, cookie);
  17182. - }
  17183. }
  17184. switch_count = &prev->nvcsw;
  17185. }
  17186. @@ -3390,6 +3626,7 @@ static void __sched notrace __schedule(bool preempt)
  17187. next = pick_next_task(rq, prev, cookie);
  17188. clear_tsk_need_resched(prev);
  17189. + clear_tsk_need_resched_lazy(prev);
  17190. clear_preempt_need_resched();
  17191. rq->clock_skip_update = 0;
  17192. @@ -3437,8 +3674,19 @@ void __noreturn do_task_dead(void)
  17193. static inline void sched_submit_work(struct task_struct *tsk)
  17194. {
  17195. - if (!tsk->state || tsk_is_pi_blocked(tsk))
  17196. + if (!tsk->state)
  17197. return;
  17198. + /*
  17199. + * If a worker went to sleep, notify and ask workqueue whether
  17200. + * it wants to wake up a task to maintain concurrency.
  17201. + */
  17202. + if (tsk->flags & PF_WQ_WORKER)
  17203. + wq_worker_sleeping(tsk);
  17204. +
  17205. +
  17206. + if (tsk_is_pi_blocked(tsk))
  17207. + return;
  17208. +
  17209. /*
  17210. * If we are going to sleep and we have plugged IO queued,
  17211. * make sure to submit it to avoid deadlocks.
  17212. @@ -3447,6 +3695,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
  17213. blk_schedule_flush_plug(tsk);
  17214. }
  17215. +static void sched_update_worker(struct task_struct *tsk)
  17216. +{
  17217. + if (tsk->flags & PF_WQ_WORKER)
  17218. + wq_worker_running(tsk);
  17219. +}
  17220. +
  17221. asmlinkage __visible void __sched schedule(void)
  17222. {
  17223. struct task_struct *tsk = current;
  17224. @@ -3457,6 +3711,7 @@ asmlinkage __visible void __sched schedule(void)
  17225. __schedule(false);
  17226. sched_preempt_enable_no_resched();
  17227. } while (need_resched());
  17228. + sched_update_worker(tsk);
  17229. }
  17230. EXPORT_SYMBOL(schedule);
  17231. @@ -3520,6 +3775,30 @@ static void __sched notrace preempt_schedule_common(void)
  17232. } while (need_resched());
  17233. }
  17234. +#ifdef CONFIG_PREEMPT_LAZY
  17235. +/*
  17236. + * If TIF_NEED_RESCHED is set then we allow being scheduled away since it is
  17237. + * set by an RT task. Otherwise we try to avoid being scheduled out as long as
  17238. + * the preempt_lazy_count counter is > 0.
  17239. + */
  17240. +static __always_inline int preemptible_lazy(void)
  17241. +{
  17242. + if (test_thread_flag(TIF_NEED_RESCHED))
  17243. + return 1;
  17244. + if (current_thread_info()->preempt_lazy_count)
  17245. + return 0;
  17246. + return 1;
  17247. +}
  17248. +
  17249. +#else
  17250. +
  17251. +static inline int preemptible_lazy(void)
  17252. +{
  17253. + return 1;
  17254. +}
  17255. +
  17256. +#endif
  17257. +
  17258. #ifdef CONFIG_PREEMPT
  17259. /*
  17260. * this is the entry point to schedule() from in-kernel preemption
  17261. @@ -3534,7 +3813,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
  17262. */
  17263. if (likely(!preemptible()))
  17264. return;
  17265. -
  17266. + if (!preemptible_lazy())
  17267. + return;
  17268. preempt_schedule_common();
  17269. }
  17270. NOKPROBE_SYMBOL(preempt_schedule);
  17271. @@ -3561,6 +3841,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
  17272. if (likely(!preemptible()))
  17273. return;
  17274. + if (!preemptible_lazy())
  17275. + return;
  17276. +
  17277. do {
  17278. /*
  17279. * Because the function tracer can trace preempt_count_sub()
  17280. @@ -3583,7 +3866,16 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
  17281. * an infinite recursion.
  17282. */
  17283. prev_ctx = exception_enter();
  17284. + /*
  17285. + * The add/subtract must not be traced by the function
  17286. + * tracer. But we still want to account for the
  17287. + * preempt off latency tracer. Since the _notrace versions
  17288. + * of add/subtract skip the accounting for latency tracer
  17289. + * we must force it manually.
  17290. + */
  17291. + start_critical_timings();
  17292. __schedule(true);
  17293. + stop_critical_timings();
  17294. exception_exit(prev_ctx);
  17295. preempt_latency_stop(1);
  17296. @@ -3629,10 +3921,25 @@ EXPORT_SYMBOL(default_wake_function);
  17297. #ifdef CONFIG_RT_MUTEXES
  17298. +static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
  17299. +{
  17300. + if (pi_task)
  17301. + prio = min(prio, pi_task->prio);
  17302. +
  17303. + return prio;
  17304. +}
  17305. +
  17306. +static inline int rt_effective_prio(struct task_struct *p, int prio)
  17307. +{
  17308. + struct task_struct *pi_task = rt_mutex_get_top_task(p);
  17309. +
  17310. + return __rt_effective_prio(pi_task, prio);
  17311. +}
  17312. +
  17313. /*
  17314. * rt_mutex_setprio - set the current priority of a task
  17315. - * @p: task
  17316. - * @prio: prio value (kernel-internal form)
  17317. + * @p: task to boost
  17318. + * @pi_task: donor task
  17319. *
  17320. * This function changes the 'effective' priority of a task. It does
  17321. * not touch ->normal_prio like __setscheduler().
  17322. @@ -3640,16 +3947,40 @@ EXPORT_SYMBOL(default_wake_function);
  17323. * Used by the rt_mutex code to implement priority inheritance
  17324. * logic. Call site only calls if the priority of the task changed.
  17325. */
  17326. -void rt_mutex_setprio(struct task_struct *p, int prio)
  17327. +void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
  17328. {
  17329. - int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
  17330. + int prio, oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
  17331. const struct sched_class *prev_class;
  17332. struct rq_flags rf;
  17333. struct rq *rq;
  17334. - BUG_ON(prio > MAX_PRIO);
  17335. + /* XXX used to be waiter->prio, not waiter->task->prio */
  17336. + prio = __rt_effective_prio(pi_task, p->normal_prio);
  17337. +
  17338. + /*
  17339. + * If nothing changed; bail early.
  17340. + */
  17341. + if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio))
  17342. + return;
  17343. rq = __task_rq_lock(p, &rf);
  17344. + /*
  17345. + * Set under pi_lock && rq->lock, such that the value can be used under
  17346. + * either lock.
  17347. + *
  17348. + * Note that there is loads of trickery to make this pointer cache work
  17349. + * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to
  17350. + * ensure a task is de-boosted (pi_task is set to NULL) before the
  17351. + * task is allowed to run again (and can exit). This ensures the pointer
  17352. + * points to a blocked task -- which guarantees the task is present.
  17353. + */
  17354. + p->pi_top_task = pi_task;
  17355. +
  17356. + /*
  17357. + * For FIFO/RR we only need to set prio, if that matches we're done.
  17358. + */
  17359. + if (prio == p->prio && !dl_prio(prio))
  17360. + goto out_unlock;
  17361. /*
  17362. * Idle task boosting is a nono in general. There is one
  17363. @@ -3669,7 +4000,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
  17364. goto out_unlock;
  17365. }
  17366. - trace_sched_pi_setprio(p, prio);
  17367. + trace_sched_pi_setprio(p, pi_task);
  17368. oldprio = p->prio;
  17369. if (oldprio == prio)
  17370. @@ -3693,7 +4024,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
  17371. * running task
  17372. */
  17373. if (dl_prio(prio)) {
  17374. - struct task_struct *pi_task = rt_mutex_get_top_task(p);
  17375. if (!dl_prio(p->normal_prio) ||
  17376. (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
  17377. p->dl.dl_boosted = 1;
  17378. @@ -3730,6 +4060,11 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
  17379. balance_callback(rq);
  17380. preempt_enable();
  17381. }
  17382. +#else
  17383. +static inline int rt_effective_prio(struct task_struct *p, int prio)
  17384. +{
  17385. + return prio;
  17386. +}
  17387. #endif
  17388. void set_user_nice(struct task_struct *p, long nice)
  17389. @@ -3974,10 +4309,9 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
  17390. * Keep a potential priority boosting if called from
  17391. * sched_setscheduler().
  17392. */
  17393. + p->prio = normal_prio(p);
  17394. if (keep_boost)
  17395. - p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
  17396. - else
  17397. - p->prio = normal_prio(p);
  17398. + p->prio = rt_effective_prio(p, p->prio);
  17399. if (dl_prio(p->prio))
  17400. p->sched_class = &dl_sched_class;
  17401. @@ -4264,7 +4598,7 @@ static int __sched_setscheduler(struct task_struct *p,
  17402. * the runqueue. This will be done when the task deboost
  17403. * itself.
  17404. */
  17405. - new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
  17406. + new_effective_prio = rt_effective_prio(p, newprio);
  17407. if (new_effective_prio == oldprio)
  17408. queue_flags &= ~DEQUEUE_MOVE;
  17409. }
  17410. @@ -4939,6 +5273,7 @@ int __cond_resched_lock(spinlock_t *lock)
  17411. }
  17412. EXPORT_SYMBOL(__cond_resched_lock);
  17413. +#ifndef CONFIG_PREEMPT_RT_FULL
  17414. int __sched __cond_resched_softirq(void)
  17415. {
  17416. BUG_ON(!in_softirq());
  17417. @@ -4952,6 +5287,7 @@ int __sched __cond_resched_softirq(void)
  17418. return 0;
  17419. }
  17420. EXPORT_SYMBOL(__cond_resched_softirq);
  17421. +#endif
  17422. /**
  17423. * yield - yield the current processor to other threads.
  17424. @@ -5315,7 +5651,9 @@ void init_idle(struct task_struct *idle, int cpu)
  17425. /* Set the preempt count _outside_ the spinlocks! */
  17426. init_idle_preempt_count(idle, cpu);
  17427. -
  17428. +#ifdef CONFIG_HAVE_PREEMPT_LAZY
  17429. + task_thread_info(idle)->preempt_lazy_count = 0;
  17430. +#endif
  17431. /*
  17432. * The idle tasks have their own, simple scheduling class:
  17433. */
  17434. @@ -5458,6 +5796,8 @@ void sched_setnuma(struct task_struct *p, int nid)
  17435. #endif /* CONFIG_NUMA_BALANCING */
  17436. #ifdef CONFIG_HOTPLUG_CPU
  17437. +static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
  17438. +
  17439. /*
  17440. * Ensures that the idle task is using init_mm right before its cpu goes
  17441. * offline.
  17442. @@ -5472,7 +5812,12 @@ void idle_task_exit(void)
  17443. switch_mm(mm, &init_mm, current);
  17444. finish_arch_post_lock_switch();
  17445. }
  17446. - mmdrop(mm);
  17447. + /*
  17448. + * Defer the cleanup to an alive cpu. On RT we can neither
  17449. + * call mmdrop() nor mmdrop_delayed() from here.
  17450. + */
  17451. + per_cpu(idle_last_mm, smp_processor_id()) = mm;
  17452. +
  17453. }
  17454. /*
  17455. @@ -5881,6 +6226,7 @@ static int init_rootdomain(struct root_domain *rd)
  17456. rd->rto_cpu = -1;
  17457. raw_spin_lock_init(&rd->rto_lock);
  17458. init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
  17459. + rd->rto_push_work.flags |= IRQ_WORK_HARD_IRQ;
  17460. #endif
  17461. init_dl_bw(&rd->dl_bw);
  17462. @@ -7439,6 +7785,10 @@ int sched_cpu_dying(unsigned int cpu)
  17463. update_max_interval();
  17464. nohz_balance_exit_idle(cpu);
  17465. hrtick_clear(rq);
  17466. + if (per_cpu(idle_last_mm, cpu)) {
  17467. + mmdrop_delayed(per_cpu(idle_last_mm, cpu));
  17468. + per_cpu(idle_last_mm, cpu) = NULL;
  17469. + }
  17470. return 0;
  17471. }
  17472. #endif
  17473. @@ -7700,7 +8050,7 @@ void __init sched_init(void)
  17474. #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  17475. static inline int preempt_count_equals(int preempt_offset)
  17476. {
  17477. - int nested = preempt_count() + rcu_preempt_depth();
  17478. + int nested = preempt_count() + sched_rcu_preempt_depth();
  17479. return (nested == preempt_offset);
  17480. }
  17481. diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
  17482. index df5c32a0c6ed..c77fd444dc3c 100644
  17483. --- a/kernel/sched/deadline.c
  17484. +++ b/kernel/sched/deadline.c
  17485. @@ -693,6 +693,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
  17486. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  17487. timer->function = dl_task_timer;
  17488. + timer->irqsafe = 1;
  17489. }
  17490. /*
  17491. diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
  17492. index fa178b62ea79..935224123441 100644
  17493. --- a/kernel/sched/debug.c
  17494. +++ b/kernel/sched/debug.c
  17495. @@ -558,6 +558,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
  17496. P(rt_throttled);
  17497. PN(rt_time);
  17498. PN(rt_runtime);
  17499. +#ifdef CONFIG_SMP
  17500. + P(rt_nr_migratory);
  17501. +#endif
  17502. #undef PN
  17503. #undef P
  17504. @@ -953,6 +956,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
  17505. #endif
  17506. P(policy);
  17507. P(prio);
  17508. +#ifdef CONFIG_PREEMPT_RT_FULL
  17509. + P(migrate_disable);
  17510. +#endif
  17511. + P(nr_cpus_allowed);
  17512. #undef PN_SCHEDSTAT
  17513. #undef PN
  17514. #undef __PN
  17515. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
  17516. index 3d862f5b0331..c6db32c0c557 100644
  17517. --- a/kernel/sched/fair.c
  17518. +++ b/kernel/sched/fair.c
  17519. @@ -3518,7 +3518,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
  17520. ideal_runtime = sched_slice(cfs_rq, curr);
  17521. delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
  17522. if (delta_exec > ideal_runtime) {
  17523. - resched_curr(rq_of(cfs_rq));
  17524. + resched_curr_lazy(rq_of(cfs_rq));
  17525. /*
  17526. * The current task ran long enough, ensure it doesn't get
  17527. * re-elected due to buddy favours.
  17528. @@ -3542,7 +3542,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
  17529. return;
  17530. if (delta > ideal_runtime)
  17531. - resched_curr(rq_of(cfs_rq));
  17532. + resched_curr_lazy(rq_of(cfs_rq));
  17533. }
  17534. static void
  17535. @@ -3684,7 +3684,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
  17536. * validating it and just reschedule.
  17537. */
  17538. if (queued) {
  17539. - resched_curr(rq_of(cfs_rq));
  17540. + resched_curr_lazy(rq_of(cfs_rq));
  17541. return;
  17542. }
  17543. /*
  17544. @@ -3866,7 +3866,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
  17545. * hierarchy can be throttled
  17546. */
  17547. if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
  17548. - resched_curr(rq_of(cfs_rq));
  17549. + resched_curr_lazy(rq_of(cfs_rq));
  17550. }
  17551. static __always_inline
  17552. @@ -4494,7 +4494,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
  17553. if (delta < 0) {
  17554. if (rq->curr == p)
  17555. - resched_curr(rq);
  17556. + resched_curr_lazy(rq);
  17557. return;
  17558. }
  17559. hrtick_start(rq, delta);
  17560. @@ -5862,7 +5862,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
  17561. return;
  17562. preempt:
  17563. - resched_curr(rq);
  17564. + resched_curr_lazy(rq);
  17565. /*
  17566. * Only set the backward buddy when the current task is still
  17567. * on the rq. This can happen when a wakeup gets interleaved
  17568. @@ -8588,7 +8588,7 @@ static void task_fork_fair(struct task_struct *p)
  17569. * 'current' within the tree based on its new key value.
  17570. */
  17571. swap(curr->vruntime, se->vruntime);
  17572. - resched_curr(rq);
  17573. + resched_curr_lazy(rq);
  17574. }
  17575. se->vruntime -= cfs_rq->min_vruntime;
  17576. @@ -8612,7 +8612,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
  17577. */
  17578. if (rq->curr == p) {
  17579. if (p->prio > oldprio)
  17580. - resched_curr(rq);
  17581. + resched_curr_lazy(rq);
  17582. } else
  17583. check_preempt_curr(rq, p, 0);
  17584. }
  17585. diff --git a/kernel/sched/features.h b/kernel/sched/features.h
  17586. index 1b3c8189b286..36086f74e011 100644
  17587. --- a/kernel/sched/features.h
  17588. +++ b/kernel/sched/features.h
  17589. @@ -45,11 +45,19 @@ SCHED_FEAT(LB_BIAS, true)
  17590. */
  17591. SCHED_FEAT(NONTASK_CAPACITY, true)
  17592. +#ifdef CONFIG_PREEMPT_RT_FULL
  17593. +SCHED_FEAT(TTWU_QUEUE, false)
  17594. +# ifdef CONFIG_PREEMPT_LAZY
  17595. +SCHED_FEAT(PREEMPT_LAZY, true)
  17596. +# endif
  17597. +#else
  17598. +
  17599. /*
  17600. * Queue remote wakeups on the target CPU and process them
  17601. * using the scheduler IPI. Reduces rq->lock contention/bounces.
  17602. */
  17603. SCHED_FEAT(TTWU_QUEUE, true)
  17604. +#endif
  17605. /*
  17606. * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
  17607. diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
  17608. index 7a360d6f6798..d361629c0f96 100644
  17609. --- a/kernel/sched/rt.c
  17610. +++ b/kernel/sched/rt.c
  17611. @@ -47,6 +47,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
  17612. hrtimer_init(&rt_b->rt_period_timer,
  17613. CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  17614. + rt_b->rt_period_timer.irqsafe = 1;
  17615. rt_b->rt_period_timer.function = sched_rt_period_timer;
  17616. }
  17617. diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
  17618. index cff985feb6e7..280c7d5a7657 100644
  17619. --- a/kernel/sched/sched.h
  17620. +++ b/kernel/sched/sched.h
  17621. @@ -1162,6 +1162,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
  17622. #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
  17623. #define WF_FORK 0x02 /* child wakeup after fork */
  17624. #define WF_MIGRATED 0x4 /* internal use, task got migrated */
  17625. +#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */
  17626. /*
  17627. * To aid in avoiding the subversion of "niceness" due to uneven distribution
  17628. @@ -1345,6 +1346,15 @@ extern void init_sched_fair_class(void);
  17629. extern void resched_curr(struct rq *rq);
  17630. extern void resched_cpu(int cpu);
  17631. +#ifdef CONFIG_PREEMPT_LAZY
  17632. +extern void resched_curr_lazy(struct rq *rq);
  17633. +#else
  17634. +static inline void resched_curr_lazy(struct rq *rq)
  17635. +{
  17636. + resched_curr(rq);
  17637. +}
  17638. +#endif
  17639. +
  17640. extern struct rt_bandwidth def_rt_bandwidth;
  17641. extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
  17642. diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
  17643. index 82f0dff90030..ef027ff3250a 100644
  17644. --- a/kernel/sched/swait.c
  17645. +++ b/kernel/sched/swait.c
  17646. @@ -1,5 +1,6 @@
  17647. #include <linux/sched.h>
  17648. #include <linux/swait.h>
  17649. +#include <linux/suspend.h>
  17650. void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
  17651. struct lock_class_key *key)
  17652. @@ -29,6 +30,25 @@ void swake_up_locked(struct swait_queue_head *q)
  17653. }
  17654. EXPORT_SYMBOL(swake_up_locked);
  17655. +void swake_up_all_locked(struct swait_queue_head *q)
  17656. +{
  17657. + struct swait_queue *curr;
  17658. + int wakes = 0;
  17659. +
  17660. + while (!list_empty(&q->task_list)) {
  17661. +
  17662. + curr = list_first_entry(&q->task_list, typeof(*curr),
  17663. + task_list);
  17664. + wake_up_process(curr->task);
  17665. + list_del_init(&curr->task_list);
  17666. + wakes++;
  17667. + }
  17668. + if (pm_in_action)
  17669. + return;
  17670. + WARN(wakes > 2, "complete_all() with %d waiters\n", wakes);
  17671. +}
  17672. +EXPORT_SYMBOL(swake_up_all_locked);
  17673. +
  17674. void swake_up(struct swait_queue_head *q)
  17675. {
  17676. unsigned long flags;
  17677. @@ -54,6 +74,7 @@ void swake_up_all(struct swait_queue_head *q)
  17678. if (!swait_active(q))
  17679. return;
  17680. + WARN_ON(irqs_disabled());
  17681. raw_spin_lock_irq(&q->lock);
  17682. list_splice_init(&q->task_list, &tmp);
  17683. while (!list_empty(&tmp)) {
  17684. diff --git a/kernel/sched/swork.c b/kernel/sched/swork.c
  17685. new file mode 100644
  17686. index 000000000000..1950f40ca725
  17687. --- /dev/null
  17688. +++ b/kernel/sched/swork.c
  17689. @@ -0,0 +1,173 @@
  17690. +/*
  17691. + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de
  17692. + *
  17693. + * Provides a framework for enqueuing callbacks from irq context
  17694. + * PREEMPT_RT_FULL safe. The callbacks are executed in kthread context.
  17695. + */
  17696. +
  17697. +#include <linux/swait.h>
  17698. +#include <linux/swork.h>
  17699. +#include <linux/kthread.h>
  17700. +#include <linux/slab.h>
  17701. +#include <linux/spinlock.h>
  17702. +#include <linux/export.h>
  17703. +
  17704. +#define SWORK_EVENT_PENDING (1 << 0)
  17705. +
  17706. +static DEFINE_MUTEX(worker_mutex);
  17707. +static struct sworker *glob_worker;
  17708. +
  17709. +struct sworker {
  17710. + struct list_head events;
  17711. + struct swait_queue_head wq;
  17712. +
  17713. + raw_spinlock_t lock;
  17714. +
  17715. + struct task_struct *task;
  17716. + int refs;
  17717. +};
  17718. +
  17719. +static bool swork_readable(struct sworker *worker)
  17720. +{
  17721. + bool r;
  17722. +
  17723. + if (kthread_should_stop())
  17724. + return true;
  17725. +
  17726. + raw_spin_lock_irq(&worker->lock);
  17727. + r = !list_empty(&worker->events);
  17728. + raw_spin_unlock_irq(&worker->lock);
  17729. +
  17730. + return r;
  17731. +}
  17732. +
  17733. +static int swork_kthread(void *arg)
  17734. +{
  17735. + struct sworker *worker = arg;
  17736. +
  17737. + for (;;) {
  17738. + swait_event_interruptible(worker->wq,
  17739. + swork_readable(worker));
  17740. + if (kthread_should_stop())
  17741. + break;
  17742. +
  17743. + raw_spin_lock_irq(&worker->lock);
  17744. + while (!list_empty(&worker->events)) {
  17745. + struct swork_event *sev;
  17746. +
  17747. + sev = list_first_entry(&worker->events,
  17748. + struct swork_event, item);
  17749. + list_del(&sev->item);
  17750. + raw_spin_unlock_irq(&worker->lock);
  17751. +
  17752. + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING,
  17753. + &sev->flags));
  17754. + sev->func(sev);
  17755. + raw_spin_lock_irq(&worker->lock);
  17756. + }
  17757. + raw_spin_unlock_irq(&worker->lock);
  17758. + }
  17759. + return 0;
  17760. +}
  17761. +
  17762. +static struct sworker *swork_create(void)
  17763. +{
  17764. + struct sworker *worker;
  17765. +
  17766. + worker = kzalloc(sizeof(*worker), GFP_KERNEL);
  17767. + if (!worker)
  17768. + return ERR_PTR(-ENOMEM);
  17769. +
  17770. + INIT_LIST_HEAD(&worker->events);
  17771. + raw_spin_lock_init(&worker->lock);
  17772. + init_swait_queue_head(&worker->wq);
  17773. +
  17774. + worker->task = kthread_run(swork_kthread, worker, "kswork");
  17775. + if (IS_ERR(worker->task)) {
  17776. + kfree(worker);
  17777. + return ERR_PTR(-ENOMEM);
  17778. + }
  17779. +
  17780. + return worker;
  17781. +}
  17782. +
  17783. +static void swork_destroy(struct sworker *worker)
  17784. +{
  17785. + kthread_stop(worker->task);
  17786. +
  17787. + WARN_ON(!list_empty(&worker->events));
  17788. + kfree(worker);
  17789. +}
  17790. +
  17791. +/**
  17792. + * swork_queue - queue swork
  17793. + *
  17794. + * Returns %false if @work was already on a queue, %true otherwise.
  17795. + *
  17796. + * The work is queued and processed on a random CPU
  17797. + */
  17798. +bool swork_queue(struct swork_event *sev)
  17799. +{
  17800. + unsigned long flags;
  17801. +
  17802. + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags))
  17803. + return false;
  17804. +
  17805. + raw_spin_lock_irqsave(&glob_worker->lock, flags);
  17806. + list_add_tail(&sev->item, &glob_worker->events);
  17807. + raw_spin_unlock_irqrestore(&glob_worker->lock, flags);
  17808. +
  17809. + swake_up(&glob_worker->wq);
  17810. + return true;
  17811. +}
  17812. +EXPORT_SYMBOL_GPL(swork_queue);
  17813. +
  17814. +/**
  17815. + * swork_get - get an instance of the sworker
  17816. + *
  17817. + * Returns a negative error code if the initialization of the worker did not
  17818. + * work, %0 otherwise.
  17819. + *
  17820. + */
  17821. +int swork_get(void)
  17822. +{
  17823. + struct sworker *worker;
  17824. +
  17825. + mutex_lock(&worker_mutex);
  17826. + if (!glob_worker) {
  17827. + worker = swork_create();
  17828. + if (IS_ERR(worker)) {
  17829. + mutex_unlock(&worker_mutex);
  17830. + return -ENOMEM;
  17831. + }
  17832. +
  17833. + glob_worker = worker;
  17834. + }
  17835. +
  17836. + glob_worker->refs++;
  17837. + mutex_unlock(&worker_mutex);
  17838. +
  17839. + return 0;
  17840. +}
  17841. +EXPORT_SYMBOL_GPL(swork_get);
  17842. +
  17843. +/**
  17844. + * swork_put - puts an instance of the sworker
  17845. + *
  17846. + * Will destroy the sworker thread. This function must not be called until all
  17847. + * queued events have been completed.
  17848. + */
  17849. +void swork_put(void)
  17850. +{
  17851. + mutex_lock(&worker_mutex);
  17852. +
  17853. + glob_worker->refs--;
  17854. + if (glob_worker->refs > 0)
  17855. + goto out;
  17856. +
  17857. + swork_destroy(glob_worker);
  17858. + glob_worker = NULL;
  17859. +out:
  17860. + mutex_unlock(&worker_mutex);
  17861. +}
  17862. +EXPORT_SYMBOL_GPL(swork_put);
  17863. diff --git a/kernel/signal.c b/kernel/signal.c
  17864. index 7ebe236a5364..4d094ae3a625 100644
  17865. --- a/kernel/signal.c
  17866. +++ b/kernel/signal.c
  17867. @@ -14,6 +14,7 @@
  17868. #include <linux/export.h>
  17869. #include <linux/init.h>
  17870. #include <linux/sched.h>
  17871. +#include <linux/sched/rt.h>
  17872. #include <linux/fs.h>
  17873. #include <linux/tty.h>
  17874. #include <linux/binfmts.h>
  17875. @@ -354,13 +355,30 @@ static bool task_participate_group_stop(struct task_struct *task)
  17876. return false;
  17877. }
  17878. +static inline struct sigqueue *get_task_cache(struct task_struct *t)
  17879. +{
  17880. + struct sigqueue *q = t->sigqueue_cache;
  17881. +
  17882. + if (cmpxchg(&t->sigqueue_cache, q, NULL) != q)
  17883. + return NULL;
  17884. + return q;
  17885. +}
  17886. +
  17887. +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
  17888. +{
  17889. + if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
  17890. + return 0;
  17891. + return 1;
  17892. +}
  17893. +
  17894. /*
  17895. * allocate a new signal queue record
  17896. * - this may be called without locks if and only if t == current, otherwise an
  17897. * appropriate lock must be held to stop the target task from exiting
  17898. */
  17899. static struct sigqueue *
  17900. -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
  17901. +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
  17902. + int override_rlimit, int fromslab)
  17903. {
  17904. struct sigqueue *q = NULL;
  17905. struct user_struct *user;
  17906. @@ -377,7 +395,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
  17907. if (override_rlimit ||
  17908. atomic_read(&user->sigpending) <=
  17909. task_rlimit(t, RLIMIT_SIGPENDING)) {
  17910. - q = kmem_cache_alloc(sigqueue_cachep, flags);
  17911. + if (!fromslab)
  17912. + q = get_task_cache(t);
  17913. + if (!q)
  17914. + q = kmem_cache_alloc(sigqueue_cachep, flags);
  17915. } else {
  17916. print_dropped_signal(sig);
  17917. }
  17918. @@ -394,6 +415,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
  17919. return q;
  17920. }
  17921. +static struct sigqueue *
  17922. +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags,
  17923. + int override_rlimit)
  17924. +{
  17925. + return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0);
  17926. +}
  17927. +
  17928. static void __sigqueue_free(struct sigqueue *q)
  17929. {
  17930. if (q->flags & SIGQUEUE_PREALLOC)
  17931. @@ -403,6 +431,21 @@ static void __sigqueue_free(struct sigqueue *q)
  17932. kmem_cache_free(sigqueue_cachep, q);
  17933. }
  17934. +static void sigqueue_free_current(struct sigqueue *q)
  17935. +{
  17936. + struct user_struct *up;
  17937. +
  17938. + if (q->flags & SIGQUEUE_PREALLOC)
  17939. + return;
  17940. +
  17941. + up = q->user;
  17942. + if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
  17943. + atomic_dec(&up->sigpending);
  17944. + free_uid(up);
  17945. + } else
  17946. + __sigqueue_free(q);
  17947. +}
  17948. +
  17949. void flush_sigqueue(struct sigpending *queue)
  17950. {
  17951. struct sigqueue *q;
  17952. @@ -415,6 +458,21 @@ void flush_sigqueue(struct sigpending *queue)
  17953. }
  17954. }
  17955. +/*
  17956. + * Called from __exit_signal. Flush tsk->pending and
  17957. + * tsk->sigqueue_cache
  17958. + */
  17959. +void flush_task_sigqueue(struct task_struct *tsk)
  17960. +{
  17961. + struct sigqueue *q;
  17962. +
  17963. + flush_sigqueue(&tsk->pending);
  17964. +
  17965. + q = get_task_cache(tsk);
  17966. + if (q)
  17967. + kmem_cache_free(sigqueue_cachep, q);
  17968. +}
  17969. +
  17970. /*
  17971. * Flush all pending signals for this kthread.
  17972. */
  17973. @@ -534,7 +592,7 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info,
  17974. (info->si_code == SI_TIMER) &&
  17975. (info->si_sys_private);
  17976. - __sigqueue_free(first);
  17977. + sigqueue_free_current(first);
  17978. } else {
  17979. /*
  17980. * Ok, it wasn't in the queue. This must be
  17981. @@ -570,6 +628,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
  17982. bool resched_timer = false;
  17983. int signr;
  17984. + WARN_ON_ONCE(tsk != current);
  17985. +
  17986. /* We only dequeue private signals from ourselves, we don't let
  17987. * signalfd steal them
  17988. */
  17989. @@ -1166,8 +1226,8 @@ int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
  17990. * We don't want to have recursive SIGSEGV's etc, for example,
  17991. * that is why we also clear SIGNAL_UNKILLABLE.
  17992. */
  17993. -int
  17994. -force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  17995. +static int
  17996. +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  17997. {
  17998. unsigned long int flags;
  17999. int ret, blocked, ignored;
  18000. @@ -1192,6 +1252,39 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  18001. return ret;
  18002. }
  18003. +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  18004. +{
  18005. +/*
  18006. + * On some archs, PREEMPT_RT has to delay sending a signal from a trap
  18007. + * since it cannot enable preemption, and the signal code's spin_locks
  18008. + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
  18009. + * send the signal on exit of the trap.
  18010. + */
  18011. +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
  18012. + if (in_atomic()) {
  18013. + if (WARN_ON_ONCE(t != current))
  18014. + return 0;
  18015. + if (WARN_ON_ONCE(t->forced_info.si_signo))
  18016. + return 0;
  18017. +
  18018. + if (is_si_special(info)) {
  18019. + WARN_ON_ONCE(info != SEND_SIG_PRIV);
  18020. + t->forced_info.si_signo = sig;
  18021. + t->forced_info.si_errno = 0;
  18022. + t->forced_info.si_code = SI_KERNEL;
  18023. + t->forced_info.si_pid = 0;
  18024. + t->forced_info.si_uid = 0;
  18025. + } else {
  18026. + t->forced_info = *info;
  18027. + }
  18028. +
  18029. + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
  18030. + return 0;
  18031. + }
  18032. +#endif
  18033. + return do_force_sig_info(sig, info, t);
  18034. +}
  18035. +
  18036. /*
  18037. * Nuke all other threads in the group.
  18038. */
  18039. @@ -1226,12 +1319,12 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
  18040. * Disable interrupts early to avoid deadlocks.
  18041. * See rcu_read_unlock() comment header for details.
  18042. */
  18043. - local_irq_save(*flags);
  18044. + local_irq_save_nort(*flags);
  18045. rcu_read_lock();
  18046. sighand = rcu_dereference(tsk->sighand);
  18047. if (unlikely(sighand == NULL)) {
  18048. rcu_read_unlock();
  18049. - local_irq_restore(*flags);
  18050. + local_irq_restore_nort(*flags);
  18051. break;
  18052. }
  18053. /*
  18054. @@ -1252,7 +1345,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
  18055. }
  18056. spin_unlock(&sighand->siglock);
  18057. rcu_read_unlock();
  18058. - local_irq_restore(*flags);
  18059. + local_irq_restore_nort(*flags);
  18060. }
  18061. return sighand;
  18062. @@ -1495,7 +1588,8 @@ EXPORT_SYMBOL(kill_pid);
  18063. */
  18064. struct sigqueue *sigqueue_alloc(void)
  18065. {
  18066. - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0);
  18067. + /* Preallocated sigqueue objects always from the slabcache ! */
  18068. + struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1);
  18069. if (q)
  18070. q->flags |= SIGQUEUE_PREALLOC;
  18071. @@ -1856,15 +1950,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
  18072. if (gstop_done && ptrace_reparented(current))
  18073. do_notify_parent_cldstop(current, false, why);
  18074. - /*
  18075. - * Don't want to allow preemption here, because
  18076. - * sys_ptrace() needs this task to be inactive.
  18077. - *
  18078. - * XXX: implement read_unlock_no_resched().
  18079. - */
  18080. - preempt_disable();
  18081. read_unlock(&tasklist_lock);
  18082. - preempt_enable_no_resched();
  18083. freezable_schedule();
  18084. } else {
  18085. /*
  18086. diff --git a/kernel/softirq.c b/kernel/softirq.c
  18087. index 744fa611cae0..819bd7cf5ad0 100644
  18088. --- a/kernel/softirq.c
  18089. +++ b/kernel/softirq.c
  18090. @@ -21,10 +21,12 @@
  18091. #include <linux/freezer.h>
  18092. #include <linux/kthread.h>
  18093. #include <linux/rcupdate.h>
  18094. +#include <linux/delay.h>
  18095. #include <linux/ftrace.h>
  18096. #include <linux/smp.h>
  18097. #include <linux/smpboot.h>
  18098. #include <linux/tick.h>
  18099. +#include <linux/locallock.h>
  18100. #include <linux/irq.h>
  18101. #define CREATE_TRACE_POINTS
  18102. @@ -56,12 +58,108 @@ EXPORT_SYMBOL(irq_stat);
  18103. static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
  18104. DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
  18105. +#ifdef CONFIG_PREEMPT_RT_FULL
  18106. +#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ))
  18107. +DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd);
  18108. +#endif
  18109. const char * const softirq_to_name[NR_SOFTIRQS] = {
  18110. "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
  18111. "TASKLET", "SCHED", "HRTIMER", "RCU"
  18112. };
  18113. +#ifdef CONFIG_NO_HZ_COMMON
  18114. +# ifdef CONFIG_PREEMPT_RT_FULL
  18115. +
  18116. +struct softirq_runner {
  18117. + struct task_struct *runner[NR_SOFTIRQS];
  18118. +};
  18119. +
  18120. +static DEFINE_PER_CPU(struct softirq_runner, softirq_runners);
  18121. +
  18122. +static inline void softirq_set_runner(unsigned int sirq)
  18123. +{
  18124. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  18125. +
  18126. + sr->runner[sirq] = current;
  18127. +}
  18128. +
  18129. +static inline void softirq_clr_runner(unsigned int sirq)
  18130. +{
  18131. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  18132. +
  18133. + sr->runner[sirq] = NULL;
  18134. +}
  18135. +
  18136. +/*
  18137. + * On preempt-rt a softirq running context might be blocked on a
  18138. + * lock. There might be no other runnable task on this CPU because the
  18139. + * lock owner runs on some other CPU. So we have to go into idle with
  18140. + * the pending bit set. Therefore we need to check this otherwise we
  18141. + * warn about false positives which confuses users and defeats the
  18142. + * whole purpose of this test.
  18143. + *
  18144. + * This code is called with interrupts disabled.
  18145. + */
  18146. +void softirq_check_pending_idle(void)
  18147. +{
  18148. + static int rate_limit;
  18149. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  18150. + u32 warnpending;
  18151. + int i;
  18152. +
  18153. + if (rate_limit >= 10)
  18154. + return;
  18155. +
  18156. + warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
  18157. + for (i = 0; i < NR_SOFTIRQS; i++) {
  18158. + struct task_struct *tsk = sr->runner[i];
  18159. +
  18160. + /*
  18161. + * The wakeup code in rtmutex.c wakes up the task
  18162. + * _before_ it sets pi_blocked_on to NULL under
  18163. + * tsk->pi_lock. So we need to check for both: state
  18164. + * and pi_blocked_on.
  18165. + */
  18166. + if (tsk) {
  18167. + raw_spin_lock(&tsk->pi_lock);
  18168. + if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
  18169. + /* Clear all bits pending in that task */
  18170. + warnpending &= ~(tsk->softirqs_raised);
  18171. + warnpending &= ~(1 << i);
  18172. + }
  18173. + raw_spin_unlock(&tsk->pi_lock);
  18174. + }
  18175. + }
  18176. +
  18177. + if (warnpending) {
  18178. + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
  18179. + warnpending);
  18180. + rate_limit++;
  18181. + }
  18182. +}
  18183. +# else
  18184. +/*
  18185. + * On !PREEMPT_RT we just printk rate limited:
  18186. + */
  18187. +void softirq_check_pending_idle(void)
  18188. +{
  18189. + static int rate_limit;
  18190. +
  18191. + if (rate_limit < 10 &&
  18192. + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
  18193. + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
  18194. + local_softirq_pending());
  18195. + rate_limit++;
  18196. + }
  18197. +}
  18198. +# endif
  18199. +
  18200. +#else /* !CONFIG_NO_HZ_COMMON */
  18201. +static inline void softirq_set_runner(unsigned int sirq) { }
  18202. +static inline void softirq_clr_runner(unsigned int sirq) { }
  18203. +#endif
  18204. +
  18205. /*
  18206. * we cannot loop indefinitely here to avoid userspace starvation,
  18207. * but we also don't want to introduce a worst case 1/HZ latency
  18208. @@ -77,6 +175,38 @@ static void wakeup_softirqd(void)
  18209. wake_up_process(tsk);
  18210. }
  18211. +#ifdef CONFIG_PREEMPT_RT_FULL
  18212. +static void wakeup_timer_softirqd(void)
  18213. +{
  18214. + /* Interrupts are disabled: no need to stop preemption */
  18215. + struct task_struct *tsk = __this_cpu_read(ktimer_softirqd);
  18216. +
  18217. + if (tsk && tsk->state != TASK_RUNNING)
  18218. + wake_up_process(tsk);
  18219. +}
  18220. +#endif
  18221. +
  18222. +static void handle_softirq(unsigned int vec_nr)
  18223. +{
  18224. + struct softirq_action *h = softirq_vec + vec_nr;
  18225. + int prev_count;
  18226. +
  18227. + prev_count = preempt_count();
  18228. +
  18229. + kstat_incr_softirqs_this_cpu(vec_nr);
  18230. +
  18231. + trace_softirq_entry(vec_nr);
  18232. + h->action(h);
  18233. + trace_softirq_exit(vec_nr);
  18234. + if (unlikely(prev_count != preempt_count())) {
  18235. + pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
  18236. + vec_nr, softirq_to_name[vec_nr], h->action,
  18237. + prev_count, preempt_count());
  18238. + preempt_count_set(prev_count);
  18239. + }
  18240. +}
  18241. +
  18242. +#ifndef CONFIG_PREEMPT_RT_FULL
  18243. /*
  18244. * If ksoftirqd is scheduled, we do not want to process pending softirqs
  18245. * right now. Let ksoftirqd handle this at its own rate, to get fairness.
  18246. @@ -88,6 +218,47 @@ static bool ksoftirqd_running(void)
  18247. return tsk && (tsk->state == TASK_RUNNING);
  18248. }
  18249. +static inline int ksoftirqd_softirq_pending(void)
  18250. +{
  18251. + return local_softirq_pending();
  18252. +}
  18253. +
  18254. +static void handle_pending_softirqs(u32 pending)
  18255. +{
  18256. + struct softirq_action *h = softirq_vec;
  18257. + int softirq_bit;
  18258. +
  18259. + local_irq_enable();
  18260. +
  18261. + h = softirq_vec;
  18262. +
  18263. + while ((softirq_bit = ffs(pending))) {
  18264. + unsigned int vec_nr;
  18265. +
  18266. + h += softirq_bit - 1;
  18267. + vec_nr = h - softirq_vec;
  18268. + handle_softirq(vec_nr);
  18269. +
  18270. + h++;
  18271. + pending >>= softirq_bit;
  18272. + }
  18273. +
  18274. + rcu_bh_qs();
  18275. + local_irq_disable();
  18276. +}
  18277. +
  18278. +static void run_ksoftirqd(unsigned int cpu)
  18279. +{
  18280. + local_irq_disable();
  18281. + if (ksoftirqd_softirq_pending()) {
  18282. + __do_softirq();
  18283. + local_irq_enable();
  18284. + cond_resched_rcu_qs();
  18285. + return;
  18286. + }
  18287. + local_irq_enable();
  18288. +}
  18289. +
  18290. /*
  18291. * preempt_count and SOFTIRQ_OFFSET usage:
  18292. * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
  18293. @@ -243,10 +414,8 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
  18294. unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
  18295. unsigned long old_flags = current->flags;
  18296. int max_restart = MAX_SOFTIRQ_RESTART;
  18297. - struct softirq_action *h;
  18298. bool in_hardirq;
  18299. __u32 pending;
  18300. - int softirq_bit;
  18301. /*
  18302. * Mask out PF_MEMALLOC s current task context is borrowed for the
  18303. @@ -265,36 +434,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
  18304. /* Reset the pending bitmask before enabling irqs */
  18305. set_softirq_pending(0);
  18306. - local_irq_enable();
  18307. -
  18308. - h = softirq_vec;
  18309. -
  18310. - while ((softirq_bit = ffs(pending))) {
  18311. - unsigned int vec_nr;
  18312. - int prev_count;
  18313. -
  18314. - h += softirq_bit - 1;
  18315. -
  18316. - vec_nr = h - softirq_vec;
  18317. - prev_count = preempt_count();
  18318. -
  18319. - kstat_incr_softirqs_this_cpu(vec_nr);
  18320. -
  18321. - trace_softirq_entry(vec_nr);
  18322. - h->action(h);
  18323. - trace_softirq_exit(vec_nr);
  18324. - if (unlikely(prev_count != preempt_count())) {
  18325. - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
  18326. - vec_nr, softirq_to_name[vec_nr], h->action,
  18327. - prev_count, preempt_count());
  18328. - preempt_count_set(prev_count);
  18329. - }
  18330. - h++;
  18331. - pending >>= softirq_bit;
  18332. - }
  18333. -
  18334. - rcu_bh_qs();
  18335. - local_irq_disable();
  18336. + handle_pending_softirqs(pending);
  18337. pending = local_softirq_pending();
  18338. if (pending) {
  18339. @@ -330,6 +470,309 @@ asmlinkage __visible void do_softirq(void)
  18340. local_irq_restore(flags);
  18341. }
  18342. +/*
  18343. + * This function must run with irqs disabled!
  18344. + */
  18345. +void raise_softirq_irqoff(unsigned int nr)
  18346. +{
  18347. + __raise_softirq_irqoff(nr);
  18348. +
  18349. + /*
  18350. + * If we're in an interrupt or softirq, we're done
  18351. + * (this also catches softirq-disabled code). We will
  18352. + * actually run the softirq once we return from
  18353. + * the irq or softirq.
  18354. + *
  18355. + * Otherwise we wake up ksoftirqd to make sure we
  18356. + * schedule the softirq soon.
  18357. + */
  18358. + if (!in_interrupt())
  18359. + wakeup_softirqd();
  18360. +}
  18361. +
  18362. +void __raise_softirq_irqoff(unsigned int nr)
  18363. +{
  18364. + trace_softirq_raise(nr);
  18365. + or_softirq_pending(1UL << nr);
  18366. +}
  18367. +
  18368. +static inline void local_bh_disable_nort(void) { local_bh_disable(); }
  18369. +static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
  18370. +static void ksoftirqd_set_sched_params(unsigned int cpu) { }
  18371. +
  18372. +#else /* !PREEMPT_RT_FULL */
  18373. +
  18374. +/*
  18375. + * On RT we serialize softirq execution with a cpu local lock per softirq
  18376. + */
  18377. +static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks);
  18378. +
  18379. +void __init softirq_early_init(void)
  18380. +{
  18381. + int i;
  18382. +
  18383. + for (i = 0; i < NR_SOFTIRQS; i++)
  18384. + local_irq_lock_init(local_softirq_locks[i]);
  18385. +}
  18386. +
  18387. +static void lock_softirq(int which)
  18388. +{
  18389. + local_lock(local_softirq_locks[which]);
  18390. +}
  18391. +
  18392. +static void unlock_softirq(int which)
  18393. +{
  18394. + local_unlock(local_softirq_locks[which]);
  18395. +}
  18396. +
  18397. +static void do_single_softirq(int which)
  18398. +{
  18399. + unsigned long old_flags = current->flags;
  18400. +
  18401. + current->flags &= ~PF_MEMALLOC;
  18402. + vtime_account_irq_enter(current);
  18403. + current->flags |= PF_IN_SOFTIRQ;
  18404. + lockdep_softirq_enter();
  18405. + local_irq_enable();
  18406. + handle_softirq(which);
  18407. + local_irq_disable();
  18408. + lockdep_softirq_exit();
  18409. + current->flags &= ~PF_IN_SOFTIRQ;
  18410. + vtime_account_irq_enter(current);
  18411. + tsk_restore_flags(current, old_flags, PF_MEMALLOC);
  18412. +}
  18413. +
  18414. +/*
  18415. + * Called with interrupts disabled. Process softirqs which were raised
  18416. + * in current context (or on behalf of ksoftirqd).
  18417. + */
  18418. +static void do_current_softirqs(void)
  18419. +{
  18420. + while (current->softirqs_raised) {
  18421. + int i = __ffs(current->softirqs_raised);
  18422. + unsigned int pending, mask = (1U << i);
  18423. +
  18424. + current->softirqs_raised &= ~mask;
  18425. + local_irq_enable();
  18426. +
  18427. + /*
  18428. + * If the lock is contended, we boost the owner to
  18429. + * process the softirq or leave the critical section
  18430. + * now.
  18431. + */
  18432. + lock_softirq(i);
  18433. + local_irq_disable();
  18434. + softirq_set_runner(i);
  18435. + /*
  18436. + * Check with the local_softirq_pending() bits,
  18437. + * whether we need to process this still or if someone
  18438. + * else took care of it.
  18439. + */
  18440. + pending = local_softirq_pending();
  18441. + if (pending & mask) {
  18442. + set_softirq_pending(pending & ~mask);
  18443. + do_single_softirq(i);
  18444. + }
  18445. + softirq_clr_runner(i);
  18446. + WARN_ON(current->softirq_nestcnt != 1);
  18447. + local_irq_enable();
  18448. + unlock_softirq(i);
  18449. + local_irq_disable();
  18450. + }
  18451. +}
  18452. +
  18453. +void __local_bh_disable(void)
  18454. +{
  18455. + if (++current->softirq_nestcnt == 1)
  18456. + migrate_disable();
  18457. +}
  18458. +EXPORT_SYMBOL(__local_bh_disable);
  18459. +
  18460. +void __local_bh_enable(void)
  18461. +{
  18462. + if (WARN_ON(current->softirq_nestcnt == 0))
  18463. + return;
  18464. +
  18465. + local_irq_disable();
  18466. + if (current->softirq_nestcnt == 1 && current->softirqs_raised)
  18467. + do_current_softirqs();
  18468. + local_irq_enable();
  18469. +
  18470. + if (--current->softirq_nestcnt == 0)
  18471. + migrate_enable();
  18472. +}
  18473. +EXPORT_SYMBOL(__local_bh_enable);
  18474. +
  18475. +void _local_bh_enable(void)
  18476. +{
  18477. + if (WARN_ON(current->softirq_nestcnt == 0))
  18478. + return;
  18479. + if (--current->softirq_nestcnt == 0)
  18480. + migrate_enable();
  18481. +}
  18482. +EXPORT_SYMBOL(_local_bh_enable);
  18483. +
  18484. +int in_serving_softirq(void)
  18485. +{
  18486. + return current->flags & PF_IN_SOFTIRQ;
  18487. +}
  18488. +EXPORT_SYMBOL(in_serving_softirq);
  18489. +
  18490. +/* Called with preemption disabled */
  18491. +static void run_ksoftirqd(unsigned int cpu)
  18492. +{
  18493. + local_irq_disable();
  18494. + current->softirq_nestcnt++;
  18495. +
  18496. + do_current_softirqs();
  18497. + current->softirq_nestcnt--;
  18498. + local_irq_enable();
  18499. + cond_resched_rcu_qs();
  18500. +}
  18501. +
  18502. +/*
  18503. + * Called from netif_rx_ni(). Preemption enabled, but migration
  18504. + * disabled. So the cpu can't go away under us.
  18505. + */
  18506. +void thread_do_softirq(void)
  18507. +{
  18508. + if (!in_serving_softirq() && current->softirqs_raised) {
  18509. + current->softirq_nestcnt++;
  18510. + do_current_softirqs();
  18511. + current->softirq_nestcnt--;
  18512. + }
  18513. +}
  18514. +
  18515. +static void do_raise_softirq_irqoff(unsigned int nr)
  18516. +{
  18517. + unsigned int mask;
  18518. +
  18519. + mask = 1UL << nr;
  18520. +
  18521. + trace_softirq_raise(nr);
  18522. + or_softirq_pending(mask);
  18523. +
  18524. + /*
  18525. + * If we are not in a hard interrupt and inside a bh disabled
  18526. + * region, we simply raise the flag on current. local_bh_enable()
  18527. + * will make sure that the softirq is executed. Otherwise we
  18528. + * delegate it to ksoftirqd.
  18529. + */
  18530. + if (!in_irq() && current->softirq_nestcnt)
  18531. + current->softirqs_raised |= mask;
  18532. + else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd))
  18533. + return;
  18534. +
  18535. + if (mask & TIMER_SOFTIRQS)
  18536. + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
  18537. + else
  18538. + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
  18539. +}
  18540. +
  18541. +static void wakeup_proper_softirq(unsigned int nr)
  18542. +{
  18543. + if ((1UL << nr) & TIMER_SOFTIRQS)
  18544. + wakeup_timer_softirqd();
  18545. + else
  18546. + wakeup_softirqd();
  18547. +}
  18548. +
  18549. +void __raise_softirq_irqoff(unsigned int nr)
  18550. +{
  18551. + do_raise_softirq_irqoff(nr);
  18552. + if (!in_irq() && !current->softirq_nestcnt)
  18553. + wakeup_proper_softirq(nr);
  18554. +}
  18555. +
  18556. +/*
  18557. + * Same as __raise_softirq_irqoff() but will process them in ksoftirqd
  18558. + */
  18559. +void __raise_softirq_irqoff_ksoft(unsigned int nr)
  18560. +{
  18561. + unsigned int mask;
  18562. +
  18563. + if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) ||
  18564. + !__this_cpu_read(ktimer_softirqd)))
  18565. + return;
  18566. + mask = 1UL << nr;
  18567. +
  18568. + trace_softirq_raise(nr);
  18569. + or_softirq_pending(mask);
  18570. + if (mask & TIMER_SOFTIRQS)
  18571. + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
  18572. + else
  18573. + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
  18574. + wakeup_proper_softirq(nr);
  18575. +}
  18576. +
  18577. +/*
  18578. + * This function must run with irqs disabled!
  18579. + */
  18580. +void raise_softirq_irqoff(unsigned int nr)
  18581. +{
  18582. + do_raise_softirq_irqoff(nr);
  18583. +
  18584. + /*
  18585. + * If we're in an hard interrupt we let irq return code deal
  18586. + * with the wakeup of ksoftirqd.
  18587. + */
  18588. + if (in_irq())
  18589. + return;
  18590. + /*
  18591. + * If we are in thread context but outside of a bh disabled
  18592. + * region, we need to wake ksoftirqd as well.
  18593. + *
  18594. + * CHECKME: Some of the places which do that could be wrapped
  18595. + * into local_bh_disable/enable pairs. Though it's unclear
  18596. + * whether this is worth the effort. To find those places just
  18597. + * raise a WARN() if the condition is met.
  18598. + */
  18599. + if (!current->softirq_nestcnt)
  18600. + wakeup_proper_softirq(nr);
  18601. +}
  18602. +
  18603. +static inline int ksoftirqd_softirq_pending(void)
  18604. +{
  18605. + return current->softirqs_raised;
  18606. +}
  18607. +
  18608. +static inline void local_bh_disable_nort(void) { }
  18609. +static inline void _local_bh_enable_nort(void) { }
  18610. +
  18611. +static inline void ksoftirqd_set_sched_params(unsigned int cpu)
  18612. +{
  18613. + /* Take over all but timer pending softirqs when starting */
  18614. + local_irq_disable();
  18615. + current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS;
  18616. + local_irq_enable();
  18617. +}
  18618. +
  18619. +static inline void ktimer_softirqd_set_sched_params(unsigned int cpu)
  18620. +{
  18621. + struct sched_param param = { .sched_priority = 1 };
  18622. +
  18623. + sched_setscheduler(current, SCHED_FIFO, &param);
  18624. +
  18625. + /* Take over timer pending softirqs when starting */
  18626. + local_irq_disable();
  18627. + current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS;
  18628. + local_irq_enable();
  18629. +}
  18630. +
  18631. +static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu,
  18632. + bool online)
  18633. +{
  18634. + struct sched_param param = { .sched_priority = 0 };
  18635. +
  18636. + sched_setscheduler(current, SCHED_NORMAL, &param);
  18637. +}
  18638. +
  18639. +static int ktimer_softirqd_should_run(unsigned int cpu)
  18640. +{
  18641. + return current->softirqs_raised;
  18642. +}
  18643. +
  18644. +#endif /* PREEMPT_RT_FULL */
  18645. /*
  18646. * Enter an interrupt context.
  18647. */
  18648. @@ -341,9 +784,9 @@ void irq_enter(void)
  18649. * Prevent raise_softirq from needlessly waking up ksoftirqd
  18650. * here, as softirq will be serviced on return from interrupt.
  18651. */
  18652. - local_bh_disable();
  18653. + local_bh_disable_nort();
  18654. tick_irq_enter();
  18655. - _local_bh_enable();
  18656. + _local_bh_enable_nort();
  18657. }
  18658. __irq_enter();
  18659. @@ -351,6 +794,7 @@ void irq_enter(void)
  18660. static inline void invoke_softirq(void)
  18661. {
  18662. +#ifndef CONFIG_PREEMPT_RT_FULL
  18663. if (ksoftirqd_running())
  18664. return;
  18665. @@ -373,6 +817,18 @@ static inline void invoke_softirq(void)
  18666. } else {
  18667. wakeup_softirqd();
  18668. }
  18669. +#else /* PREEMPT_RT_FULL */
  18670. + unsigned long flags;
  18671. +
  18672. + local_irq_save(flags);
  18673. + if (__this_cpu_read(ksoftirqd) &&
  18674. + __this_cpu_read(ksoftirqd)->softirqs_raised)
  18675. + wakeup_softirqd();
  18676. + if (__this_cpu_read(ktimer_softirqd) &&
  18677. + __this_cpu_read(ktimer_softirqd)->softirqs_raised)
  18678. + wakeup_timer_softirqd();
  18679. + local_irq_restore(flags);
  18680. +#endif
  18681. }
  18682. static inline void tick_irq_exit(void)
  18683. @@ -409,26 +865,6 @@ void irq_exit(void)
  18684. trace_hardirq_exit(); /* must be last! */
  18685. }
  18686. -/*
  18687. - * This function must run with irqs disabled!
  18688. - */
  18689. -inline void raise_softirq_irqoff(unsigned int nr)
  18690. -{
  18691. - __raise_softirq_irqoff(nr);
  18692. -
  18693. - /*
  18694. - * If we're in an interrupt or softirq, we're done
  18695. - * (this also catches softirq-disabled code). We will
  18696. - * actually run the softirq once we return from
  18697. - * the irq or softirq.
  18698. - *
  18699. - * Otherwise we wake up ksoftirqd to make sure we
  18700. - * schedule the softirq soon.
  18701. - */
  18702. - if (!in_interrupt())
  18703. - wakeup_softirqd();
  18704. -}
  18705. -
  18706. void raise_softirq(unsigned int nr)
  18707. {
  18708. unsigned long flags;
  18709. @@ -438,12 +874,6 @@ void raise_softirq(unsigned int nr)
  18710. local_irq_restore(flags);
  18711. }
  18712. -void __raise_softirq_irqoff(unsigned int nr)
  18713. -{
  18714. - trace_softirq_raise(nr);
  18715. - or_softirq_pending(1UL << nr);
  18716. -}
  18717. -
  18718. void open_softirq(int nr, void (*action)(struct softirq_action *))
  18719. {
  18720. softirq_vec[nr].action = action;
  18721. @@ -460,15 +890,45 @@ struct tasklet_head {
  18722. static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
  18723. static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
  18724. +static void inline
  18725. +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
  18726. +{
  18727. + if (tasklet_trylock(t)) {
  18728. +again:
  18729. + /* We may have been preempted before tasklet_trylock
  18730. + * and __tasklet_action may have already run.
  18731. + * So double check the sched bit while the takslet
  18732. + * is locked before adding it to the list.
  18733. + */
  18734. + if (test_bit(TASKLET_STATE_SCHED, &t->state)) {
  18735. + t->next = NULL;
  18736. + *head->tail = t;
  18737. + head->tail = &(t->next);
  18738. + raise_softirq_irqoff(nr);
  18739. + tasklet_unlock(t);
  18740. + } else {
  18741. + /* This is subtle. If we hit the corner case above
  18742. + * It is possible that we get preempted right here,
  18743. + * and another task has successfully called
  18744. + * tasklet_schedule(), then this function, and
  18745. + * failed on the trylock. Thus we must be sure
  18746. + * before releasing the tasklet lock, that the
  18747. + * SCHED_BIT is clear. Otherwise the tasklet
  18748. + * may get its SCHED_BIT set, but not added to the
  18749. + * list
  18750. + */
  18751. + if (!tasklet_tryunlock(t))
  18752. + goto again;
  18753. + }
  18754. + }
  18755. +}
  18756. +
  18757. void __tasklet_schedule(struct tasklet_struct *t)
  18758. {
  18759. unsigned long flags;
  18760. local_irq_save(flags);
  18761. - t->next = NULL;
  18762. - *__this_cpu_read(tasklet_vec.tail) = t;
  18763. - __this_cpu_write(tasklet_vec.tail, &(t->next));
  18764. - raise_softirq_irqoff(TASKLET_SOFTIRQ);
  18765. + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
  18766. local_irq_restore(flags);
  18767. }
  18768. EXPORT_SYMBOL(__tasklet_schedule);
  18769. @@ -478,10 +938,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
  18770. unsigned long flags;
  18771. local_irq_save(flags);
  18772. - t->next = NULL;
  18773. - *__this_cpu_read(tasklet_hi_vec.tail) = t;
  18774. - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
  18775. - raise_softirq_irqoff(HI_SOFTIRQ);
  18776. + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
  18777. local_irq_restore(flags);
  18778. }
  18779. EXPORT_SYMBOL(__tasklet_hi_schedule);
  18780. @@ -490,82 +947,122 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t)
  18781. {
  18782. BUG_ON(!irqs_disabled());
  18783. - t->next = __this_cpu_read(tasklet_hi_vec.head);
  18784. - __this_cpu_write(tasklet_hi_vec.head, t);
  18785. - __raise_softirq_irqoff(HI_SOFTIRQ);
  18786. + __tasklet_hi_schedule(t);
  18787. }
  18788. EXPORT_SYMBOL(__tasklet_hi_schedule_first);
  18789. -static __latent_entropy void tasklet_action(struct softirq_action *a)
  18790. +void tasklet_enable(struct tasklet_struct *t)
  18791. {
  18792. - struct tasklet_struct *list;
  18793. + if (!atomic_dec_and_test(&t->count))
  18794. + return;
  18795. + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
  18796. + tasklet_schedule(t);
  18797. +}
  18798. +EXPORT_SYMBOL(tasklet_enable);
  18799. - local_irq_disable();
  18800. - list = __this_cpu_read(tasklet_vec.head);
  18801. - __this_cpu_write(tasklet_vec.head, NULL);
  18802. - __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
  18803. - local_irq_enable();
  18804. +static void __tasklet_action(struct softirq_action *a,
  18805. + struct tasklet_struct *list)
  18806. +{
  18807. + int loops = 1000000;
  18808. while (list) {
  18809. struct tasklet_struct *t = list;
  18810. list = list->next;
  18811. - if (tasklet_trylock(t)) {
  18812. - if (!atomic_read(&t->count)) {
  18813. - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
  18814. - &t->state))
  18815. - BUG();
  18816. - t->func(t->data);
  18817. - tasklet_unlock(t);
  18818. - continue;
  18819. - }
  18820. - tasklet_unlock(t);
  18821. + /*
  18822. + * Should always succeed - after a tasklist got on the
  18823. + * list (after getting the SCHED bit set from 0 to 1),
  18824. + * nothing but the tasklet softirq it got queued to can
  18825. + * lock it:
  18826. + */
  18827. + if (!tasklet_trylock(t)) {
  18828. + WARN_ON(1);
  18829. + continue;
  18830. }
  18831. - local_irq_disable();
  18832. t->next = NULL;
  18833. - *__this_cpu_read(tasklet_vec.tail) = t;
  18834. - __this_cpu_write(tasklet_vec.tail, &(t->next));
  18835. - __raise_softirq_irqoff(TASKLET_SOFTIRQ);
  18836. - local_irq_enable();
  18837. +
  18838. + /*
  18839. + * If we cannot handle the tasklet because it's disabled,
  18840. + * mark it as pending. tasklet_enable() will later
  18841. + * re-schedule the tasklet.
  18842. + */
  18843. + if (unlikely(atomic_read(&t->count))) {
  18844. +out_disabled:
  18845. + /* implicit unlock: */
  18846. + wmb();
  18847. + t->state = TASKLET_STATEF_PENDING;
  18848. + continue;
  18849. + }
  18850. +
  18851. + /*
  18852. + * After this point on the tasklet might be rescheduled
  18853. + * on another CPU, but it can only be added to another
  18854. + * CPU's tasklet list if we unlock the tasklet (which we
  18855. + * dont do yet).
  18856. + */
  18857. + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
  18858. + WARN_ON(1);
  18859. +
  18860. +again:
  18861. + t->func(t->data);
  18862. +
  18863. + /*
  18864. + * Try to unlock the tasklet. We must use cmpxchg, because
  18865. + * another CPU might have scheduled or disabled the tasklet.
  18866. + * We only allow the STATE_RUN -> 0 transition here.
  18867. + */
  18868. + while (!tasklet_tryunlock(t)) {
  18869. + /*
  18870. + * If it got disabled meanwhile, bail out:
  18871. + */
  18872. + if (atomic_read(&t->count))
  18873. + goto out_disabled;
  18874. + /*
  18875. + * If it got scheduled meanwhile, re-execute
  18876. + * the tasklet function:
  18877. + */
  18878. + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
  18879. + goto again;
  18880. + if (!--loops) {
  18881. + printk("hm, tasklet state: %08lx\n", t->state);
  18882. + WARN_ON(1);
  18883. + tasklet_unlock(t);
  18884. + break;
  18885. + }
  18886. + }
  18887. }
  18888. }
  18889. +static void tasklet_action(struct softirq_action *a)
  18890. +{
  18891. + struct tasklet_struct *list;
  18892. +
  18893. + local_irq_disable();
  18894. +
  18895. + list = __this_cpu_read(tasklet_vec.head);
  18896. + __this_cpu_write(tasklet_vec.head, NULL);
  18897. + __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
  18898. +
  18899. + local_irq_enable();
  18900. +
  18901. + __tasklet_action(a, list);
  18902. +}
  18903. +
  18904. static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
  18905. {
  18906. struct tasklet_struct *list;
  18907. local_irq_disable();
  18908. +
  18909. list = __this_cpu_read(tasklet_hi_vec.head);
  18910. __this_cpu_write(tasklet_hi_vec.head, NULL);
  18911. __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
  18912. - local_irq_enable();
  18913. -
  18914. - while (list) {
  18915. - struct tasklet_struct *t = list;
  18916. - list = list->next;
  18917. -
  18918. - if (tasklet_trylock(t)) {
  18919. - if (!atomic_read(&t->count)) {
  18920. - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
  18921. - &t->state))
  18922. - BUG();
  18923. - t->func(t->data);
  18924. - tasklet_unlock(t);
  18925. - continue;
  18926. - }
  18927. - tasklet_unlock(t);
  18928. - }
  18929. + local_irq_enable();
  18930. - local_irq_disable();
  18931. - t->next = NULL;
  18932. - *__this_cpu_read(tasklet_hi_vec.tail) = t;
  18933. - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
  18934. - __raise_softirq_irqoff(HI_SOFTIRQ);
  18935. - local_irq_enable();
  18936. - }
  18937. + __tasklet_action(a, list);
  18938. }
  18939. void tasklet_init(struct tasklet_struct *t,
  18940. @@ -586,7 +1083,7 @@ void tasklet_kill(struct tasklet_struct *t)
  18941. while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
  18942. do {
  18943. - yield();
  18944. + msleep(1);
  18945. } while (test_bit(TASKLET_STATE_SCHED, &t->state));
  18946. }
  18947. tasklet_unlock_wait(t);
  18948. @@ -660,25 +1157,26 @@ void __init softirq_init(void)
  18949. open_softirq(HI_SOFTIRQ, tasklet_hi_action);
  18950. }
  18951. -static int ksoftirqd_should_run(unsigned int cpu)
  18952. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  18953. +void tasklet_unlock_wait(struct tasklet_struct *t)
  18954. {
  18955. - return local_softirq_pending();
  18956. -}
  18957. -
  18958. -static void run_ksoftirqd(unsigned int cpu)
  18959. -{
  18960. - local_irq_disable();
  18961. - if (local_softirq_pending()) {
  18962. + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
  18963. /*
  18964. - * We can safely run softirq on inline stack, as we are not deep
  18965. - * in the task stack here.
  18966. + * Hack for now to avoid this busy-loop:
  18967. */
  18968. - __do_softirq();
  18969. - local_irq_enable();
  18970. - cond_resched_rcu_qs();
  18971. - return;
  18972. +#ifdef CONFIG_PREEMPT_RT_FULL
  18973. + msleep(1);
  18974. +#else
  18975. + barrier();
  18976. +#endif
  18977. }
  18978. - local_irq_enable();
  18979. +}
  18980. +EXPORT_SYMBOL(tasklet_unlock_wait);
  18981. +#endif
  18982. +
  18983. +static int ksoftirqd_should_run(unsigned int cpu)
  18984. +{
  18985. + return ksoftirqd_softirq_pending();
  18986. }
  18987. #ifdef CONFIG_HOTPLUG_CPU
  18988. @@ -745,17 +1243,31 @@ static int takeover_tasklets(unsigned int cpu)
  18989. static struct smp_hotplug_thread softirq_threads = {
  18990. .store = &ksoftirqd,
  18991. + .setup = ksoftirqd_set_sched_params,
  18992. .thread_should_run = ksoftirqd_should_run,
  18993. .thread_fn = run_ksoftirqd,
  18994. .thread_comm = "ksoftirqd/%u",
  18995. };
  18996. +#ifdef CONFIG_PREEMPT_RT_FULL
  18997. +static struct smp_hotplug_thread softirq_timer_threads = {
  18998. + .store = &ktimer_softirqd,
  18999. + .setup = ktimer_softirqd_set_sched_params,
  19000. + .cleanup = ktimer_softirqd_clr_sched_params,
  19001. + .thread_should_run = ktimer_softirqd_should_run,
  19002. + .thread_fn = run_ksoftirqd,
  19003. + .thread_comm = "ktimersoftd/%u",
  19004. +};
  19005. +#endif
  19006. +
  19007. static __init int spawn_ksoftirqd(void)
  19008. {
  19009. cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
  19010. takeover_tasklets);
  19011. BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
  19012. -
  19013. +#ifdef CONFIG_PREEMPT_RT_FULL
  19014. + BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads));
  19015. +#endif
  19016. return 0;
  19017. }
  19018. early_initcall(spawn_ksoftirqd);
  19019. diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
  19020. index ec9ab2f01489..8b89dbedeaff 100644
  19021. --- a/kernel/stop_machine.c
  19022. +++ b/kernel/stop_machine.c
  19023. @@ -36,7 +36,7 @@ struct cpu_stop_done {
  19024. struct cpu_stopper {
  19025. struct task_struct *thread;
  19026. - spinlock_t lock;
  19027. + raw_spinlock_t lock;
  19028. bool enabled; /* is this stopper enabled? */
  19029. struct list_head works; /* list of pending works */
  19030. @@ -78,14 +78,14 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
  19031. unsigned long flags;
  19032. bool enabled;
  19033. - spin_lock_irqsave(&stopper->lock, flags);
  19034. + raw_spin_lock_irqsave(&stopper->lock, flags);
  19035. enabled = stopper->enabled;
  19036. if (enabled)
  19037. __cpu_stop_queue_work(stopper, work);
  19038. else if (work->done)
  19039. cpu_stop_signal_done(work->done);
  19040. - spin_unlock_irqrestore(&stopper->lock, flags);
  19041. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  19042. return enabled;
  19043. }
  19044. @@ -231,8 +231,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
  19045. struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
  19046. int err;
  19047. retry:
  19048. - spin_lock_irq(&stopper1->lock);
  19049. - spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
  19050. + raw_spin_lock_irq(&stopper1->lock);
  19051. + raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
  19052. err = -ENOENT;
  19053. if (!stopper1->enabled || !stopper2->enabled)
  19054. @@ -255,8 +255,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
  19055. __cpu_stop_queue_work(stopper1, work1);
  19056. __cpu_stop_queue_work(stopper2, work2);
  19057. unlock:
  19058. - spin_unlock(&stopper2->lock);
  19059. - spin_unlock_irq(&stopper1->lock);
  19060. + raw_spin_unlock(&stopper2->lock);
  19061. + raw_spin_unlock_irq(&stopper1->lock);
  19062. if (unlikely(err == -EDEADLK)) {
  19063. while (stop_cpus_in_progress)
  19064. @@ -448,9 +448,9 @@ static int cpu_stop_should_run(unsigned int cpu)
  19065. unsigned long flags;
  19066. int run;
  19067. - spin_lock_irqsave(&stopper->lock, flags);
  19068. + raw_spin_lock_irqsave(&stopper->lock, flags);
  19069. run = !list_empty(&stopper->works);
  19070. - spin_unlock_irqrestore(&stopper->lock, flags);
  19071. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  19072. return run;
  19073. }
  19074. @@ -461,13 +461,13 @@ static void cpu_stopper_thread(unsigned int cpu)
  19075. repeat:
  19076. work = NULL;
  19077. - spin_lock_irq(&stopper->lock);
  19078. + raw_spin_lock_irq(&stopper->lock);
  19079. if (!list_empty(&stopper->works)) {
  19080. work = list_first_entry(&stopper->works,
  19081. struct cpu_stop_work, list);
  19082. list_del_init(&work->list);
  19083. }
  19084. - spin_unlock_irq(&stopper->lock);
  19085. + raw_spin_unlock_irq(&stopper->lock);
  19086. if (work) {
  19087. cpu_stop_fn_t fn = work->fn;
  19088. @@ -475,6 +475,8 @@ static void cpu_stopper_thread(unsigned int cpu)
  19089. struct cpu_stop_done *done = work->done;
  19090. int ret;
  19091. + /* XXX */
  19092. +
  19093. /* cpu stop callbacks must not sleep, make in_atomic() == T */
  19094. preempt_count_inc();
  19095. ret = fn(arg);
  19096. @@ -541,7 +543,7 @@ static int __init cpu_stop_init(void)
  19097. for_each_possible_cpu(cpu) {
  19098. struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
  19099. - spin_lock_init(&stopper->lock);
  19100. + raw_spin_lock_init(&stopper->lock);
  19101. INIT_LIST_HEAD(&stopper->works);
  19102. }
  19103. diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
  19104. index eeb7f2f5698d..369203af6406 100644
  19105. --- a/kernel/time/hrtimer.c
  19106. +++ b/kernel/time/hrtimer.c
  19107. @@ -53,6 +53,7 @@
  19108. #include <asm/uaccess.h>
  19109. #include <trace/events/timer.h>
  19110. +#include <trace/events/hist.h>
  19111. #include "tick-internal.h"
  19112. @@ -693,6 +694,29 @@ static void hrtimer_switch_to_hres(void)
  19113. retrigger_next_event(NULL);
  19114. }
  19115. +#ifdef CONFIG_PREEMPT_RT_FULL
  19116. +
  19117. +static struct swork_event clock_set_delay_work;
  19118. +
  19119. +static void run_clock_set_delay(struct swork_event *event)
  19120. +{
  19121. + clock_was_set();
  19122. +}
  19123. +
  19124. +void clock_was_set_delayed(void)
  19125. +{
  19126. + swork_queue(&clock_set_delay_work);
  19127. +}
  19128. +
  19129. +static __init int create_clock_set_delay_thread(void)
  19130. +{
  19131. + WARN_ON(swork_get());
  19132. + INIT_SWORK(&clock_set_delay_work, run_clock_set_delay);
  19133. + return 0;
  19134. +}
  19135. +early_initcall(create_clock_set_delay_thread);
  19136. +#else /* PREEMPT_RT_FULL */
  19137. +
  19138. static void clock_was_set_work(struct work_struct *work)
  19139. {
  19140. clock_was_set();
  19141. @@ -708,6 +732,7 @@ void clock_was_set_delayed(void)
  19142. {
  19143. schedule_work(&hrtimer_work);
  19144. }
  19145. +#endif
  19146. #else
  19147. @@ -717,11 +742,8 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; }
  19148. static inline void hrtimer_switch_to_hres(void) { }
  19149. static inline void
  19150. hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
  19151. -static inline int hrtimer_reprogram(struct hrtimer *timer,
  19152. - struct hrtimer_clock_base *base)
  19153. -{
  19154. - return 0;
  19155. -}
  19156. +static inline void hrtimer_reprogram(struct hrtimer *timer,
  19157. + struct hrtimer_clock_base *base) { }
  19158. static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
  19159. static inline void retrigger_next_event(void *arg) { }
  19160. @@ -853,6 +875,32 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
  19161. }
  19162. EXPORT_SYMBOL_GPL(hrtimer_forward);
  19163. +#ifdef CONFIG_PREEMPT_RT_BASE
  19164. +# define wake_up_timer_waiters(b) wake_up(&(b)->wait)
  19165. +
  19166. +/**
  19167. + * hrtimer_wait_for_timer - Wait for a running timer
  19168. + *
  19169. + * @timer: timer to wait for
  19170. + *
  19171. + * The function waits in case the timers callback function is
  19172. + * currently executed on the waitqueue of the timer base. The
  19173. + * waitqueue is woken up after the timer callback function has
  19174. + * finished execution.
  19175. + */
  19176. +void hrtimer_wait_for_timer(const struct hrtimer *timer)
  19177. +{
  19178. + struct hrtimer_clock_base *base = timer->base;
  19179. +
  19180. + if (base && base->cpu_base && !timer->irqsafe)
  19181. + wait_event(base->cpu_base->wait,
  19182. + !(hrtimer_callback_running(timer)));
  19183. +}
  19184. +
  19185. +#else
  19186. +# define wake_up_timer_waiters(b) do { } while (0)
  19187. +#endif
  19188. +
  19189. /*
  19190. * enqueue_hrtimer - internal function to (re)start a timer
  19191. *
  19192. @@ -894,6 +942,11 @@ static void __remove_hrtimer(struct hrtimer *timer,
  19193. if (!(state & HRTIMER_STATE_ENQUEUED))
  19194. return;
  19195. + if (unlikely(!list_empty(&timer->cb_entry))) {
  19196. + list_del_init(&timer->cb_entry);
  19197. + return;
  19198. + }
  19199. +
  19200. if (!timerqueue_del(&base->active, &timer->node))
  19201. cpu_base->active_bases &= ~(1 << base->index);
  19202. @@ -989,7 +1042,16 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
  19203. new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
  19204. timer_stats_hrtimer_set_start_info(timer);
  19205. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19206. + {
  19207. + ktime_t now = new_base->get_time();
  19208. + if (ktime_to_ns(tim) < ktime_to_ns(now))
  19209. + timer->praecox = now;
  19210. + else
  19211. + timer->praecox = ktime_set(0, 0);
  19212. + }
  19213. +#endif
  19214. leftmost = enqueue_hrtimer(timer, new_base);
  19215. if (!leftmost)
  19216. goto unlock;
  19217. @@ -1061,7 +1123,7 @@ int hrtimer_cancel(struct hrtimer *timer)
  19218. if (ret >= 0)
  19219. return ret;
  19220. - cpu_relax();
  19221. + hrtimer_wait_for_timer(timer);
  19222. }
  19223. }
  19224. EXPORT_SYMBOL_GPL(hrtimer_cancel);
  19225. @@ -1137,6 +1199,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
  19226. base = hrtimer_clockid_to_base(clock_id);
  19227. timer->base = &cpu_base->clock_base[base];
  19228. + INIT_LIST_HEAD(&timer->cb_entry);
  19229. timerqueue_init(&timer->node);
  19230. #ifdef CONFIG_TIMER_STATS
  19231. @@ -1177,6 +1240,7 @@ bool hrtimer_active(const struct hrtimer *timer)
  19232. seq = raw_read_seqcount_begin(&cpu_base->seq);
  19233. if (timer->state != HRTIMER_STATE_INACTIVE ||
  19234. + cpu_base->running_soft == timer ||
  19235. cpu_base->running == timer)
  19236. return true;
  19237. @@ -1275,10 +1339,112 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
  19238. cpu_base->running = NULL;
  19239. }
  19240. -static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
  19241. +#ifdef CONFIG_PREEMPT_RT_BASE
  19242. +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
  19243. + struct hrtimer_clock_base *base)
  19244. +{
  19245. + int leftmost;
  19246. +
  19247. + if (restart != HRTIMER_NORESTART &&
  19248. + !(timer->state & HRTIMER_STATE_ENQUEUED)) {
  19249. +
  19250. + leftmost = enqueue_hrtimer(timer, base);
  19251. + if (!leftmost)
  19252. + return;
  19253. +#ifdef CONFIG_HIGH_RES_TIMERS
  19254. + if (!hrtimer_is_hres_active(timer)) {
  19255. + /*
  19256. + * Kick to reschedule the next tick to handle the new timer
  19257. + * on dynticks target.
  19258. + */
  19259. + if (base->cpu_base->nohz_active)
  19260. + wake_up_nohz_cpu(base->cpu_base->cpu);
  19261. + } else {
  19262. +
  19263. + hrtimer_reprogram(timer, base);
  19264. + }
  19265. +#endif
  19266. + }
  19267. +}
  19268. +
  19269. +/*
  19270. + * The changes in mainline which removed the callback modes from
  19271. + * hrtimer are not yet working with -rt. The non wakeup_process()
  19272. + * based callbacks which involve sleeping locks need to be treated
  19273. + * seperately.
  19274. + */
  19275. +static void hrtimer_rt_run_pending(void)
  19276. +{
  19277. + enum hrtimer_restart (*fn)(struct hrtimer *);
  19278. + struct hrtimer_cpu_base *cpu_base;
  19279. + struct hrtimer_clock_base *base;
  19280. + struct hrtimer *timer;
  19281. + int index, restart;
  19282. +
  19283. + local_irq_disable();
  19284. + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
  19285. +
  19286. + raw_spin_lock(&cpu_base->lock);
  19287. +
  19288. + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
  19289. + base = &cpu_base->clock_base[index];
  19290. +
  19291. + while (!list_empty(&base->expired)) {
  19292. + timer = list_first_entry(&base->expired,
  19293. + struct hrtimer, cb_entry);
  19294. +
  19295. + /*
  19296. + * Same as the above __run_hrtimer function
  19297. + * just we run with interrupts enabled.
  19298. + */
  19299. + debug_deactivate(timer);
  19300. + cpu_base->running_soft = timer;
  19301. + raw_write_seqcount_barrier(&cpu_base->seq);
  19302. +
  19303. + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
  19304. + timer_stats_account_hrtimer(timer);
  19305. + fn = timer->function;
  19306. +
  19307. + raw_spin_unlock_irq(&cpu_base->lock);
  19308. + restart = fn(timer);
  19309. + raw_spin_lock_irq(&cpu_base->lock);
  19310. +
  19311. + hrtimer_rt_reprogram(restart, timer, base);
  19312. + raw_write_seqcount_barrier(&cpu_base->seq);
  19313. +
  19314. + WARN_ON_ONCE(cpu_base->running_soft != timer);
  19315. + cpu_base->running_soft = NULL;
  19316. + }
  19317. + }
  19318. +
  19319. + raw_spin_unlock_irq(&cpu_base->lock);
  19320. +
  19321. + wake_up_timer_waiters(cpu_base);
  19322. +}
  19323. +
  19324. +static int hrtimer_rt_defer(struct hrtimer *timer)
  19325. +{
  19326. + if (timer->irqsafe)
  19327. + return 0;
  19328. +
  19329. + __remove_hrtimer(timer, timer->base, timer->state, 0);
  19330. + list_add_tail(&timer->cb_entry, &timer->base->expired);
  19331. + return 1;
  19332. +}
  19333. +
  19334. +#else
  19335. +
  19336. +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
  19337. +
  19338. +#endif
  19339. +
  19340. +static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
  19341. +
  19342. +static int __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
  19343. {
  19344. struct hrtimer_clock_base *base = cpu_base->clock_base;
  19345. unsigned int active = cpu_base->active_bases;
  19346. + int raise = 0;
  19347. for (; active; base++, active >>= 1) {
  19348. struct timerqueue_node *node;
  19349. @@ -1294,6 +1460,15 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
  19350. timer = container_of(node, struct hrtimer, node);
  19351. + trace_hrtimer_interrupt(raw_smp_processor_id(),
  19352. + ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ?
  19353. + timer->praecox : hrtimer_get_expires(timer),
  19354. + basenow)),
  19355. + current,
  19356. + timer->function == hrtimer_wakeup ?
  19357. + container_of(timer, struct hrtimer_sleeper,
  19358. + timer)->task : NULL);
  19359. +
  19360. /*
  19361. * The immediate goal for using the softexpires is
  19362. * minimizing wakeups, not running timers at the
  19363. @@ -1309,9 +1484,13 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
  19364. if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
  19365. break;
  19366. - __run_hrtimer(cpu_base, base, timer, &basenow);
  19367. + if (!hrtimer_rt_defer(timer))
  19368. + __run_hrtimer(cpu_base, base, timer, &basenow);
  19369. + else
  19370. + raise = 1;
  19371. }
  19372. }
  19373. + return raise;
  19374. }
  19375. #ifdef CONFIG_HIGH_RES_TIMERS
  19376. @@ -1325,6 +1504,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
  19377. struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
  19378. ktime_t expires_next, now, entry_time, delta;
  19379. int retries = 0;
  19380. + int raise;
  19381. BUG_ON(!cpu_base->hres_active);
  19382. cpu_base->nr_events++;
  19383. @@ -1343,7 +1523,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
  19384. */
  19385. cpu_base->expires_next.tv64 = KTIME_MAX;
  19386. - __hrtimer_run_queues(cpu_base, now);
  19387. + raise = __hrtimer_run_queues(cpu_base, now);
  19388. /* Reevaluate the clock bases for the next expiry */
  19389. expires_next = __hrtimer_get_next_event(cpu_base);
  19390. @@ -1354,6 +1534,8 @@ void hrtimer_interrupt(struct clock_event_device *dev)
  19391. cpu_base->expires_next = expires_next;
  19392. cpu_base->in_hrtirq = 0;
  19393. raw_spin_unlock(&cpu_base->lock);
  19394. + if (raise)
  19395. + raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  19396. /* Reprogramming necessary ? */
  19397. if (!tick_program_event(expires_next, 0)) {
  19398. @@ -1433,6 +1615,7 @@ void hrtimer_run_queues(void)
  19399. {
  19400. struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
  19401. ktime_t now;
  19402. + int raise;
  19403. if (__hrtimer_hres_active(cpu_base))
  19404. return;
  19405. @@ -1451,8 +1634,10 @@ void hrtimer_run_queues(void)
  19406. raw_spin_lock(&cpu_base->lock);
  19407. now = hrtimer_update_base(cpu_base);
  19408. - __hrtimer_run_queues(cpu_base, now);
  19409. + raise = __hrtimer_run_queues(cpu_base, now);
  19410. raw_spin_unlock(&cpu_base->lock);
  19411. + if (raise)
  19412. + raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  19413. }
  19414. /*
  19415. @@ -1474,16 +1659,18 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
  19416. void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
  19417. {
  19418. sl->timer.function = hrtimer_wakeup;
  19419. + sl->timer.irqsafe = 1;
  19420. sl->task = task;
  19421. }
  19422. EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
  19423. -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
  19424. +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode,
  19425. + unsigned long state)
  19426. {
  19427. hrtimer_init_sleeper(t, current);
  19428. do {
  19429. - set_current_state(TASK_INTERRUPTIBLE);
  19430. + set_current_state(state);
  19431. hrtimer_start_expires(&t->timer, mode);
  19432. if (likely(t->task))
  19433. @@ -1525,7 +1712,8 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
  19434. HRTIMER_MODE_ABS);
  19435. hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
  19436. - if (do_nanosleep(&t, HRTIMER_MODE_ABS))
  19437. + /* cpu_chill() does not care about restart state. */
  19438. + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE))
  19439. goto out;
  19440. rmtp = restart->nanosleep.rmtp;
  19441. @@ -1542,8 +1730,10 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
  19442. return ret;
  19443. }
  19444. -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  19445. - const enum hrtimer_mode mode, const clockid_t clockid)
  19446. +static long
  19447. +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  19448. + const enum hrtimer_mode mode, const clockid_t clockid,
  19449. + unsigned long state)
  19450. {
  19451. struct restart_block *restart;
  19452. struct hrtimer_sleeper t;
  19453. @@ -1556,7 +1746,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  19454. hrtimer_init_on_stack(&t.timer, clockid, mode);
  19455. hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
  19456. - if (do_nanosleep(&t, mode))
  19457. + if (do_nanosleep(&t, mode, state))
  19458. goto out;
  19459. /* Absolute timers do not update the rmtp value and restart: */
  19460. @@ -1583,6 +1773,12 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  19461. return ret;
  19462. }
  19463. +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  19464. + const enum hrtimer_mode mode, const clockid_t clockid)
  19465. +{
  19466. + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE);
  19467. +}
  19468. +
  19469. SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
  19470. struct timespec __user *, rmtp)
  19471. {
  19472. @@ -1597,6 +1793,26 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
  19473. return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
  19474. }
  19475. +#ifdef CONFIG_PREEMPT_RT_FULL
  19476. +/*
  19477. + * Sleep for 1 ms in hope whoever holds what we want will let it go.
  19478. + */
  19479. +void cpu_chill(void)
  19480. +{
  19481. + struct timespec tu = {
  19482. + .tv_nsec = NSEC_PER_MSEC,
  19483. + };
  19484. + unsigned int freeze_flag = current->flags & PF_NOFREEZE;
  19485. +
  19486. + current->flags |= PF_NOFREEZE;
  19487. + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC,
  19488. + TASK_UNINTERRUPTIBLE);
  19489. + if (!freeze_flag)
  19490. + current->flags &= ~PF_NOFREEZE;
  19491. +}
  19492. +EXPORT_SYMBOL(cpu_chill);
  19493. +#endif
  19494. +
  19495. /*
  19496. * Functions related to boot-time initialization:
  19497. */
  19498. @@ -1608,16 +1824,20 @@ int hrtimers_prepare_cpu(unsigned int cpu)
  19499. for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
  19500. cpu_base->clock_base[i].cpu_base = cpu_base;
  19501. timerqueue_init_head(&cpu_base->clock_base[i].active);
  19502. + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
  19503. }
  19504. cpu_base->cpu = cpu;
  19505. hrtimer_init_hres(cpu_base);
  19506. +#ifdef CONFIG_PREEMPT_RT_BASE
  19507. + init_waitqueue_head(&cpu_base->wait);
  19508. +#endif
  19509. return 0;
  19510. }
  19511. #ifdef CONFIG_HOTPLUG_CPU
  19512. -static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
  19513. +static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
  19514. struct hrtimer_clock_base *new_base)
  19515. {
  19516. struct hrtimer *timer;
  19517. @@ -1645,12 +1865,21 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
  19518. */
  19519. enqueue_hrtimer(timer, new_base);
  19520. }
  19521. +#ifdef CONFIG_PREEMPT_RT_BASE
  19522. + list_splice_tail(&old_base->expired, &new_base->expired);
  19523. + /*
  19524. + * Tell the caller to raise HRTIMER_SOFTIRQ. We can't safely
  19525. + * acquire ktimersoftd->pi_lock while the base lock is held.
  19526. + */
  19527. + return !list_empty(&new_base->expired);
  19528. +#endif
  19529. + return 0;
  19530. }
  19531. int hrtimers_dead_cpu(unsigned int scpu)
  19532. {
  19533. struct hrtimer_cpu_base *old_base, *new_base;
  19534. - int i;
  19535. + int i, raise = 0;
  19536. BUG_ON(cpu_online(scpu));
  19537. tick_cancel_sched_timer(scpu);
  19538. @@ -1666,13 +1895,16 @@ int hrtimers_dead_cpu(unsigned int scpu)
  19539. raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
  19540. for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
  19541. - migrate_hrtimer_list(&old_base->clock_base[i],
  19542. - &new_base->clock_base[i]);
  19543. + raise |= migrate_hrtimer_list(&old_base->clock_base[i],
  19544. + &new_base->clock_base[i]);
  19545. }
  19546. raw_spin_unlock(&old_base->lock);
  19547. raw_spin_unlock(&new_base->lock);
  19548. + if (raise)
  19549. + raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  19550. +
  19551. /* Check, if we got expired work to do */
  19552. __hrtimer_peek_ahead_timers();
  19553. local_irq_enable();
  19554. @@ -1681,9 +1913,26 @@ int hrtimers_dead_cpu(unsigned int scpu)
  19555. #endif /* CONFIG_HOTPLUG_CPU */
  19556. +#ifdef CONFIG_PREEMPT_RT_BASE
  19557. +
  19558. +static void run_hrtimer_softirq(struct softirq_action *h)
  19559. +{
  19560. + hrtimer_rt_run_pending();
  19561. +}
  19562. +
  19563. +static void hrtimers_open_softirq(void)
  19564. +{
  19565. + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
  19566. +}
  19567. +
  19568. +#else
  19569. +static void hrtimers_open_softirq(void) { }
  19570. +#endif
  19571. +
  19572. void __init hrtimers_init(void)
  19573. {
  19574. hrtimers_prepare_cpu(smp_processor_id());
  19575. + hrtimers_open_softirq();
  19576. }
  19577. /**
  19578. diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
  19579. index 1d5c7204ddc9..184de6751180 100644
  19580. --- a/kernel/time/itimer.c
  19581. +++ b/kernel/time/itimer.c
  19582. @@ -213,6 +213,7 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
  19583. /* We are sharing ->siglock with it_real_fn() */
  19584. if (hrtimer_try_to_cancel(timer) < 0) {
  19585. spin_unlock_irq(&tsk->sighand->siglock);
  19586. + hrtimer_wait_for_timer(&tsk->signal->real_timer);
  19587. goto again;
  19588. }
  19589. expires = timeval_to_ktime(value->it_value);
  19590. diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
  19591. index 555e21f7b966..a5d6435fabbb 100644
  19592. --- a/kernel/time/jiffies.c
  19593. +++ b/kernel/time/jiffies.c
  19594. @@ -74,7 +74,8 @@ static struct clocksource clocksource_jiffies = {
  19595. .max_cycles = 10,
  19596. };
  19597. -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
  19598. +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
  19599. +__cacheline_aligned_in_smp seqcount_t jiffies_seq;
  19600. #if (BITS_PER_LONG < 64)
  19601. u64 get_jiffies_64(void)
  19602. @@ -83,9 +84,9 @@ u64 get_jiffies_64(void)
  19603. u64 ret;
  19604. do {
  19605. - seq = read_seqbegin(&jiffies_lock);
  19606. + seq = read_seqcount_begin(&jiffies_seq);
  19607. ret = jiffies_64;
  19608. - } while (read_seqretry(&jiffies_lock, seq));
  19609. + } while (read_seqcount_retry(&jiffies_seq, seq));
  19610. return ret;
  19611. }
  19612. EXPORT_SYMBOL(get_jiffies_64);
  19613. diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
  19614. index 6df8927c58a5..05b7391bf9bd 100644
  19615. --- a/kernel/time/ntp.c
  19616. +++ b/kernel/time/ntp.c
  19617. @@ -17,6 +17,7 @@
  19618. #include <linux/module.h>
  19619. #include <linux/rtc.h>
  19620. #include <linux/math64.h>
  19621. +#include <linux/swork.h>
  19622. #include "ntp_internal.h"
  19623. #include "timekeeping_internal.h"
  19624. @@ -568,10 +569,35 @@ static void sync_cmos_clock(struct work_struct *work)
  19625. &sync_cmos_work, timespec64_to_jiffies(&next));
  19626. }
  19627. +#ifdef CONFIG_PREEMPT_RT_FULL
  19628. +
  19629. +static void run_clock_set_delay(struct swork_event *event)
  19630. +{
  19631. + queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
  19632. +}
  19633. +
  19634. +static struct swork_event ntp_cmos_swork;
  19635. +
  19636. +void ntp_notify_cmos_timer(void)
  19637. +{
  19638. + swork_queue(&ntp_cmos_swork);
  19639. +}
  19640. +
  19641. +static __init int create_cmos_delay_thread(void)
  19642. +{
  19643. + WARN_ON(swork_get());
  19644. + INIT_SWORK(&ntp_cmos_swork, run_clock_set_delay);
  19645. + return 0;
  19646. +}
  19647. +early_initcall(create_cmos_delay_thread);
  19648. +
  19649. +#else
  19650. +
  19651. void ntp_notify_cmos_timer(void)
  19652. {
  19653. queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
  19654. }
  19655. +#endif /* CONFIG_PREEMPT_RT_FULL */
  19656. #else
  19657. void ntp_notify_cmos_timer(void) { }
  19658. diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
  19659. index 39008d78927a..633f4eaca9e7 100644
  19660. --- a/kernel/time/posix-cpu-timers.c
  19661. +++ b/kernel/time/posix-cpu-timers.c
  19662. @@ -3,6 +3,7 @@
  19663. */
  19664. #include <linux/sched.h>
  19665. +#include <linux/sched/rt.h>
  19666. #include <linux/posix-timers.h>
  19667. #include <linux/errno.h>
  19668. #include <linux/math64.h>
  19669. @@ -620,7 +621,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
  19670. /*
  19671. * Disarm any old timer after extracting its expiry time.
  19672. */
  19673. - WARN_ON_ONCE(!irqs_disabled());
  19674. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  19675. ret = 0;
  19676. old_incr = timer->it.cpu.incr;
  19677. @@ -1064,7 +1065,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
  19678. /*
  19679. * Now re-arm for the new expiry time.
  19680. */
  19681. - WARN_ON_ONCE(!irqs_disabled());
  19682. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  19683. arm_timer(timer);
  19684. unlock_task_sighand(p, &flags);
  19685. @@ -1153,13 +1154,13 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
  19686. * already updated our counts. We need to check if any timers fire now.
  19687. * Interrupts are disabled.
  19688. */
  19689. -void run_posix_cpu_timers(struct task_struct *tsk)
  19690. +static void __run_posix_cpu_timers(struct task_struct *tsk)
  19691. {
  19692. LIST_HEAD(firing);
  19693. struct k_itimer *timer, *next;
  19694. unsigned long flags;
  19695. - WARN_ON_ONCE(!irqs_disabled());
  19696. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  19697. /*
  19698. * The fast path checks that there are no expired thread or thread
  19699. @@ -1213,6 +1214,190 @@ void run_posix_cpu_timers(struct task_struct *tsk)
  19700. }
  19701. }
  19702. +#ifdef CONFIG_PREEMPT_RT_BASE
  19703. +#include <linux/kthread.h>
  19704. +#include <linux/cpu.h>
  19705. +DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
  19706. +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
  19707. +
  19708. +static int posix_cpu_timers_thread(void *data)
  19709. +{
  19710. + int cpu = (long)data;
  19711. +
  19712. + BUG_ON(per_cpu(posix_timer_task,cpu) != current);
  19713. +
  19714. + while (!kthread_should_stop()) {
  19715. + struct task_struct *tsk = NULL;
  19716. + struct task_struct *next = NULL;
  19717. +
  19718. + if (cpu_is_offline(cpu))
  19719. + goto wait_to_die;
  19720. +
  19721. + /* grab task list */
  19722. + raw_local_irq_disable();
  19723. + tsk = per_cpu(posix_timer_tasklist, cpu);
  19724. + per_cpu(posix_timer_tasklist, cpu) = NULL;
  19725. + raw_local_irq_enable();
  19726. +
  19727. + /* it's possible the list is empty, just return */
  19728. + if (!tsk) {
  19729. + set_current_state(TASK_INTERRUPTIBLE);
  19730. + schedule();
  19731. + __set_current_state(TASK_RUNNING);
  19732. + continue;
  19733. + }
  19734. +
  19735. + /* Process task list */
  19736. + while (1) {
  19737. + /* save next */
  19738. + next = tsk->posix_timer_list;
  19739. +
  19740. + /* run the task timers, clear its ptr and
  19741. + * unreference it
  19742. + */
  19743. + __run_posix_cpu_timers(tsk);
  19744. + tsk->posix_timer_list = NULL;
  19745. + put_task_struct(tsk);
  19746. +
  19747. + /* check if this is the last on the list */
  19748. + if (next == tsk)
  19749. + break;
  19750. + tsk = next;
  19751. + }
  19752. + }
  19753. + return 0;
  19754. +
  19755. +wait_to_die:
  19756. + /* Wait for kthread_stop */
  19757. + set_current_state(TASK_INTERRUPTIBLE);
  19758. + while (!kthread_should_stop()) {
  19759. + schedule();
  19760. + set_current_state(TASK_INTERRUPTIBLE);
  19761. + }
  19762. + __set_current_state(TASK_RUNNING);
  19763. + return 0;
  19764. +}
  19765. +
  19766. +static inline int __fastpath_timer_check(struct task_struct *tsk)
  19767. +{
  19768. + /* tsk == current, ensure it is safe to use ->signal/sighand */
  19769. + if (unlikely(tsk->exit_state))
  19770. + return 0;
  19771. +
  19772. + if (!task_cputime_zero(&tsk->cputime_expires))
  19773. + return 1;
  19774. +
  19775. + if (!task_cputime_zero(&tsk->signal->cputime_expires))
  19776. + return 1;
  19777. +
  19778. + return 0;
  19779. +}
  19780. +
  19781. +void run_posix_cpu_timers(struct task_struct *tsk)
  19782. +{
  19783. + unsigned long cpu = smp_processor_id();
  19784. + struct task_struct *tasklist;
  19785. +
  19786. + BUG_ON(!irqs_disabled());
  19787. + if(!per_cpu(posix_timer_task, cpu))
  19788. + return;
  19789. + /* get per-cpu references */
  19790. + tasklist = per_cpu(posix_timer_tasklist, cpu);
  19791. +
  19792. + /* check to see if we're already queued */
  19793. + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
  19794. + get_task_struct(tsk);
  19795. + if (tasklist) {
  19796. + tsk->posix_timer_list = tasklist;
  19797. + } else {
  19798. + /*
  19799. + * The list is terminated by a self-pointing
  19800. + * task_struct
  19801. + */
  19802. + tsk->posix_timer_list = tsk;
  19803. + }
  19804. + per_cpu(posix_timer_tasklist, cpu) = tsk;
  19805. +
  19806. + wake_up_process(per_cpu(posix_timer_task, cpu));
  19807. + }
  19808. +}
  19809. +
  19810. +/*
  19811. + * posix_cpu_thread_call - callback that gets triggered when a CPU is added.
  19812. + * Here we can start up the necessary migration thread for the new CPU.
  19813. + */
  19814. +static int posix_cpu_thread_call(struct notifier_block *nfb,
  19815. + unsigned long action, void *hcpu)
  19816. +{
  19817. + int cpu = (long)hcpu;
  19818. + struct task_struct *p;
  19819. + struct sched_param param;
  19820. +
  19821. + switch (action) {
  19822. + case CPU_UP_PREPARE:
  19823. + p = kthread_create(posix_cpu_timers_thread, hcpu,
  19824. + "posixcputmr/%d",cpu);
  19825. + if (IS_ERR(p))
  19826. + return NOTIFY_BAD;
  19827. + p->flags |= PF_NOFREEZE;
  19828. + kthread_bind(p, cpu);
  19829. + /* Must be high prio to avoid getting starved */
  19830. + param.sched_priority = MAX_RT_PRIO-1;
  19831. + sched_setscheduler(p, SCHED_FIFO, &param);
  19832. + per_cpu(posix_timer_task,cpu) = p;
  19833. + break;
  19834. + case CPU_ONLINE:
  19835. + /* Strictly unnecessary, as first user will wake it. */
  19836. + wake_up_process(per_cpu(posix_timer_task,cpu));
  19837. + break;
  19838. +#ifdef CONFIG_HOTPLUG_CPU
  19839. + case CPU_UP_CANCELED:
  19840. + /* Unbind it from offline cpu so it can run. Fall thru. */
  19841. + kthread_bind(per_cpu(posix_timer_task, cpu),
  19842. + cpumask_any(cpu_online_mask));
  19843. + kthread_stop(per_cpu(posix_timer_task,cpu));
  19844. + per_cpu(posix_timer_task,cpu) = NULL;
  19845. + break;
  19846. + case CPU_DEAD:
  19847. + kthread_stop(per_cpu(posix_timer_task,cpu));
  19848. + per_cpu(posix_timer_task,cpu) = NULL;
  19849. + break;
  19850. +#endif
  19851. + }
  19852. + return NOTIFY_OK;
  19853. +}
  19854. +
  19855. +/* Register at highest priority so that task migration (migrate_all_tasks)
  19856. + * happens before everything else.
  19857. + */
  19858. +static struct notifier_block posix_cpu_thread_notifier = {
  19859. + .notifier_call = posix_cpu_thread_call,
  19860. + .priority = 10
  19861. +};
  19862. +
  19863. +static int __init posix_cpu_thread_init(void)
  19864. +{
  19865. + void *hcpu = (void *)(long)smp_processor_id();
  19866. + /* Start one for boot CPU. */
  19867. + unsigned long cpu;
  19868. +
  19869. + /* init the per-cpu posix_timer_tasklets */
  19870. + for_each_possible_cpu(cpu)
  19871. + per_cpu(posix_timer_tasklist, cpu) = NULL;
  19872. +
  19873. + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu);
  19874. + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu);
  19875. + register_cpu_notifier(&posix_cpu_thread_notifier);
  19876. + return 0;
  19877. +}
  19878. +early_initcall(posix_cpu_thread_init);
  19879. +#else /* CONFIG_PREEMPT_RT_BASE */
  19880. +void run_posix_cpu_timers(struct task_struct *tsk)
  19881. +{
  19882. + __run_posix_cpu_timers(tsk);
  19883. +}
  19884. +#endif /* CONFIG_PREEMPT_RT_BASE */
  19885. +
  19886. /*
  19887. * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
  19888. * The tsk->sighand->siglock must be held by the caller.
  19889. diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
  19890. index f2826c35e918..464a98155a0e 100644
  19891. --- a/kernel/time/posix-timers.c
  19892. +++ b/kernel/time/posix-timers.c
  19893. @@ -506,6 +506,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
  19894. static struct pid *good_sigevent(sigevent_t * event)
  19895. {
  19896. struct task_struct *rtn = current->group_leader;
  19897. + int sig = event->sigev_signo;
  19898. if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
  19899. (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
  19900. @@ -514,7 +515,8 @@ static struct pid *good_sigevent(sigevent_t * event)
  19901. return NULL;
  19902. if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
  19903. - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
  19904. + (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) ||
  19905. + sig_kernel_coredump(sig)))
  19906. return NULL;
  19907. return task_pid(rtn);
  19908. @@ -826,6 +828,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
  19909. return overrun;
  19910. }
  19911. +/*
  19912. + * Protected by RCU!
  19913. + */
  19914. +static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
  19915. +{
  19916. +#ifdef CONFIG_PREEMPT_RT_FULL
  19917. + if (kc->timer_set == common_timer_set)
  19918. + hrtimer_wait_for_timer(&timr->it.real.timer);
  19919. + else
  19920. + /* FIXME: Whacky hack for posix-cpu-timers */
  19921. + schedule_timeout(1);
  19922. +#endif
  19923. +}
  19924. +
  19925. /* Set a POSIX.1b interval timer. */
  19926. /* timr->it_lock is taken. */
  19927. static int
  19928. @@ -903,6 +919,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
  19929. if (!timr)
  19930. return -EINVAL;
  19931. + rcu_read_lock();
  19932. kc = clockid_to_kclock(timr->it_clock);
  19933. if (WARN_ON_ONCE(!kc || !kc->timer_set))
  19934. error = -EINVAL;
  19935. @@ -911,9 +928,12 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
  19936. unlock_timer(timr, flag);
  19937. if (error == TIMER_RETRY) {
  19938. + timer_wait_for_callback(kc, timr);
  19939. rtn = NULL; // We already got the old time...
  19940. + rcu_read_unlock();
  19941. goto retry;
  19942. }
  19943. + rcu_read_unlock();
  19944. if (old_setting && !error &&
  19945. copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
  19946. @@ -951,10 +971,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
  19947. if (!timer)
  19948. return -EINVAL;
  19949. + rcu_read_lock();
  19950. if (timer_delete_hook(timer) == TIMER_RETRY) {
  19951. unlock_timer(timer, flags);
  19952. + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
  19953. + timer);
  19954. + rcu_read_unlock();
  19955. goto retry_delete;
  19956. }
  19957. + rcu_read_unlock();
  19958. spin_lock(&current->sighand->siglock);
  19959. list_del(&timer->list);
  19960. @@ -980,8 +1005,18 @@ static void itimer_delete(struct k_itimer *timer)
  19961. retry_delete:
  19962. spin_lock_irqsave(&timer->it_lock, flags);
  19963. + /* On RT we can race with a deletion */
  19964. + if (!timer->it_signal) {
  19965. + unlock_timer(timer, flags);
  19966. + return;
  19967. + }
  19968. +
  19969. if (timer_delete_hook(timer) == TIMER_RETRY) {
  19970. + rcu_read_lock();
  19971. unlock_timer(timer, flags);
  19972. + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
  19973. + timer);
  19974. + rcu_read_unlock();
  19975. goto retry_delete;
  19976. }
  19977. list_del(&timer->list);
  19978. diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
  19979. index 690b797f522e..fe8ba1619879 100644
  19980. --- a/kernel/time/tick-broadcast-hrtimer.c
  19981. +++ b/kernel/time/tick-broadcast-hrtimer.c
  19982. @@ -107,5 +107,6 @@ void tick_setup_hrtimer_broadcast(void)
  19983. {
  19984. hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  19985. bctimer.function = bc_handler;
  19986. + bctimer.irqsafe = true;
  19987. clockevents_register_device(&ce_broadcast_hrtimer);
  19988. }
  19989. diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
  19990. index 4fcd99e12aa0..5a47f2e98faf 100644
  19991. --- a/kernel/time/tick-common.c
  19992. +++ b/kernel/time/tick-common.c
  19993. @@ -79,13 +79,15 @@ int tick_is_oneshot_available(void)
  19994. static void tick_periodic(int cpu)
  19995. {
  19996. if (tick_do_timer_cpu == cpu) {
  19997. - write_seqlock(&jiffies_lock);
  19998. + raw_spin_lock(&jiffies_lock);
  19999. + write_seqcount_begin(&jiffies_seq);
  20000. /* Keep track of the next tick event */
  20001. tick_next_period = ktime_add(tick_next_period, tick_period);
  20002. do_timer(1);
  20003. - write_sequnlock(&jiffies_lock);
  20004. + write_seqcount_end(&jiffies_seq);
  20005. + raw_spin_unlock(&jiffies_lock);
  20006. update_wall_time();
  20007. }
  20008. @@ -157,9 +159,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
  20009. ktime_t next;
  20010. do {
  20011. - seq = read_seqbegin(&jiffies_lock);
  20012. + seq = read_seqcount_begin(&jiffies_seq);
  20013. next = tick_next_period;
  20014. - } while (read_seqretry(&jiffies_lock, seq));
  20015. + } while (read_seqcount_retry(&jiffies_seq, seq));
  20016. clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
  20017. diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
  20018. index dae1a45be504..c573b1a848b6 100644
  20019. --- a/kernel/time/tick-sched.c
  20020. +++ b/kernel/time/tick-sched.c
  20021. @@ -62,7 +62,8 @@ static void tick_do_update_jiffies64(ktime_t now)
  20022. return;
  20023. /* Reevaluate with jiffies_lock held */
  20024. - write_seqlock(&jiffies_lock);
  20025. + raw_spin_lock(&jiffies_lock);
  20026. + write_seqcount_begin(&jiffies_seq);
  20027. delta = ktime_sub(now, last_jiffies_update);
  20028. if (delta.tv64 >= tick_period.tv64) {
  20029. @@ -85,10 +86,12 @@ static void tick_do_update_jiffies64(ktime_t now)
  20030. /* Keep the tick_next_period variable up to date */
  20031. tick_next_period = ktime_add(last_jiffies_update, tick_period);
  20032. } else {
  20033. - write_sequnlock(&jiffies_lock);
  20034. + write_seqcount_end(&jiffies_seq);
  20035. + raw_spin_unlock(&jiffies_lock);
  20036. return;
  20037. }
  20038. - write_sequnlock(&jiffies_lock);
  20039. + write_seqcount_end(&jiffies_seq);
  20040. + raw_spin_unlock(&jiffies_lock);
  20041. update_wall_time();
  20042. }
  20043. @@ -99,12 +102,14 @@ static ktime_t tick_init_jiffy_update(void)
  20044. {
  20045. ktime_t period;
  20046. - write_seqlock(&jiffies_lock);
  20047. + raw_spin_lock(&jiffies_lock);
  20048. + write_seqcount_begin(&jiffies_seq);
  20049. /* Did we start the jiffies update yet ? */
  20050. if (last_jiffies_update.tv64 == 0)
  20051. last_jiffies_update = tick_next_period;
  20052. period = last_jiffies_update;
  20053. - write_sequnlock(&jiffies_lock);
  20054. + write_seqcount_end(&jiffies_seq);
  20055. + raw_spin_unlock(&jiffies_lock);
  20056. return period;
  20057. }
  20058. @@ -215,6 +220,7 @@ static void nohz_full_kick_func(struct irq_work *work)
  20059. static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
  20060. .func = nohz_full_kick_func,
  20061. + .flags = IRQ_WORK_HARD_IRQ,
  20062. };
  20063. /*
  20064. @@ -678,10 +684,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
  20065. /* Read jiffies and the time when jiffies were updated last */
  20066. do {
  20067. - seq = read_seqbegin(&jiffies_lock);
  20068. + seq = read_seqcount_begin(&jiffies_seq);
  20069. basemono = last_jiffies_update.tv64;
  20070. basejiff = jiffies;
  20071. - } while (read_seqretry(&jiffies_lock, seq));
  20072. + } while (read_seqcount_retry(&jiffies_seq, seq));
  20073. ts->last_jiffies = basejiff;
  20074. /*
  20075. @@ -892,14 +898,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
  20076. return false;
  20077. if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
  20078. - static int ratelimit;
  20079. -
  20080. - if (ratelimit < 10 &&
  20081. - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
  20082. - pr_warn("NOHZ: local_softirq_pending %02x\n",
  20083. - (unsigned int) local_softirq_pending());
  20084. - ratelimit++;
  20085. - }
  20086. + softirq_check_pending_idle();
  20087. return false;
  20088. }
  20089. @@ -1208,6 +1207,7 @@ void tick_setup_sched_timer(void)
  20090. * Emulate tick processing via per-CPU hrtimers:
  20091. */
  20092. hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  20093. + ts->sched_timer.irqsafe = 1;
  20094. ts->sched_timer.function = tick_sched_timer;
  20095. /* Get the next period (per-CPU) */
  20096. diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
  20097. index d831827d7ab0..76d982c11ac3 100644
  20098. --- a/kernel/time/timekeeping.c
  20099. +++ b/kernel/time/timekeeping.c
  20100. @@ -2348,8 +2348,10 @@ EXPORT_SYMBOL(hardpps);
  20101. */
  20102. void xtime_update(unsigned long ticks)
  20103. {
  20104. - write_seqlock(&jiffies_lock);
  20105. + raw_spin_lock(&jiffies_lock);
  20106. + write_seqcount_begin(&jiffies_seq);
  20107. do_timer(ticks);
  20108. - write_sequnlock(&jiffies_lock);
  20109. + write_seqcount_end(&jiffies_seq);
  20110. + raw_spin_unlock(&jiffies_lock);
  20111. update_wall_time();
  20112. }
  20113. diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
  20114. index 704f595ce83f..763a3e5121ff 100644
  20115. --- a/kernel/time/timekeeping.h
  20116. +++ b/kernel/time/timekeeping.h
  20117. @@ -19,7 +19,8 @@ extern void timekeeping_resume(void);
  20118. extern void do_timer(unsigned long ticks);
  20119. extern void update_wall_time(void);
  20120. -extern seqlock_t jiffies_lock;
  20121. +extern raw_spinlock_t jiffies_lock;
  20122. +extern seqcount_t jiffies_seq;
  20123. #define CS_NAME_LEN 32
  20124. diff --git a/kernel/time/timer.c b/kernel/time/timer.c
  20125. index e872f7f05e8a..8e75e7442aaa 100644
  20126. --- a/kernel/time/timer.c
  20127. +++ b/kernel/time/timer.c
  20128. @@ -193,8 +193,11 @@ EXPORT_SYMBOL(jiffies_64);
  20129. #endif
  20130. struct timer_base {
  20131. - spinlock_t lock;
  20132. + raw_spinlock_t lock;
  20133. struct timer_list *running_timer;
  20134. +#ifdef CONFIG_PREEMPT_RT_FULL
  20135. + struct swait_queue_head wait_for_running_timer;
  20136. +#endif
  20137. unsigned long clk;
  20138. unsigned long next_expiry;
  20139. unsigned int cpu;
  20140. @@ -953,10 +956,10 @@ static struct timer_base *lock_timer_base(struct timer_list *timer,
  20141. if (!(tf & TIMER_MIGRATING)) {
  20142. base = get_timer_base(tf);
  20143. - spin_lock_irqsave(&base->lock, *flags);
  20144. + raw_spin_lock_irqsave(&base->lock, *flags);
  20145. if (timer->flags == tf)
  20146. return base;
  20147. - spin_unlock_irqrestore(&base->lock, *flags);
  20148. + raw_spin_unlock_irqrestore(&base->lock, *flags);
  20149. }
  20150. cpu_relax();
  20151. }
  20152. @@ -1033,9 +1036,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
  20153. /* See the comment in lock_timer_base() */
  20154. timer->flags |= TIMER_MIGRATING;
  20155. - spin_unlock(&base->lock);
  20156. + raw_spin_unlock(&base->lock);
  20157. base = new_base;
  20158. - spin_lock(&base->lock);
  20159. + raw_spin_lock(&base->lock);
  20160. WRITE_ONCE(timer->flags,
  20161. (timer->flags & ~TIMER_BASEMASK) | base->cpu);
  20162. forward_timer_base(base);
  20163. @@ -1060,7 +1063,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
  20164. }
  20165. out_unlock:
  20166. - spin_unlock_irqrestore(&base->lock, flags);
  20167. + raw_spin_unlock_irqrestore(&base->lock, flags);
  20168. return ret;
  20169. }
  20170. @@ -1154,9 +1157,9 @@ void add_timer_on(struct timer_list *timer, int cpu)
  20171. if (base != new_base) {
  20172. timer->flags |= TIMER_MIGRATING;
  20173. - spin_unlock(&base->lock);
  20174. + raw_spin_unlock(&base->lock);
  20175. base = new_base;
  20176. - spin_lock(&base->lock);
  20177. + raw_spin_lock(&base->lock);
  20178. WRITE_ONCE(timer->flags,
  20179. (timer->flags & ~TIMER_BASEMASK) | cpu);
  20180. }
  20181. @@ -1164,10 +1167,37 @@ void add_timer_on(struct timer_list *timer, int cpu)
  20182. debug_activate(timer, timer->expires);
  20183. internal_add_timer(base, timer);
  20184. - spin_unlock_irqrestore(&base->lock, flags);
  20185. + raw_spin_unlock_irqrestore(&base->lock, flags);
  20186. }
  20187. EXPORT_SYMBOL_GPL(add_timer_on);
  20188. +#ifdef CONFIG_PREEMPT_RT_FULL
  20189. +/*
  20190. + * Wait for a running timer
  20191. + */
  20192. +static void wait_for_running_timer(struct timer_list *timer)
  20193. +{
  20194. + struct timer_base *base;
  20195. + u32 tf = timer->flags;
  20196. +
  20197. + if (tf & TIMER_MIGRATING)
  20198. + return;
  20199. +
  20200. + base = get_timer_base(tf);
  20201. + swait_event(base->wait_for_running_timer,
  20202. + base->running_timer != timer);
  20203. +}
  20204. +
  20205. +# define wakeup_timer_waiters(b) swake_up_all(&(b)->wait_for_running_timer)
  20206. +#else
  20207. +static inline void wait_for_running_timer(struct timer_list *timer)
  20208. +{
  20209. + cpu_relax();
  20210. +}
  20211. +
  20212. +# define wakeup_timer_waiters(b) do { } while (0)
  20213. +#endif
  20214. +
  20215. /**
  20216. * del_timer - deactive a timer.
  20217. * @timer: the timer to be deactivated
  20218. @@ -1191,7 +1221,7 @@ int del_timer(struct timer_list *timer)
  20219. if (timer_pending(timer)) {
  20220. base = lock_timer_base(timer, &flags);
  20221. ret = detach_if_pending(timer, base, true);
  20222. - spin_unlock_irqrestore(&base->lock, flags);
  20223. + raw_spin_unlock_irqrestore(&base->lock, flags);
  20224. }
  20225. return ret;
  20226. @@ -1219,13 +1249,13 @@ int try_to_del_timer_sync(struct timer_list *timer)
  20227. timer_stats_timer_clear_start_info(timer);
  20228. ret = detach_if_pending(timer, base, true);
  20229. }
  20230. - spin_unlock_irqrestore(&base->lock, flags);
  20231. + raw_spin_unlock_irqrestore(&base->lock, flags);
  20232. return ret;
  20233. }
  20234. EXPORT_SYMBOL(try_to_del_timer_sync);
  20235. -#ifdef CONFIG_SMP
  20236. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  20237. /**
  20238. * del_timer_sync - deactivate a timer and wait for the handler to finish.
  20239. * @timer: the timer to be deactivated
  20240. @@ -1285,7 +1315,7 @@ int del_timer_sync(struct timer_list *timer)
  20241. int ret = try_to_del_timer_sync(timer);
  20242. if (ret >= 0)
  20243. return ret;
  20244. - cpu_relax();
  20245. + wait_for_running_timer(timer);
  20246. }
  20247. }
  20248. EXPORT_SYMBOL(del_timer_sync);
  20249. @@ -1350,14 +1380,17 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
  20250. fn = timer->function;
  20251. data = timer->data;
  20252. - if (timer->flags & TIMER_IRQSAFE) {
  20253. - spin_unlock(&base->lock);
  20254. + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL) &&
  20255. + timer->flags & TIMER_IRQSAFE) {
  20256. + raw_spin_unlock(&base->lock);
  20257. call_timer_fn(timer, fn, data);
  20258. - spin_lock(&base->lock);
  20259. + base->running_timer = NULL;
  20260. + raw_spin_lock(&base->lock);
  20261. } else {
  20262. - spin_unlock_irq(&base->lock);
  20263. + raw_spin_unlock_irq(&base->lock);
  20264. call_timer_fn(timer, fn, data);
  20265. - spin_lock_irq(&base->lock);
  20266. + base->running_timer = NULL;
  20267. + raw_spin_lock_irq(&base->lock);
  20268. }
  20269. }
  20270. }
  20271. @@ -1526,7 +1559,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
  20272. if (cpu_is_offline(smp_processor_id()))
  20273. return expires;
  20274. - spin_lock(&base->lock);
  20275. + raw_spin_lock(&base->lock);
  20276. nextevt = __next_timer_interrupt(base);
  20277. is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
  20278. base->next_expiry = nextevt;
  20279. @@ -1560,7 +1593,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
  20280. base->is_idle = true;
  20281. }
  20282. }
  20283. - spin_unlock(&base->lock);
  20284. + raw_spin_unlock(&base->lock);
  20285. return cmp_next_hrtimer_event(basem, expires);
  20286. }
  20287. @@ -1625,13 +1658,13 @@ void update_process_times(int user_tick)
  20288. /* Note: this timer irq context must be accounted for as well. */
  20289. account_process_tick(p, user_tick);
  20290. + scheduler_tick();
  20291. run_local_timers();
  20292. rcu_check_callbacks(user_tick);
  20293. -#ifdef CONFIG_IRQ_WORK
  20294. +#if defined(CONFIG_IRQ_WORK)
  20295. if (in_irq())
  20296. irq_work_tick();
  20297. #endif
  20298. - scheduler_tick();
  20299. run_posix_cpu_timers(p);
  20300. }
  20301. @@ -1647,7 +1680,7 @@ static inline void __run_timers(struct timer_base *base)
  20302. if (!time_after_eq(jiffies, base->clk))
  20303. return;
  20304. - spin_lock_irq(&base->lock);
  20305. + raw_spin_lock_irq(&base->lock);
  20306. while (time_after_eq(jiffies, base->clk)) {
  20307. @@ -1657,8 +1690,8 @@ static inline void __run_timers(struct timer_base *base)
  20308. while (levels--)
  20309. expire_timers(base, heads + levels);
  20310. }
  20311. - base->running_timer = NULL;
  20312. - spin_unlock_irq(&base->lock);
  20313. + raw_spin_unlock_irq(&base->lock);
  20314. + wakeup_timer_waiters(base);
  20315. }
  20316. /*
  20317. @@ -1681,6 +1714,8 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
  20318. */
  20319. base->must_forward_clk = false;
  20320. + irq_work_tick_soft();
  20321. +
  20322. __run_timers(base);
  20323. if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
  20324. __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
  20325. @@ -1881,16 +1916,16 @@ int timers_dead_cpu(unsigned int cpu)
  20326. * The caller is globally serialized and nobody else
  20327. * takes two locks at once, deadlock is not possible.
  20328. */
  20329. - spin_lock_irq(&new_base->lock);
  20330. - spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
  20331. + raw_spin_lock_irq(&new_base->lock);
  20332. + raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
  20333. BUG_ON(old_base->running_timer);
  20334. for (i = 0; i < WHEEL_SIZE; i++)
  20335. migrate_timer_list(new_base, old_base->vectors + i);
  20336. - spin_unlock(&old_base->lock);
  20337. - spin_unlock_irq(&new_base->lock);
  20338. + raw_spin_unlock(&old_base->lock);
  20339. + raw_spin_unlock_irq(&new_base->lock);
  20340. put_cpu_ptr(&timer_bases);
  20341. }
  20342. return 0;
  20343. @@ -1906,8 +1941,11 @@ static void __init init_timer_cpu(int cpu)
  20344. for (i = 0; i < NR_BASES; i++) {
  20345. base = per_cpu_ptr(&timer_bases[i], cpu);
  20346. base->cpu = cpu;
  20347. - spin_lock_init(&base->lock);
  20348. + raw_spin_lock_init(&base->lock);
  20349. base->clk = jiffies;
  20350. +#ifdef CONFIG_PREEMPT_RT_FULL
  20351. + init_swait_queue_head(&base->wait_for_running_timer);
  20352. +#endif
  20353. }
  20354. }
  20355. diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
  20356. index 2a96b063d659..812e37237eb8 100644
  20357. --- a/kernel/trace/Kconfig
  20358. +++ b/kernel/trace/Kconfig
  20359. @@ -182,6 +182,24 @@ config IRQSOFF_TRACER
  20360. enabled. This option and the preempt-off timing option can be
  20361. used together or separately.)
  20362. +config INTERRUPT_OFF_HIST
  20363. + bool "Interrupts-off Latency Histogram"
  20364. + depends on IRQSOFF_TRACER
  20365. + help
  20366. + This option generates continuously updated histograms (one per cpu)
  20367. + of the duration of time periods with interrupts disabled. The
  20368. + histograms are disabled by default. To enable them, write a non-zero
  20369. + number to
  20370. +
  20371. + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
  20372. +
  20373. + If PREEMPT_OFF_HIST is also selected, additional histograms (one
  20374. + per cpu) are generated that accumulate the duration of time periods
  20375. + when both interrupts and preemption are disabled. The histogram data
  20376. + will be located in the debug file system at
  20377. +
  20378. + /sys/kernel/debug/tracing/latency_hist/irqsoff
  20379. +
  20380. config PREEMPT_TRACER
  20381. bool "Preemption-off Latency Tracer"
  20382. default n
  20383. @@ -206,6 +224,24 @@ config PREEMPT_TRACER
  20384. enabled. This option and the irqs-off timing option can be
  20385. used together or separately.)
  20386. +config PREEMPT_OFF_HIST
  20387. + bool "Preemption-off Latency Histogram"
  20388. + depends on PREEMPT_TRACER
  20389. + help
  20390. + This option generates continuously updated histograms (one per cpu)
  20391. + of the duration of time periods with preemption disabled. The
  20392. + histograms are disabled by default. To enable them, write a non-zero
  20393. + number to
  20394. +
  20395. + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
  20396. +
  20397. + If INTERRUPT_OFF_HIST is also selected, additional histograms (one
  20398. + per cpu) are generated that accumulate the duration of time periods
  20399. + when both interrupts and preemption are disabled. The histogram data
  20400. + will be located in the debug file system at
  20401. +
  20402. + /sys/kernel/debug/tracing/latency_hist/preemptoff
  20403. +
  20404. config SCHED_TRACER
  20405. bool "Scheduling Latency Tracer"
  20406. select GENERIC_TRACER
  20407. @@ -251,6 +287,74 @@ config HWLAT_TRACER
  20408. file. Every time a latency is greater than tracing_thresh, it will
  20409. be recorded into the ring buffer.
  20410. +config WAKEUP_LATENCY_HIST
  20411. + bool "Scheduling Latency Histogram"
  20412. + depends on SCHED_TRACER
  20413. + help
  20414. + This option generates continuously updated histograms (one per cpu)
  20415. + of the scheduling latency of the highest priority task.
  20416. + The histograms are disabled by default. To enable them, write a
  20417. + non-zero number to
  20418. +
  20419. + /sys/kernel/debug/tracing/latency_hist/enable/wakeup
  20420. +
  20421. + Two different algorithms are used, one to determine the latency of
  20422. + processes that exclusively use the highest priority of the system and
  20423. + another one to determine the latency of processes that share the
  20424. + highest system priority with other processes. The former is used to
  20425. + improve hardware and system software, the latter to optimize the
  20426. + priority design of a given system. The histogram data will be
  20427. + located in the debug file system at
  20428. +
  20429. + /sys/kernel/debug/tracing/latency_hist/wakeup
  20430. +
  20431. + and
  20432. +
  20433. + /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio
  20434. +
  20435. + If both Scheduling Latency Histogram and Missed Timer Offsets
  20436. + Histogram are selected, additional histogram data will be collected
  20437. + that contain, in addition to the wakeup latency, the timer latency, in
  20438. + case the wakeup was triggered by an expired timer. These histograms
  20439. + are available in the
  20440. +
  20441. + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
  20442. +
  20443. + directory. They reflect the apparent interrupt and scheduling latency
  20444. + and are best suitable to determine the worst-case latency of a given
  20445. + system. To enable these histograms, write a non-zero number to
  20446. +
  20447. + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
  20448. +
  20449. +config MISSED_TIMER_OFFSETS_HIST
  20450. + depends on HIGH_RES_TIMERS
  20451. + select GENERIC_TRACER
  20452. + bool "Missed Timer Offsets Histogram"
  20453. + help
  20454. + Generate a histogram of missed timer offsets in microseconds. The
  20455. + histograms are disabled by default. To enable them, write a non-zero
  20456. + number to
  20457. +
  20458. + /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets
  20459. +
  20460. + The histogram data will be located in the debug file system at
  20461. +
  20462. + /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets
  20463. +
  20464. + If both Scheduling Latency Histogram and Missed Timer Offsets
  20465. + Histogram are selected, additional histogram data will be collected
  20466. + that contain, in addition to the wakeup latency, the timer latency, in
  20467. + case the wakeup was triggered by an expired timer. These histograms
  20468. + are available in the
  20469. +
  20470. + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
  20471. +
  20472. + directory. They reflect the apparent interrupt and scheduling latency
  20473. + and are best suitable to determine the worst-case latency of a given
  20474. + system. To enable these histograms, write a non-zero number to
  20475. +
  20476. + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
  20477. +
  20478. config ENABLE_DEFAULT_TRACERS
  20479. bool "Trace process context switches and events"
  20480. depends on !GENERIC_TRACER
  20481. diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
  20482. index e57980845549..83af000b783c 100644
  20483. --- a/kernel/trace/Makefile
  20484. +++ b/kernel/trace/Makefile
  20485. @@ -38,6 +38,10 @@ obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
  20486. obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
  20487. obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
  20488. obj-$(CONFIG_HWLAT_TRACER) += trace_hwlat.o
  20489. +obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o
  20490. +obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o
  20491. +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o
  20492. +obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o
  20493. obj-$(CONFIG_NOP_TRACER) += trace_nop.o
  20494. obj-$(CONFIG_STACK_TRACER) += trace_stack.o
  20495. obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
  20496. diff --git a/kernel/trace/latency_hist.c b/kernel/trace/latency_hist.c
  20497. new file mode 100644
  20498. index 000000000000..7f6ee70dea41
  20499. --- /dev/null
  20500. +++ b/kernel/trace/latency_hist.c
  20501. @@ -0,0 +1,1178 @@
  20502. +/*
  20503. + * kernel/trace/latency_hist.c
  20504. + *
  20505. + * Add support for histograms of preemption-off latency and
  20506. + * interrupt-off latency and wakeup latency, it depends on
  20507. + * Real-Time Preemption Support.
  20508. + *
  20509. + * Copyright (C) 2005 MontaVista Software, Inc.
  20510. + * Yi Yang <yyang@ch.mvista.com>
  20511. + *
  20512. + * Converted to work with the new latency tracer.
  20513. + * Copyright (C) 2008 Red Hat, Inc.
  20514. + * Steven Rostedt <srostedt@redhat.com>
  20515. + *
  20516. + */
  20517. +#include <linux/module.h>
  20518. +#include <linux/debugfs.h>
  20519. +#include <linux/seq_file.h>
  20520. +#include <linux/percpu.h>
  20521. +#include <linux/kallsyms.h>
  20522. +#include <linux/uaccess.h>
  20523. +#include <linux/sched.h>
  20524. +#include <linux/sched/rt.h>
  20525. +#include <linux/slab.h>
  20526. +#include <linux/atomic.h>
  20527. +#include <asm/div64.h>
  20528. +
  20529. +#include "trace.h"
  20530. +#include <trace/events/sched.h>
  20531. +
  20532. +#define NSECS_PER_USECS 1000L
  20533. +
  20534. +#define CREATE_TRACE_POINTS
  20535. +#include <trace/events/hist.h>
  20536. +
  20537. +enum {
  20538. + IRQSOFF_LATENCY = 0,
  20539. + PREEMPTOFF_LATENCY,
  20540. + PREEMPTIRQSOFF_LATENCY,
  20541. + WAKEUP_LATENCY,
  20542. + WAKEUP_LATENCY_SHAREDPRIO,
  20543. + MISSED_TIMER_OFFSETS,
  20544. + TIMERANDWAKEUP_LATENCY,
  20545. + MAX_LATENCY_TYPE,
  20546. +};
  20547. +
  20548. +#define MAX_ENTRY_NUM 10240
  20549. +
  20550. +struct hist_data {
  20551. + atomic_t hist_mode; /* 0 log, 1 don't log */
  20552. + long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */
  20553. + long min_lat;
  20554. + long max_lat;
  20555. + unsigned long long below_hist_bound_samples;
  20556. + unsigned long long above_hist_bound_samples;
  20557. + long long accumulate_lat;
  20558. + unsigned long long total_samples;
  20559. + unsigned long long hist_array[MAX_ENTRY_NUM];
  20560. +};
  20561. +
  20562. +struct enable_data {
  20563. + int latency_type;
  20564. + int enabled;
  20565. +};
  20566. +
  20567. +static char *latency_hist_dir_root = "latency_hist";
  20568. +
  20569. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  20570. +static DEFINE_PER_CPU(struct hist_data, irqsoff_hist);
  20571. +static char *irqsoff_hist_dir = "irqsoff";
  20572. +static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start);
  20573. +static DEFINE_PER_CPU(int, hist_irqsoff_counting);
  20574. +#endif
  20575. +
  20576. +#ifdef CONFIG_PREEMPT_OFF_HIST
  20577. +static DEFINE_PER_CPU(struct hist_data, preemptoff_hist);
  20578. +static char *preemptoff_hist_dir = "preemptoff";
  20579. +static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start);
  20580. +static DEFINE_PER_CPU(int, hist_preemptoff_counting);
  20581. +#endif
  20582. +
  20583. +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
  20584. +static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist);
  20585. +static char *preemptirqsoff_hist_dir = "preemptirqsoff";
  20586. +static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start);
  20587. +static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting);
  20588. +#endif
  20589. +
  20590. +#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST)
  20591. +static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start);
  20592. +static struct enable_data preemptirqsoff_enabled_data = {
  20593. + .latency_type = PREEMPTIRQSOFF_LATENCY,
  20594. + .enabled = 0,
  20595. +};
  20596. +#endif
  20597. +
  20598. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20599. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20600. +struct maxlatproc_data {
  20601. + char comm[FIELD_SIZEOF(struct task_struct, comm)];
  20602. + char current_comm[FIELD_SIZEOF(struct task_struct, comm)];
  20603. + int pid;
  20604. + int current_pid;
  20605. + int prio;
  20606. + int current_prio;
  20607. + long latency;
  20608. + long timeroffset;
  20609. + cycle_t timestamp;
  20610. +};
  20611. +#endif
  20612. +
  20613. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  20614. +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist);
  20615. +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio);
  20616. +static char *wakeup_latency_hist_dir = "wakeup";
  20617. +static char *wakeup_latency_hist_dir_sharedprio = "sharedprio";
  20618. +static notrace void probe_wakeup_latency_hist_start(void *v,
  20619. + struct task_struct *p);
  20620. +static notrace void probe_wakeup_latency_hist_stop(void *v,
  20621. + bool preempt, struct task_struct *prev, struct task_struct *next);
  20622. +static notrace void probe_sched_migrate_task(void *,
  20623. + struct task_struct *task, int cpu);
  20624. +static struct enable_data wakeup_latency_enabled_data = {
  20625. + .latency_type = WAKEUP_LATENCY,
  20626. + .enabled = 0,
  20627. +};
  20628. +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc);
  20629. +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio);
  20630. +static DEFINE_PER_CPU(struct task_struct *, wakeup_task);
  20631. +static DEFINE_PER_CPU(int, wakeup_sharedprio);
  20632. +static unsigned long wakeup_pid;
  20633. +#endif
  20634. +
  20635. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  20636. +static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets);
  20637. +static char *missed_timer_offsets_dir = "missed_timer_offsets";
  20638. +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
  20639. + long long offset, struct task_struct *curr, struct task_struct *task);
  20640. +static struct enable_data missed_timer_offsets_enabled_data = {
  20641. + .latency_type = MISSED_TIMER_OFFSETS,
  20642. + .enabled = 0,
  20643. +};
  20644. +static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc);
  20645. +static unsigned long missed_timer_offsets_pid;
  20646. +#endif
  20647. +
  20648. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  20649. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20650. +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
  20651. +static char *timerandwakeup_latency_hist_dir = "timerandwakeup";
  20652. +static struct enable_data timerandwakeup_enabled_data = {
  20653. + .latency_type = TIMERANDWAKEUP_LATENCY,
  20654. + .enabled = 0,
  20655. +};
  20656. +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
  20657. +#endif
  20658. +
  20659. +void notrace latency_hist(int latency_type, int cpu, long latency,
  20660. + long timeroffset, cycle_t stop,
  20661. + struct task_struct *p)
  20662. +{
  20663. + struct hist_data *my_hist;
  20664. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20665. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20666. + struct maxlatproc_data *mp = NULL;
  20667. +#endif
  20668. +
  20669. + if (!cpu_possible(cpu) || latency_type < 0 ||
  20670. + latency_type >= MAX_LATENCY_TYPE)
  20671. + return;
  20672. +
  20673. + switch (latency_type) {
  20674. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  20675. + case IRQSOFF_LATENCY:
  20676. + my_hist = &per_cpu(irqsoff_hist, cpu);
  20677. + break;
  20678. +#endif
  20679. +#ifdef CONFIG_PREEMPT_OFF_HIST
  20680. + case PREEMPTOFF_LATENCY:
  20681. + my_hist = &per_cpu(preemptoff_hist, cpu);
  20682. + break;
  20683. +#endif
  20684. +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
  20685. + case PREEMPTIRQSOFF_LATENCY:
  20686. + my_hist = &per_cpu(preemptirqsoff_hist, cpu);
  20687. + break;
  20688. +#endif
  20689. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  20690. + case WAKEUP_LATENCY:
  20691. + my_hist = &per_cpu(wakeup_latency_hist, cpu);
  20692. + mp = &per_cpu(wakeup_maxlatproc, cpu);
  20693. + break;
  20694. + case WAKEUP_LATENCY_SHAREDPRIO:
  20695. + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
  20696. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
  20697. + break;
  20698. +#endif
  20699. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  20700. + case MISSED_TIMER_OFFSETS:
  20701. + my_hist = &per_cpu(missed_timer_offsets, cpu);
  20702. + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
  20703. + break;
  20704. +#endif
  20705. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  20706. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20707. + case TIMERANDWAKEUP_LATENCY:
  20708. + my_hist = &per_cpu(timerandwakeup_latency_hist, cpu);
  20709. + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
  20710. + break;
  20711. +#endif
  20712. +
  20713. + default:
  20714. + return;
  20715. + }
  20716. +
  20717. + latency += my_hist->offset;
  20718. +
  20719. + if (atomic_read(&my_hist->hist_mode) == 0)
  20720. + return;
  20721. +
  20722. + if (latency < 0 || latency >= MAX_ENTRY_NUM) {
  20723. + if (latency < 0)
  20724. + my_hist->below_hist_bound_samples++;
  20725. + else
  20726. + my_hist->above_hist_bound_samples++;
  20727. + } else
  20728. + my_hist->hist_array[latency]++;
  20729. +
  20730. + if (unlikely(latency > my_hist->max_lat ||
  20731. + my_hist->min_lat == LONG_MAX)) {
  20732. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20733. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20734. + if (latency_type == WAKEUP_LATENCY ||
  20735. + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
  20736. + latency_type == MISSED_TIMER_OFFSETS ||
  20737. + latency_type == TIMERANDWAKEUP_LATENCY) {
  20738. + strncpy(mp->comm, p->comm, sizeof(mp->comm));
  20739. + strncpy(mp->current_comm, current->comm,
  20740. + sizeof(mp->current_comm));
  20741. + mp->pid = task_pid_nr(p);
  20742. + mp->current_pid = task_pid_nr(current);
  20743. + mp->prio = p->prio;
  20744. + mp->current_prio = current->prio;
  20745. + mp->latency = latency;
  20746. + mp->timeroffset = timeroffset;
  20747. + mp->timestamp = stop;
  20748. + }
  20749. +#endif
  20750. + my_hist->max_lat = latency;
  20751. + }
  20752. + if (unlikely(latency < my_hist->min_lat))
  20753. + my_hist->min_lat = latency;
  20754. + my_hist->total_samples++;
  20755. + my_hist->accumulate_lat += latency;
  20756. +}
  20757. +
  20758. +static void *l_start(struct seq_file *m, loff_t *pos)
  20759. +{
  20760. + loff_t *index_ptr = NULL;
  20761. + loff_t index = *pos;
  20762. + struct hist_data *my_hist = m->private;
  20763. +
  20764. + if (index == 0) {
  20765. + char minstr[32], avgstr[32], maxstr[32];
  20766. +
  20767. + atomic_dec(&my_hist->hist_mode);
  20768. +
  20769. + if (likely(my_hist->total_samples)) {
  20770. + long avg = (long) div64_s64(my_hist->accumulate_lat,
  20771. + my_hist->total_samples);
  20772. + snprintf(minstr, sizeof(minstr), "%ld",
  20773. + my_hist->min_lat - my_hist->offset);
  20774. + snprintf(avgstr, sizeof(avgstr), "%ld",
  20775. + avg - my_hist->offset);
  20776. + snprintf(maxstr, sizeof(maxstr), "%ld",
  20777. + my_hist->max_lat - my_hist->offset);
  20778. + } else {
  20779. + strcpy(minstr, "<undef>");
  20780. + strcpy(avgstr, minstr);
  20781. + strcpy(maxstr, minstr);
  20782. + }
  20783. +
  20784. + seq_printf(m, "#Minimum latency: %s microseconds\n"
  20785. + "#Average latency: %s microseconds\n"
  20786. + "#Maximum latency: %s microseconds\n"
  20787. + "#Total samples: %llu\n"
  20788. + "#There are %llu samples lower than %ld"
  20789. + " microseconds.\n"
  20790. + "#There are %llu samples greater or equal"
  20791. + " than %ld microseconds.\n"
  20792. + "#usecs\t%16s\n",
  20793. + minstr, avgstr, maxstr,
  20794. + my_hist->total_samples,
  20795. + my_hist->below_hist_bound_samples,
  20796. + -my_hist->offset,
  20797. + my_hist->above_hist_bound_samples,
  20798. + MAX_ENTRY_NUM - my_hist->offset,
  20799. + "samples");
  20800. + }
  20801. + if (index < MAX_ENTRY_NUM) {
  20802. + index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
  20803. + if (index_ptr)
  20804. + *index_ptr = index;
  20805. + }
  20806. +
  20807. + return index_ptr;
  20808. +}
  20809. +
  20810. +static void *l_next(struct seq_file *m, void *p, loff_t *pos)
  20811. +{
  20812. + loff_t *index_ptr = p;
  20813. + struct hist_data *my_hist = m->private;
  20814. +
  20815. + if (++*pos >= MAX_ENTRY_NUM) {
  20816. + atomic_inc(&my_hist->hist_mode);
  20817. + return NULL;
  20818. + }
  20819. + *index_ptr = *pos;
  20820. + return index_ptr;
  20821. +}
  20822. +
  20823. +static void l_stop(struct seq_file *m, void *p)
  20824. +{
  20825. + kfree(p);
  20826. +}
  20827. +
  20828. +static int l_show(struct seq_file *m, void *p)
  20829. +{
  20830. + int index = *(loff_t *) p;
  20831. + struct hist_data *my_hist = m->private;
  20832. +
  20833. + seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset,
  20834. + my_hist->hist_array[index]);
  20835. + return 0;
  20836. +}
  20837. +
  20838. +static const struct seq_operations latency_hist_seq_op = {
  20839. + .start = l_start,
  20840. + .next = l_next,
  20841. + .stop = l_stop,
  20842. + .show = l_show
  20843. +};
  20844. +
  20845. +static int latency_hist_open(struct inode *inode, struct file *file)
  20846. +{
  20847. + int ret;
  20848. +
  20849. + ret = seq_open(file, &latency_hist_seq_op);
  20850. + if (!ret) {
  20851. + struct seq_file *seq = file->private_data;
  20852. + seq->private = inode->i_private;
  20853. + }
  20854. + return ret;
  20855. +}
  20856. +
  20857. +static const struct file_operations latency_hist_fops = {
  20858. + .open = latency_hist_open,
  20859. + .read = seq_read,
  20860. + .llseek = seq_lseek,
  20861. + .release = seq_release,
  20862. +};
  20863. +
  20864. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20865. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20866. +static void clear_maxlatprocdata(struct maxlatproc_data *mp)
  20867. +{
  20868. + mp->comm[0] = mp->current_comm[0] = '\0';
  20869. + mp->prio = mp->current_prio = mp->pid = mp->current_pid =
  20870. + mp->latency = mp->timeroffset = -1;
  20871. + mp->timestamp = 0;
  20872. +}
  20873. +#endif
  20874. +
  20875. +static void hist_reset(struct hist_data *hist)
  20876. +{
  20877. + atomic_dec(&hist->hist_mode);
  20878. +
  20879. + memset(hist->hist_array, 0, sizeof(hist->hist_array));
  20880. + hist->below_hist_bound_samples = 0ULL;
  20881. + hist->above_hist_bound_samples = 0ULL;
  20882. + hist->min_lat = LONG_MAX;
  20883. + hist->max_lat = LONG_MIN;
  20884. + hist->total_samples = 0ULL;
  20885. + hist->accumulate_lat = 0LL;
  20886. +
  20887. + atomic_inc(&hist->hist_mode);
  20888. +}
  20889. +
  20890. +static ssize_t
  20891. +latency_hist_reset(struct file *file, const char __user *a,
  20892. + size_t size, loff_t *off)
  20893. +{
  20894. + int cpu;
  20895. + struct hist_data *hist = NULL;
  20896. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20897. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20898. + struct maxlatproc_data *mp = NULL;
  20899. +#endif
  20900. + off_t latency_type = (off_t) file->private_data;
  20901. +
  20902. + for_each_online_cpu(cpu) {
  20903. +
  20904. + switch (latency_type) {
  20905. +#ifdef CONFIG_PREEMPT_OFF_HIST
  20906. + case PREEMPTOFF_LATENCY:
  20907. + hist = &per_cpu(preemptoff_hist, cpu);
  20908. + break;
  20909. +#endif
  20910. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  20911. + case IRQSOFF_LATENCY:
  20912. + hist = &per_cpu(irqsoff_hist, cpu);
  20913. + break;
  20914. +#endif
  20915. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  20916. + case PREEMPTIRQSOFF_LATENCY:
  20917. + hist = &per_cpu(preemptirqsoff_hist, cpu);
  20918. + break;
  20919. +#endif
  20920. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  20921. + case WAKEUP_LATENCY:
  20922. + hist = &per_cpu(wakeup_latency_hist, cpu);
  20923. + mp = &per_cpu(wakeup_maxlatproc, cpu);
  20924. + break;
  20925. + case WAKEUP_LATENCY_SHAREDPRIO:
  20926. + hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
  20927. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
  20928. + break;
  20929. +#endif
  20930. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  20931. + case MISSED_TIMER_OFFSETS:
  20932. + hist = &per_cpu(missed_timer_offsets, cpu);
  20933. + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
  20934. + break;
  20935. +#endif
  20936. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  20937. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20938. + case TIMERANDWAKEUP_LATENCY:
  20939. + hist = &per_cpu(timerandwakeup_latency_hist, cpu);
  20940. + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
  20941. + break;
  20942. +#endif
  20943. + }
  20944. +
  20945. + hist_reset(hist);
  20946. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20947. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20948. + if (latency_type == WAKEUP_LATENCY ||
  20949. + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
  20950. + latency_type == MISSED_TIMER_OFFSETS ||
  20951. + latency_type == TIMERANDWAKEUP_LATENCY)
  20952. + clear_maxlatprocdata(mp);
  20953. +#endif
  20954. + }
  20955. +
  20956. + return size;
  20957. +}
  20958. +
  20959. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20960. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20961. +static ssize_t
  20962. +show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  20963. +{
  20964. + char buf[64];
  20965. + int r;
  20966. + unsigned long *this_pid = file->private_data;
  20967. +
  20968. + r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid);
  20969. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  20970. +}
  20971. +
  20972. +static ssize_t do_pid(struct file *file, const char __user *ubuf,
  20973. + size_t cnt, loff_t *ppos)
  20974. +{
  20975. + char buf[64];
  20976. + unsigned long pid;
  20977. + unsigned long *this_pid = file->private_data;
  20978. +
  20979. + if (cnt >= sizeof(buf))
  20980. + return -EINVAL;
  20981. +
  20982. + if (copy_from_user(&buf, ubuf, cnt))
  20983. + return -EFAULT;
  20984. +
  20985. + buf[cnt] = '\0';
  20986. +
  20987. + if (kstrtoul(buf, 10, &pid))
  20988. + return -EINVAL;
  20989. +
  20990. + *this_pid = pid;
  20991. +
  20992. + return cnt;
  20993. +}
  20994. +#endif
  20995. +
  20996. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20997. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20998. +static ssize_t
  20999. +show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  21000. +{
  21001. + int r;
  21002. + struct maxlatproc_data *mp = file->private_data;
  21003. + int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
  21004. + unsigned long long t;
  21005. + unsigned long usecs, secs;
  21006. + char *buf;
  21007. +
  21008. + if (mp->pid == -1 || mp->current_pid == -1) {
  21009. + buf = "(none)\n";
  21010. + return simple_read_from_buffer(ubuf, cnt, ppos, buf,
  21011. + strlen(buf));
  21012. + }
  21013. +
  21014. + buf = kmalloc(strmaxlen, GFP_KERNEL);
  21015. + if (buf == NULL)
  21016. + return -ENOMEM;
  21017. +
  21018. + t = ns2usecs(mp->timestamp);
  21019. + usecs = do_div(t, USEC_PER_SEC);
  21020. + secs = (unsigned long) t;
  21021. + r = snprintf(buf, strmaxlen,
  21022. + "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
  21023. + MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
  21024. + mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
  21025. + secs, usecs);
  21026. + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  21027. + kfree(buf);
  21028. + return r;
  21029. +}
  21030. +#endif
  21031. +
  21032. +static ssize_t
  21033. +show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  21034. +{
  21035. + char buf[64];
  21036. + struct enable_data *ed = file->private_data;
  21037. + int r;
  21038. +
  21039. + r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled);
  21040. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  21041. +}
  21042. +
  21043. +static ssize_t
  21044. +do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos)
  21045. +{
  21046. + char buf[64];
  21047. + long enable;
  21048. + struct enable_data *ed = file->private_data;
  21049. +
  21050. + if (cnt >= sizeof(buf))
  21051. + return -EINVAL;
  21052. +
  21053. + if (copy_from_user(&buf, ubuf, cnt))
  21054. + return -EFAULT;
  21055. +
  21056. + buf[cnt] = 0;
  21057. +
  21058. + if (kstrtoul(buf, 10, &enable))
  21059. + return -EINVAL;
  21060. +
  21061. + if ((enable && ed->enabled) || (!enable && !ed->enabled))
  21062. + return cnt;
  21063. +
  21064. + if (enable) {
  21065. + int ret;
  21066. +
  21067. + switch (ed->latency_type) {
  21068. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  21069. + case PREEMPTIRQSOFF_LATENCY:
  21070. + ret = register_trace_preemptirqsoff_hist(
  21071. + probe_preemptirqsoff_hist, NULL);
  21072. + if (ret) {
  21073. + pr_info("wakeup trace: Couldn't assign "
  21074. + "probe_preemptirqsoff_hist "
  21075. + "to trace_preemptirqsoff_hist\n");
  21076. + return ret;
  21077. + }
  21078. + break;
  21079. +#endif
  21080. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  21081. + case WAKEUP_LATENCY:
  21082. + ret = register_trace_sched_wakeup(
  21083. + probe_wakeup_latency_hist_start, NULL);
  21084. + if (ret) {
  21085. + pr_info("wakeup trace: Couldn't assign "
  21086. + "probe_wakeup_latency_hist_start "
  21087. + "to trace_sched_wakeup\n");
  21088. + return ret;
  21089. + }
  21090. + ret = register_trace_sched_wakeup_new(
  21091. + probe_wakeup_latency_hist_start, NULL);
  21092. + if (ret) {
  21093. + pr_info("wakeup trace: Couldn't assign "
  21094. + "probe_wakeup_latency_hist_start "
  21095. + "to trace_sched_wakeup_new\n");
  21096. + unregister_trace_sched_wakeup(
  21097. + probe_wakeup_latency_hist_start, NULL);
  21098. + return ret;
  21099. + }
  21100. + ret = register_trace_sched_switch(
  21101. + probe_wakeup_latency_hist_stop, NULL);
  21102. + if (ret) {
  21103. + pr_info("wakeup trace: Couldn't assign "
  21104. + "probe_wakeup_latency_hist_stop "
  21105. + "to trace_sched_switch\n");
  21106. + unregister_trace_sched_wakeup(
  21107. + probe_wakeup_latency_hist_start, NULL);
  21108. + unregister_trace_sched_wakeup_new(
  21109. + probe_wakeup_latency_hist_start, NULL);
  21110. + return ret;
  21111. + }
  21112. + ret = register_trace_sched_migrate_task(
  21113. + probe_sched_migrate_task, NULL);
  21114. + if (ret) {
  21115. + pr_info("wakeup trace: Couldn't assign "
  21116. + "probe_sched_migrate_task "
  21117. + "to trace_sched_migrate_task\n");
  21118. + unregister_trace_sched_wakeup(
  21119. + probe_wakeup_latency_hist_start, NULL);
  21120. + unregister_trace_sched_wakeup_new(
  21121. + probe_wakeup_latency_hist_start, NULL);
  21122. + unregister_trace_sched_switch(
  21123. + probe_wakeup_latency_hist_stop, NULL);
  21124. + return ret;
  21125. + }
  21126. + break;
  21127. +#endif
  21128. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  21129. + case MISSED_TIMER_OFFSETS:
  21130. + ret = register_trace_hrtimer_interrupt(
  21131. + probe_hrtimer_interrupt, NULL);
  21132. + if (ret) {
  21133. + pr_info("wakeup trace: Couldn't assign "
  21134. + "probe_hrtimer_interrupt "
  21135. + "to trace_hrtimer_interrupt\n");
  21136. + return ret;
  21137. + }
  21138. + break;
  21139. +#endif
  21140. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  21141. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  21142. + case TIMERANDWAKEUP_LATENCY:
  21143. + if (!wakeup_latency_enabled_data.enabled ||
  21144. + !missed_timer_offsets_enabled_data.enabled)
  21145. + return -EINVAL;
  21146. + break;
  21147. +#endif
  21148. + default:
  21149. + break;
  21150. + }
  21151. + } else {
  21152. + switch (ed->latency_type) {
  21153. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  21154. + case PREEMPTIRQSOFF_LATENCY:
  21155. + {
  21156. + int cpu;
  21157. +
  21158. + unregister_trace_preemptirqsoff_hist(
  21159. + probe_preemptirqsoff_hist, NULL);
  21160. + for_each_online_cpu(cpu) {
  21161. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  21162. + per_cpu(hist_irqsoff_counting,
  21163. + cpu) = 0;
  21164. +#endif
  21165. +#ifdef CONFIG_PREEMPT_OFF_HIST
  21166. + per_cpu(hist_preemptoff_counting,
  21167. + cpu) = 0;
  21168. +#endif
  21169. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  21170. + per_cpu(hist_preemptirqsoff_counting,
  21171. + cpu) = 0;
  21172. +#endif
  21173. + }
  21174. + }
  21175. + break;
  21176. +#endif
  21177. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  21178. + case WAKEUP_LATENCY:
  21179. + {
  21180. + int cpu;
  21181. +
  21182. + unregister_trace_sched_wakeup(
  21183. + probe_wakeup_latency_hist_start, NULL);
  21184. + unregister_trace_sched_wakeup_new(
  21185. + probe_wakeup_latency_hist_start, NULL);
  21186. + unregister_trace_sched_switch(
  21187. + probe_wakeup_latency_hist_stop, NULL);
  21188. + unregister_trace_sched_migrate_task(
  21189. + probe_sched_migrate_task, NULL);
  21190. +
  21191. + for_each_online_cpu(cpu) {
  21192. + per_cpu(wakeup_task, cpu) = NULL;
  21193. + per_cpu(wakeup_sharedprio, cpu) = 0;
  21194. + }
  21195. + }
  21196. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  21197. + timerandwakeup_enabled_data.enabled = 0;
  21198. +#endif
  21199. + break;
  21200. +#endif
  21201. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  21202. + case MISSED_TIMER_OFFSETS:
  21203. + unregister_trace_hrtimer_interrupt(
  21204. + probe_hrtimer_interrupt, NULL);
  21205. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  21206. + timerandwakeup_enabled_data.enabled = 0;
  21207. +#endif
  21208. + break;
  21209. +#endif
  21210. + default:
  21211. + break;
  21212. + }
  21213. + }
  21214. + ed->enabled = enable;
  21215. + return cnt;
  21216. +}
  21217. +
  21218. +static const struct file_operations latency_hist_reset_fops = {
  21219. + .open = tracing_open_generic,
  21220. + .write = latency_hist_reset,
  21221. +};
  21222. +
  21223. +static const struct file_operations enable_fops = {
  21224. + .open = tracing_open_generic,
  21225. + .read = show_enable,
  21226. + .write = do_enable,
  21227. +};
  21228. +
  21229. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  21230. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  21231. +static const struct file_operations pid_fops = {
  21232. + .open = tracing_open_generic,
  21233. + .read = show_pid,
  21234. + .write = do_pid,
  21235. +};
  21236. +
  21237. +static const struct file_operations maxlatproc_fops = {
  21238. + .open = tracing_open_generic,
  21239. + .read = show_maxlatproc,
  21240. +};
  21241. +#endif
  21242. +
  21243. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  21244. +static notrace void probe_preemptirqsoff_hist(void *v, int reason,
  21245. + int starthist)
  21246. +{
  21247. + int cpu = raw_smp_processor_id();
  21248. + int time_set = 0;
  21249. +
  21250. + if (starthist) {
  21251. + cycle_t uninitialized_var(start);
  21252. +
  21253. + if (!preempt_count() && !irqs_disabled())
  21254. + return;
  21255. +
  21256. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  21257. + if ((reason == IRQS_OFF || reason == TRACE_START) &&
  21258. + !per_cpu(hist_irqsoff_counting, cpu)) {
  21259. + per_cpu(hist_irqsoff_counting, cpu) = 1;
  21260. + start = ftrace_now(cpu);
  21261. + time_set++;
  21262. + per_cpu(hist_irqsoff_start, cpu) = start;
  21263. + }
  21264. +#endif
  21265. +
  21266. +#ifdef CONFIG_PREEMPT_OFF_HIST
  21267. + if ((reason == PREEMPT_OFF || reason == TRACE_START) &&
  21268. + !per_cpu(hist_preemptoff_counting, cpu)) {
  21269. + per_cpu(hist_preemptoff_counting, cpu) = 1;
  21270. + if (!(time_set++))
  21271. + start = ftrace_now(cpu);
  21272. + per_cpu(hist_preemptoff_start, cpu) = start;
  21273. + }
  21274. +#endif
  21275. +
  21276. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  21277. + if (per_cpu(hist_irqsoff_counting, cpu) &&
  21278. + per_cpu(hist_preemptoff_counting, cpu) &&
  21279. + !per_cpu(hist_preemptirqsoff_counting, cpu)) {
  21280. + per_cpu(hist_preemptirqsoff_counting, cpu) = 1;
  21281. + if (!time_set)
  21282. + start = ftrace_now(cpu);
  21283. + per_cpu(hist_preemptirqsoff_start, cpu) = start;
  21284. + }
  21285. +#endif
  21286. + } else {
  21287. + cycle_t uninitialized_var(stop);
  21288. +
  21289. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  21290. + if ((reason == IRQS_ON || reason == TRACE_STOP) &&
  21291. + per_cpu(hist_irqsoff_counting, cpu)) {
  21292. + cycle_t start = per_cpu(hist_irqsoff_start, cpu);
  21293. +
  21294. + stop = ftrace_now(cpu);
  21295. + time_set++;
  21296. + if (start) {
  21297. + long latency = ((long) (stop - start)) /
  21298. + NSECS_PER_USECS;
  21299. +
  21300. + latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
  21301. + stop, NULL);
  21302. + }
  21303. + per_cpu(hist_irqsoff_counting, cpu) = 0;
  21304. + }
  21305. +#endif
  21306. +
  21307. +#ifdef CONFIG_PREEMPT_OFF_HIST
  21308. + if ((reason == PREEMPT_ON || reason == TRACE_STOP) &&
  21309. + per_cpu(hist_preemptoff_counting, cpu)) {
  21310. + cycle_t start = per_cpu(hist_preemptoff_start, cpu);
  21311. +
  21312. + if (!(time_set++))
  21313. + stop = ftrace_now(cpu);
  21314. + if (start) {
  21315. + long latency = ((long) (stop - start)) /
  21316. + NSECS_PER_USECS;
  21317. +
  21318. + latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
  21319. + 0, stop, NULL);
  21320. + }
  21321. + per_cpu(hist_preemptoff_counting, cpu) = 0;
  21322. + }
  21323. +#endif
  21324. +
  21325. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  21326. + if ((!per_cpu(hist_irqsoff_counting, cpu) ||
  21327. + !per_cpu(hist_preemptoff_counting, cpu)) &&
  21328. + per_cpu(hist_preemptirqsoff_counting, cpu)) {
  21329. + cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu);
  21330. +
  21331. + if (!time_set)
  21332. + stop = ftrace_now(cpu);
  21333. + if (start) {
  21334. + long latency = ((long) (stop - start)) /
  21335. + NSECS_PER_USECS;
  21336. +
  21337. + latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
  21338. + latency, 0, stop, NULL);
  21339. + }
  21340. + per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
  21341. + }
  21342. +#endif
  21343. + }
  21344. +}
  21345. +#endif
  21346. +
  21347. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  21348. +static DEFINE_RAW_SPINLOCK(wakeup_lock);
  21349. +static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
  21350. + int cpu)
  21351. +{
  21352. + int old_cpu = task_cpu(task);
  21353. +
  21354. + if (cpu != old_cpu) {
  21355. + unsigned long flags;
  21356. + struct task_struct *cpu_wakeup_task;
  21357. +
  21358. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  21359. +
  21360. + cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
  21361. + if (task == cpu_wakeup_task) {
  21362. + put_task_struct(cpu_wakeup_task);
  21363. + per_cpu(wakeup_task, old_cpu) = NULL;
  21364. + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
  21365. + get_task_struct(cpu_wakeup_task);
  21366. + }
  21367. +
  21368. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  21369. + }
  21370. +}
  21371. +
  21372. +static notrace void probe_wakeup_latency_hist_start(void *v,
  21373. + struct task_struct *p)
  21374. +{
  21375. + unsigned long flags;
  21376. + struct task_struct *curr = current;
  21377. + int cpu = task_cpu(p);
  21378. + struct task_struct *cpu_wakeup_task;
  21379. +
  21380. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  21381. +
  21382. + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
  21383. +
  21384. + if (wakeup_pid) {
  21385. + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
  21386. + p->prio == curr->prio)
  21387. + per_cpu(wakeup_sharedprio, cpu) = 1;
  21388. + if (likely(wakeup_pid != task_pid_nr(p)))
  21389. + goto out;
  21390. + } else {
  21391. + if (likely(!rt_task(p)) ||
  21392. + (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) ||
  21393. + p->prio > curr->prio)
  21394. + goto out;
  21395. + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
  21396. + p->prio == curr->prio)
  21397. + per_cpu(wakeup_sharedprio, cpu) = 1;
  21398. + }
  21399. +
  21400. + if (cpu_wakeup_task)
  21401. + put_task_struct(cpu_wakeup_task);
  21402. + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p;
  21403. + get_task_struct(cpu_wakeup_task);
  21404. + cpu_wakeup_task->preempt_timestamp_hist =
  21405. + ftrace_now(raw_smp_processor_id());
  21406. +out:
  21407. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  21408. +}
  21409. +
  21410. +static notrace void probe_wakeup_latency_hist_stop(void *v,
  21411. + bool preempt, struct task_struct *prev, struct task_struct *next)
  21412. +{
  21413. + unsigned long flags;
  21414. + int cpu = task_cpu(next);
  21415. + long latency;
  21416. + cycle_t stop;
  21417. + struct task_struct *cpu_wakeup_task;
  21418. +
  21419. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  21420. +
  21421. + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
  21422. +
  21423. + if (cpu_wakeup_task == NULL)
  21424. + goto out;
  21425. +
  21426. + /* Already running? */
  21427. + if (unlikely(current == cpu_wakeup_task))
  21428. + goto out_reset;
  21429. +
  21430. + if (next != cpu_wakeup_task) {
  21431. + if (next->prio < cpu_wakeup_task->prio)
  21432. + goto out_reset;
  21433. +
  21434. + if (next->prio == cpu_wakeup_task->prio)
  21435. + per_cpu(wakeup_sharedprio, cpu) = 1;
  21436. +
  21437. + goto out;
  21438. + }
  21439. +
  21440. + if (current->prio == cpu_wakeup_task->prio)
  21441. + per_cpu(wakeup_sharedprio, cpu) = 1;
  21442. +
  21443. + /*
  21444. + * The task we are waiting for is about to be switched to.
  21445. + * Calculate latency and store it in histogram.
  21446. + */
  21447. + stop = ftrace_now(raw_smp_processor_id());
  21448. +
  21449. + latency = ((long) (stop - next->preempt_timestamp_hist)) /
  21450. + NSECS_PER_USECS;
  21451. +
  21452. + if (per_cpu(wakeup_sharedprio, cpu)) {
  21453. + latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
  21454. + next);
  21455. + per_cpu(wakeup_sharedprio, cpu) = 0;
  21456. + } else {
  21457. + latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
  21458. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  21459. + if (timerandwakeup_enabled_data.enabled) {
  21460. + latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
  21461. + next->timer_offset + latency, next->timer_offset,
  21462. + stop, next);
  21463. + }
  21464. +#endif
  21465. + }
  21466. +
  21467. +out_reset:
  21468. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  21469. + next->timer_offset = 0;
  21470. +#endif
  21471. + put_task_struct(cpu_wakeup_task);
  21472. + per_cpu(wakeup_task, cpu) = NULL;
  21473. +out:
  21474. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  21475. +}
  21476. +#endif
  21477. +
  21478. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  21479. +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
  21480. + long long latency_ns, struct task_struct *curr,
  21481. + struct task_struct *task)
  21482. +{
  21483. + if (latency_ns <= 0 && task != NULL && rt_task(task) &&
  21484. + (task->prio < curr->prio ||
  21485. + (task->prio == curr->prio &&
  21486. + !cpumask_test_cpu(cpu, &task->cpus_allowed)))) {
  21487. + long latency;
  21488. + cycle_t now;
  21489. +
  21490. + if (missed_timer_offsets_pid) {
  21491. + if (likely(missed_timer_offsets_pid !=
  21492. + task_pid_nr(task)))
  21493. + return;
  21494. + }
  21495. +
  21496. + now = ftrace_now(cpu);
  21497. + latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
  21498. + latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
  21499. + task);
  21500. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  21501. + task->timer_offset = latency;
  21502. +#endif
  21503. + }
  21504. +}
  21505. +#endif
  21506. +
  21507. +static __init int latency_hist_init(void)
  21508. +{
  21509. + struct dentry *latency_hist_root = NULL;
  21510. + struct dentry *dentry;
  21511. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  21512. + struct dentry *dentry_sharedprio;
  21513. +#endif
  21514. + struct dentry *entry;
  21515. + struct dentry *enable_root;
  21516. + int i = 0;
  21517. + struct hist_data *my_hist;
  21518. + char name[64];
  21519. + char *cpufmt = "CPU%d";
  21520. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  21521. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  21522. + char *cpufmt_maxlatproc = "max_latency-CPU%d";
  21523. + struct maxlatproc_data *mp = NULL;
  21524. +#endif
  21525. +
  21526. + dentry = tracing_init_dentry();
  21527. + latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry);
  21528. + enable_root = debugfs_create_dir("enable", latency_hist_root);
  21529. +
  21530. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  21531. + dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root);
  21532. + for_each_possible_cpu(i) {
  21533. + sprintf(name, cpufmt, i);
  21534. + entry = debugfs_create_file(name, 0444, dentry,
  21535. + &per_cpu(irqsoff_hist, i), &latency_hist_fops);
  21536. + my_hist = &per_cpu(irqsoff_hist, i);
  21537. + atomic_set(&my_hist->hist_mode, 1);
  21538. + my_hist->min_lat = LONG_MAX;
  21539. + }
  21540. + entry = debugfs_create_file("reset", 0644, dentry,
  21541. + (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops);
  21542. +#endif
  21543. +
  21544. +#ifdef CONFIG_PREEMPT_OFF_HIST
  21545. + dentry = debugfs_create_dir(preemptoff_hist_dir,
  21546. + latency_hist_root);
  21547. + for_each_possible_cpu(i) {
  21548. + sprintf(name, cpufmt, i);
  21549. + entry = debugfs_create_file(name, 0444, dentry,
  21550. + &per_cpu(preemptoff_hist, i), &latency_hist_fops);
  21551. + my_hist = &per_cpu(preemptoff_hist, i);
  21552. + atomic_set(&my_hist->hist_mode, 1);
  21553. + my_hist->min_lat = LONG_MAX;
  21554. + }
  21555. + entry = debugfs_create_file("reset", 0644, dentry,
  21556. + (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops);
  21557. +#endif
  21558. +
  21559. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  21560. + dentry = debugfs_create_dir(preemptirqsoff_hist_dir,
  21561. + latency_hist_root);
  21562. + for_each_possible_cpu(i) {
  21563. + sprintf(name, cpufmt, i);
  21564. + entry = debugfs_create_file(name, 0444, dentry,
  21565. + &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops);
  21566. + my_hist = &per_cpu(preemptirqsoff_hist, i);
  21567. + atomic_set(&my_hist->hist_mode, 1);
  21568. + my_hist->min_lat = LONG_MAX;
  21569. + }
  21570. + entry = debugfs_create_file("reset", 0644, dentry,
  21571. + (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops);
  21572. +#endif
  21573. +
  21574. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  21575. + entry = debugfs_create_file("preemptirqsoff", 0644,
  21576. + enable_root, (void *)&preemptirqsoff_enabled_data,
  21577. + &enable_fops);
  21578. +#endif
  21579. +
  21580. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  21581. + dentry = debugfs_create_dir(wakeup_latency_hist_dir,
  21582. + latency_hist_root);
  21583. + dentry_sharedprio = debugfs_create_dir(
  21584. + wakeup_latency_hist_dir_sharedprio, dentry);
  21585. + for_each_possible_cpu(i) {
  21586. + sprintf(name, cpufmt, i);
  21587. +
  21588. + entry = debugfs_create_file(name, 0444, dentry,
  21589. + &per_cpu(wakeup_latency_hist, i),
  21590. + &latency_hist_fops);
  21591. + my_hist = &per_cpu(wakeup_latency_hist, i);
  21592. + atomic_set(&my_hist->hist_mode, 1);
  21593. + my_hist->min_lat = LONG_MAX;
  21594. +
  21595. + entry = debugfs_create_file(name, 0444, dentry_sharedprio,
  21596. + &per_cpu(wakeup_latency_hist_sharedprio, i),
  21597. + &latency_hist_fops);
  21598. + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i);
  21599. + atomic_set(&my_hist->hist_mode, 1);
  21600. + my_hist->min_lat = LONG_MAX;
  21601. +
  21602. + sprintf(name, cpufmt_maxlatproc, i);
  21603. +
  21604. + mp = &per_cpu(wakeup_maxlatproc, i);
  21605. + entry = debugfs_create_file(name, 0444, dentry, mp,
  21606. + &maxlatproc_fops);
  21607. + clear_maxlatprocdata(mp);
  21608. +
  21609. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, i);
  21610. + entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp,
  21611. + &maxlatproc_fops);
  21612. + clear_maxlatprocdata(mp);
  21613. + }
  21614. + entry = debugfs_create_file("pid", 0644, dentry,
  21615. + (void *)&wakeup_pid, &pid_fops);
  21616. + entry = debugfs_create_file("reset", 0644, dentry,
  21617. + (void *)WAKEUP_LATENCY, &latency_hist_reset_fops);
  21618. + entry = debugfs_create_file("reset", 0644, dentry_sharedprio,
  21619. + (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops);
  21620. + entry = debugfs_create_file("wakeup", 0644,
  21621. + enable_root, (void *)&wakeup_latency_enabled_data,
  21622. + &enable_fops);
  21623. +#endif
  21624. +
  21625. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  21626. + dentry = debugfs_create_dir(missed_timer_offsets_dir,
  21627. + latency_hist_root);
  21628. + for_each_possible_cpu(i) {
  21629. + sprintf(name, cpufmt, i);
  21630. + entry = debugfs_create_file(name, 0444, dentry,
  21631. + &per_cpu(missed_timer_offsets, i), &latency_hist_fops);
  21632. + my_hist = &per_cpu(missed_timer_offsets, i);
  21633. + atomic_set(&my_hist->hist_mode, 1);
  21634. + my_hist->min_lat = LONG_MAX;
  21635. +
  21636. + sprintf(name, cpufmt_maxlatproc, i);
  21637. + mp = &per_cpu(missed_timer_offsets_maxlatproc, i);
  21638. + entry = debugfs_create_file(name, 0444, dentry, mp,
  21639. + &maxlatproc_fops);
  21640. + clear_maxlatprocdata(mp);
  21641. + }
  21642. + entry = debugfs_create_file("pid", 0644, dentry,
  21643. + (void *)&missed_timer_offsets_pid, &pid_fops);
  21644. + entry = debugfs_create_file("reset", 0644, dentry,
  21645. + (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops);
  21646. + entry = debugfs_create_file("missed_timer_offsets", 0644,
  21647. + enable_root, (void *)&missed_timer_offsets_enabled_data,
  21648. + &enable_fops);
  21649. +#endif
  21650. +
  21651. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  21652. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  21653. + dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
  21654. + latency_hist_root);
  21655. + for_each_possible_cpu(i) {
  21656. + sprintf(name, cpufmt, i);
  21657. + entry = debugfs_create_file(name, 0444, dentry,
  21658. + &per_cpu(timerandwakeup_latency_hist, i),
  21659. + &latency_hist_fops);
  21660. + my_hist = &per_cpu(timerandwakeup_latency_hist, i);
  21661. + atomic_set(&my_hist->hist_mode, 1);
  21662. + my_hist->min_lat = LONG_MAX;
  21663. +
  21664. + sprintf(name, cpufmt_maxlatproc, i);
  21665. + mp = &per_cpu(timerandwakeup_maxlatproc, i);
  21666. + entry = debugfs_create_file(name, 0444, dentry, mp,
  21667. + &maxlatproc_fops);
  21668. + clear_maxlatprocdata(mp);
  21669. + }
  21670. + entry = debugfs_create_file("reset", 0644, dentry,
  21671. + (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops);
  21672. + entry = debugfs_create_file("timerandwakeup", 0644,
  21673. + enable_root, (void *)&timerandwakeup_enabled_data,
  21674. + &enable_fops);
  21675. +#endif
  21676. + return 0;
  21677. +}
  21678. +
  21679. +device_initcall(latency_hist_init);
  21680. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
  21681. index 15b02645ce8b..00d9ebcf42e2 100644
  21682. --- a/kernel/trace/trace.c
  21683. +++ b/kernel/trace/trace.c
  21684. @@ -1897,6 +1897,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
  21685. struct task_struct *tsk = current;
  21686. entry->preempt_count = pc & 0xff;
  21687. + entry->preempt_lazy_count = preempt_lazy_count();
  21688. entry->pid = (tsk) ? tsk->pid : 0;
  21689. entry->flags =
  21690. #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
  21691. @@ -1907,8 +1908,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
  21692. ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
  21693. ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
  21694. ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
  21695. - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
  21696. + (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) |
  21697. + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) |
  21698. (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
  21699. +
  21700. + entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
  21701. }
  21702. EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
  21703. @@ -2898,14 +2902,17 @@ get_total_entries(struct trace_buffer *buf,
  21704. static void print_lat_help_header(struct seq_file *m)
  21705. {
  21706. - seq_puts(m, "# _------=> CPU# \n"
  21707. - "# / _-----=> irqs-off \n"
  21708. - "# | / _----=> need-resched \n"
  21709. - "# || / _---=> hardirq/softirq \n"
  21710. - "# ||| / _--=> preempt-depth \n"
  21711. - "# |||| / delay \n"
  21712. - "# cmd pid ||||| time | caller \n"
  21713. - "# \\ / ||||| \\ | / \n");
  21714. + seq_puts(m, "# _--------=> CPU# \n"
  21715. + "# / _-------=> irqs-off \n"
  21716. + "# | / _------=> need-resched \n"
  21717. + "# || / _-----=> need-resched_lazy \n"
  21718. + "# ||| / _----=> hardirq/softirq \n"
  21719. + "# |||| / _---=> preempt-depth \n"
  21720. + "# ||||| / _--=> preempt-lazy-depth\n"
  21721. + "# |||||| / _-=> migrate-disable \n"
  21722. + "# ||||||| / delay \n"
  21723. + "# cmd pid |||||||| time | caller \n"
  21724. + "# \\ / |||||||| \\ | / \n");
  21725. }
  21726. static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
  21727. @@ -2931,11 +2938,14 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file
  21728. print_event_info(buf, m);
  21729. seq_puts(m, "# _-----=> irqs-off\n"
  21730. "# / _----=> need-resched\n"
  21731. - "# | / _---=> hardirq/softirq\n"
  21732. - "# || / _--=> preempt-depth\n"
  21733. - "# ||| / delay\n"
  21734. - "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"
  21735. - "# | | | |||| | |\n");
  21736. + "# |/ _-----=> need-resched_lazy\n"
  21737. + "# || / _---=> hardirq/softirq\n"
  21738. + "# ||| / _--=> preempt-depth\n"
  21739. + "# |||| / _-=> preempt-lazy-depth\n"
  21740. + "# ||||| / _-=> migrate-disable \n"
  21741. + "# |||||| / delay\n"
  21742. + "# TASK-PID CPU# ||||||| TIMESTAMP FUNCTION\n"
  21743. + "# | | | ||||||| | |\n");
  21744. }
  21745. void
  21746. diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
  21747. index b0d8576c27ae..702b9376b278 100644
  21748. --- a/kernel/trace/trace.h
  21749. +++ b/kernel/trace/trace.h
  21750. @@ -124,6 +124,7 @@ struct kretprobe_trace_entry_head {
  21751. * NEED_RESCHED - reschedule is requested
  21752. * HARDIRQ - inside an interrupt handler
  21753. * SOFTIRQ - inside a softirq handler
  21754. + * NEED_RESCHED_LAZY - lazy reschedule is requested
  21755. */
  21756. enum trace_flag_type {
  21757. TRACE_FLAG_IRQS_OFF = 0x01,
  21758. @@ -133,6 +134,7 @@ enum trace_flag_type {
  21759. TRACE_FLAG_SOFTIRQ = 0x10,
  21760. TRACE_FLAG_PREEMPT_RESCHED = 0x20,
  21761. TRACE_FLAG_NMI = 0x40,
  21762. + TRACE_FLAG_NEED_RESCHED_LAZY = 0x80,
  21763. };
  21764. #define TRACE_BUF_SIZE 1024
  21765. diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
  21766. index 03c0a48c3ac4..0b85d516b491 100644
  21767. --- a/kernel/trace/trace_events.c
  21768. +++ b/kernel/trace/trace_events.c
  21769. @@ -187,6 +187,8 @@ static int trace_define_common_fields(void)
  21770. __common_field(unsigned char, flags);
  21771. __common_field(unsigned char, preempt_count);
  21772. __common_field(int, pid);
  21773. + __common_field(unsigned short, migrate_disable);
  21774. + __common_field(unsigned short, padding);
  21775. return ret;
  21776. }
  21777. diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
  21778. index 03cdff84d026..940bd10b4406 100644
  21779. --- a/kernel/trace/trace_irqsoff.c
  21780. +++ b/kernel/trace/trace_irqsoff.c
  21781. @@ -13,6 +13,7 @@
  21782. #include <linux/uaccess.h>
  21783. #include <linux/module.h>
  21784. #include <linux/ftrace.h>
  21785. +#include <trace/events/hist.h>
  21786. #include "trace.h"
  21787. @@ -424,11 +425,13 @@ void start_critical_timings(void)
  21788. {
  21789. if (preempt_trace() || irq_trace())
  21790. start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  21791. + trace_preemptirqsoff_hist_rcuidle(TRACE_START, 1);
  21792. }
  21793. EXPORT_SYMBOL_GPL(start_critical_timings);
  21794. void stop_critical_timings(void)
  21795. {
  21796. + trace_preemptirqsoff_hist_rcuidle(TRACE_STOP, 0);
  21797. if (preempt_trace() || irq_trace())
  21798. stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  21799. }
  21800. @@ -438,6 +441,7 @@ EXPORT_SYMBOL_GPL(stop_critical_timings);
  21801. #ifdef CONFIG_PROVE_LOCKING
  21802. void time_hardirqs_on(unsigned long a0, unsigned long a1)
  21803. {
  21804. + trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0);
  21805. if (!preempt_trace() && irq_trace())
  21806. stop_critical_timing(a0, a1);
  21807. }
  21808. @@ -446,6 +450,7 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
  21809. {
  21810. if (!preempt_trace() && irq_trace())
  21811. start_critical_timing(a0, a1);
  21812. + trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1);
  21813. }
  21814. #else /* !CONFIG_PROVE_LOCKING */
  21815. @@ -471,6 +476,7 @@ inline void print_irqtrace_events(struct task_struct *curr)
  21816. */
  21817. void trace_hardirqs_on(void)
  21818. {
  21819. + trace_preemptirqsoff_hist(IRQS_ON, 0);
  21820. if (!preempt_trace() && irq_trace())
  21821. stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  21822. }
  21823. @@ -480,11 +486,13 @@ void trace_hardirqs_off(void)
  21824. {
  21825. if (!preempt_trace() && irq_trace())
  21826. start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  21827. + trace_preemptirqsoff_hist(IRQS_OFF, 1);
  21828. }
  21829. EXPORT_SYMBOL(trace_hardirqs_off);
  21830. __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
  21831. {
  21832. + trace_preemptirqsoff_hist(IRQS_ON, 0);
  21833. if (!preempt_trace() && irq_trace())
  21834. stop_critical_timing(CALLER_ADDR0, caller_addr);
  21835. }
  21836. @@ -494,6 +502,7 @@ __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
  21837. {
  21838. if (!preempt_trace() && irq_trace())
  21839. start_critical_timing(CALLER_ADDR0, caller_addr);
  21840. + trace_preemptirqsoff_hist(IRQS_OFF, 1);
  21841. }
  21842. EXPORT_SYMBOL(trace_hardirqs_off_caller);
  21843. @@ -503,12 +512,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
  21844. #ifdef CONFIG_PREEMPT_TRACER
  21845. void trace_preempt_on(unsigned long a0, unsigned long a1)
  21846. {
  21847. + trace_preemptirqsoff_hist(PREEMPT_ON, 0);
  21848. if (preempt_trace() && !irq_trace())
  21849. stop_critical_timing(a0, a1);
  21850. }
  21851. void trace_preempt_off(unsigned long a0, unsigned long a1)
  21852. {
  21853. + trace_preemptirqsoff_hist(PREEMPT_ON, 1);
  21854. if (preempt_trace() && !irq_trace())
  21855. start_critical_timing(a0, a1);
  21856. }
  21857. diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
  21858. index 3fc20422c166..65a6dde71a7d 100644
  21859. --- a/kernel/trace/trace_output.c
  21860. +++ b/kernel/trace/trace_output.c
  21861. @@ -386,6 +386,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
  21862. {
  21863. char hardsoft_irq;
  21864. char need_resched;
  21865. + char need_resched_lazy;
  21866. char irqs_off;
  21867. int hardirq;
  21868. int softirq;
  21869. @@ -416,6 +417,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
  21870. break;
  21871. }
  21872. + need_resched_lazy =
  21873. + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
  21874. +
  21875. hardsoft_irq =
  21876. (nmi && hardirq) ? 'Z' :
  21877. nmi ? 'z' :
  21878. @@ -424,14 +428,25 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
  21879. softirq ? 's' :
  21880. '.' ;
  21881. - trace_seq_printf(s, "%c%c%c",
  21882. - irqs_off, need_resched, hardsoft_irq);
  21883. + trace_seq_printf(s, "%c%c%c%c",
  21884. + irqs_off, need_resched, need_resched_lazy,
  21885. + hardsoft_irq);
  21886. if (entry->preempt_count)
  21887. trace_seq_printf(s, "%x", entry->preempt_count);
  21888. else
  21889. trace_seq_putc(s, '.');
  21890. + if (entry->preempt_lazy_count)
  21891. + trace_seq_printf(s, "%x", entry->preempt_lazy_count);
  21892. + else
  21893. + trace_seq_putc(s, '.');
  21894. +
  21895. + if (entry->migrate_disable)
  21896. + trace_seq_printf(s, "%x", entry->migrate_disable);
  21897. + else
  21898. + trace_seq_putc(s, '.');
  21899. +
  21900. return !trace_seq_has_overflowed(s);
  21901. }
  21902. diff --git a/kernel/user.c b/kernel/user.c
  21903. index b069ccbfb0b0..1a2e88e98b5e 100644
  21904. --- a/kernel/user.c
  21905. +++ b/kernel/user.c
  21906. @@ -161,11 +161,11 @@ void free_uid(struct user_struct *up)
  21907. if (!up)
  21908. return;
  21909. - local_irq_save(flags);
  21910. + local_irq_save_nort(flags);
  21911. if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
  21912. free_user(up, flags);
  21913. else
  21914. - local_irq_restore(flags);
  21915. + local_irq_restore_nort(flags);
  21916. }
  21917. struct user_struct *alloc_uid(kuid_t uid)
  21918. diff --git a/kernel/watchdog.c b/kernel/watchdog.c
  21919. index 63177be0159e..59fe007ad496 100644
  21920. --- a/kernel/watchdog.c
  21921. +++ b/kernel/watchdog.c
  21922. @@ -381,6 +381,7 @@ static void watchdog_enable(unsigned int cpu)
  21923. /* kick off the timer for the hardlockup detector */
  21924. hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  21925. hrtimer->function = watchdog_timer_fn;
  21926. + hrtimer->irqsafe = 1;
  21927. /* Enable the perf event */
  21928. watchdog_nmi_enable(cpu);
  21929. diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
  21930. index 12b8dd640786..4c90d2ee7433 100644
  21931. --- a/kernel/watchdog_hld.c
  21932. +++ b/kernel/watchdog_hld.c
  21933. @@ -19,6 +19,7 @@
  21934. static DEFINE_PER_CPU(bool, hard_watchdog_warn);
  21935. static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
  21936. static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
  21937. +static DEFINE_RAW_SPINLOCK(watchdog_output_lock);
  21938. /* boot commands */
  21939. /*
  21940. @@ -104,6 +105,13 @@ static void watchdog_overflow_callback(struct perf_event *event,
  21941. /* only print hardlockups once */
  21942. if (__this_cpu_read(hard_watchdog_warn) == true)
  21943. return;
  21944. + /*
  21945. + * If early-printk is enabled then make sure we do not
  21946. + * lock up in printk() and kill console logging:
  21947. + */
  21948. + printk_kill();
  21949. +
  21950. + raw_spin_lock(&watchdog_output_lock);
  21951. pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
  21952. print_modules();
  21953. @@ -121,6 +129,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
  21954. !test_and_set_bit(0, &hardlockup_allcpu_dumped))
  21955. trigger_allbutself_cpu_backtrace();
  21956. + raw_spin_unlock(&watchdog_output_lock);
  21957. if (hardlockup_panic)
  21958. nmi_panic(regs, "Hard LOCKUP");
  21959. diff --git a/kernel/workqueue.c b/kernel/workqueue.c
  21960. index 181c2ad0cb54..7eed129f114a 100644
  21961. --- a/kernel/workqueue.c
  21962. +++ b/kernel/workqueue.c
  21963. @@ -48,6 +48,8 @@
  21964. #include <linux/nodemask.h>
  21965. #include <linux/moduleparam.h>
  21966. #include <linux/uaccess.h>
  21967. +#include <linux/locallock.h>
  21968. +#include <linux/delay.h>
  21969. #include "workqueue_internal.h"
  21970. @@ -122,11 +124,16 @@ enum {
  21971. * cpu or grabbing pool->lock is enough for read access. If
  21972. * POOL_DISASSOCIATED is set, it's identical to L.
  21973. *
  21974. + * On RT we need the extra protection via rt_lock_idle_list() for
  21975. + * the list manipulations against read access from
  21976. + * wq_worker_sleeping(). All other places are nicely serialized via
  21977. + * pool->lock.
  21978. + *
  21979. * A: pool->attach_mutex protected.
  21980. *
  21981. * PL: wq_pool_mutex protected.
  21982. *
  21983. - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
  21984. + * PR: wq_pool_mutex protected for writes. RCU protected for reads.
  21985. *
  21986. * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
  21987. *
  21988. @@ -135,7 +142,7 @@ enum {
  21989. *
  21990. * WQ: wq->mutex protected.
  21991. *
  21992. - * WR: wq->mutex protected for writes. Sched-RCU protected for reads.
  21993. + * WR: wq->mutex protected for writes. RCU protected for reads.
  21994. *
  21995. * MD: wq_mayday_lock protected.
  21996. */
  21997. @@ -185,7 +192,7 @@ struct worker_pool {
  21998. atomic_t nr_running ____cacheline_aligned_in_smp;
  21999. /*
  22000. - * Destruction of pool is sched-RCU protected to allow dereferences
  22001. + * Destruction of pool is RCU protected to allow dereferences
  22002. * from get_work_pool().
  22003. */
  22004. struct rcu_head rcu;
  22005. @@ -214,7 +221,7 @@ struct pool_workqueue {
  22006. /*
  22007. * Release of unbound pwq is punted to system_wq. See put_pwq()
  22008. * and pwq_unbound_release_workfn() for details. pool_workqueue
  22009. - * itself is also sched-RCU protected so that the first pwq can be
  22010. + * itself is also RCU protected so that the first pwq can be
  22011. * determined without grabbing wq->mutex.
  22012. */
  22013. struct work_struct unbound_release_work;
  22014. @@ -349,6 +356,8 @@ EXPORT_SYMBOL_GPL(system_power_efficient_wq);
  22015. struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
  22016. EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
  22017. +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock);
  22018. +
  22019. static int worker_thread(void *__worker);
  22020. static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  22021. @@ -356,20 +365,20 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  22022. #include <trace/events/workqueue.h>
  22023. #define assert_rcu_or_pool_mutex() \
  22024. - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
  22025. + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
  22026. !lockdep_is_held(&wq_pool_mutex), \
  22027. - "sched RCU or wq_pool_mutex should be held")
  22028. + "RCU or wq_pool_mutex should be held")
  22029. #define assert_rcu_or_wq_mutex(wq) \
  22030. - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
  22031. + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
  22032. !lockdep_is_held(&wq->mutex), \
  22033. - "sched RCU or wq->mutex should be held")
  22034. + "RCU or wq->mutex should be held")
  22035. #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
  22036. - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
  22037. + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
  22038. !lockdep_is_held(&wq->mutex) && \
  22039. !lockdep_is_held(&wq_pool_mutex), \
  22040. - "sched RCU, wq->mutex or wq_pool_mutex should be held")
  22041. + "RCU, wq->mutex or wq_pool_mutex should be held")
  22042. #define for_each_cpu_worker_pool(pool, cpu) \
  22043. for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
  22044. @@ -381,7 +390,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  22045. * @pool: iteration cursor
  22046. * @pi: integer used for iteration
  22047. *
  22048. - * This must be called either with wq_pool_mutex held or sched RCU read
  22049. + * This must be called either with wq_pool_mutex held or RCU read
  22050. * locked. If the pool needs to be used beyond the locking in effect, the
  22051. * caller is responsible for guaranteeing that the pool stays online.
  22052. *
  22053. @@ -413,7 +422,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  22054. * @pwq: iteration cursor
  22055. * @wq: the target workqueue
  22056. *
  22057. - * This must be called either with wq->mutex held or sched RCU read locked.
  22058. + * This must be called either with wq->mutex held or RCU read locked.
  22059. * If the pwq needs to be used beyond the locking in effect, the caller is
  22060. * responsible for guaranteeing that the pwq stays online.
  22061. *
  22062. @@ -425,6 +434,31 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  22063. if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
  22064. else
  22065. +#ifdef CONFIG_PREEMPT_RT_BASE
  22066. +static inline void rt_lock_idle_list(struct worker_pool *pool)
  22067. +{
  22068. + preempt_disable();
  22069. +}
  22070. +static inline void rt_unlock_idle_list(struct worker_pool *pool)
  22071. +{
  22072. + preempt_enable();
  22073. +}
  22074. +static inline void sched_lock_idle_list(struct worker_pool *pool) { }
  22075. +static inline void sched_unlock_idle_list(struct worker_pool *pool) { }
  22076. +#else
  22077. +static inline void rt_lock_idle_list(struct worker_pool *pool) { }
  22078. +static inline void rt_unlock_idle_list(struct worker_pool *pool) { }
  22079. +static inline void sched_lock_idle_list(struct worker_pool *pool)
  22080. +{
  22081. + spin_lock_irq(&pool->lock);
  22082. +}
  22083. +static inline void sched_unlock_idle_list(struct worker_pool *pool)
  22084. +{
  22085. + spin_unlock_irq(&pool->lock);
  22086. +}
  22087. +#endif
  22088. +
  22089. +
  22090. #ifdef CONFIG_DEBUG_OBJECTS_WORK
  22091. static struct debug_obj_descr work_debug_descr;
  22092. @@ -549,7 +583,7 @@ static int worker_pool_assign_id(struct worker_pool *pool)
  22093. * @wq: the target workqueue
  22094. * @node: the node ID
  22095. *
  22096. - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
  22097. + * This must be called with any of wq_pool_mutex, wq->mutex or RCU
  22098. * read locked.
  22099. * If the pwq needs to be used beyond the locking in effect, the caller is
  22100. * responsible for guaranteeing that the pwq stays online.
  22101. @@ -693,8 +727,8 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work)
  22102. * @work: the work item of interest
  22103. *
  22104. * Pools are created and destroyed under wq_pool_mutex, and allows read
  22105. - * access under sched-RCU read lock. As such, this function should be
  22106. - * called under wq_pool_mutex or with preemption disabled.
  22107. + * access under RCU read lock. As such, this function should be
  22108. + * called under wq_pool_mutex or inside of a rcu_read_lock() region.
  22109. *
  22110. * All fields of the returned pool are accessible as long as the above
  22111. * mentioned locking is in effect. If the returned pool needs to be used
  22112. @@ -831,50 +865,45 @@ static struct worker *first_idle_worker(struct worker_pool *pool)
  22113. */
  22114. static void wake_up_worker(struct worker_pool *pool)
  22115. {
  22116. - struct worker *worker = first_idle_worker(pool);
  22117. + struct worker *worker;
  22118. +
  22119. + rt_lock_idle_list(pool);
  22120. +
  22121. + worker = first_idle_worker(pool);
  22122. if (likely(worker))
  22123. wake_up_process(worker->task);
  22124. +
  22125. + rt_unlock_idle_list(pool);
  22126. }
  22127. /**
  22128. - * wq_worker_waking_up - a worker is waking up
  22129. + * wq_worker_running - a worker is running again
  22130. * @task: task waking up
  22131. - * @cpu: CPU @task is waking up to
  22132. *
  22133. - * This function is called during try_to_wake_up() when a worker is
  22134. - * being awoken.
  22135. - *
  22136. - * CONTEXT:
  22137. - * spin_lock_irq(rq->lock)
  22138. + * This function is called when a worker returns from schedule()
  22139. */
  22140. -void wq_worker_waking_up(struct task_struct *task, int cpu)
  22141. +void wq_worker_running(struct task_struct *task)
  22142. {
  22143. struct worker *worker = kthread_data(task);
  22144. - if (!(worker->flags & WORKER_NOT_RUNNING)) {
  22145. - WARN_ON_ONCE(worker->pool->cpu != cpu);
  22146. + if (!worker->sleeping)
  22147. + return;
  22148. + if (!(worker->flags & WORKER_NOT_RUNNING))
  22149. atomic_inc(&worker->pool->nr_running);
  22150. - }
  22151. + worker->sleeping = 0;
  22152. }
  22153. /**
  22154. * wq_worker_sleeping - a worker is going to sleep
  22155. * @task: task going to sleep
  22156. *
  22157. - * This function is called during schedule() when a busy worker is
  22158. - * going to sleep. Worker on the same cpu can be woken up by
  22159. - * returning pointer to its task.
  22160. - *
  22161. - * CONTEXT:
  22162. - * spin_lock_irq(rq->lock)
  22163. - *
  22164. - * Return:
  22165. - * Worker task on @cpu to wake up, %NULL if none.
  22166. + * This function is called from schedule() when a busy worker is
  22167. + * going to sleep.
  22168. */
  22169. -struct task_struct *wq_worker_sleeping(struct task_struct *task)
  22170. +void wq_worker_sleeping(struct task_struct *task)
  22171. {
  22172. - struct worker *worker = kthread_data(task), *to_wakeup = NULL;
  22173. + struct worker *worker = kthread_data(task);
  22174. struct worker_pool *pool;
  22175. /*
  22176. @@ -883,29 +912,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
  22177. * checking NOT_RUNNING.
  22178. */
  22179. if (worker->flags & WORKER_NOT_RUNNING)
  22180. - return NULL;
  22181. + return;
  22182. pool = worker->pool;
  22183. - /* this can only happen on the local cpu */
  22184. - if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
  22185. - return NULL;
  22186. + if (WARN_ON_ONCE(worker->sleeping))
  22187. + return;
  22188. +
  22189. + worker->sleeping = 1;
  22190. /*
  22191. * The counterpart of the following dec_and_test, implied mb,
  22192. * worklist not empty test sequence is in insert_work().
  22193. * Please read comment there.
  22194. - *
  22195. - * NOT_RUNNING is clear. This means that we're bound to and
  22196. - * running on the local cpu w/ rq lock held and preemption
  22197. - * disabled, which in turn means that none else could be
  22198. - * manipulating idle_list, so dereferencing idle_list without pool
  22199. - * lock is safe.
  22200. */
  22201. if (atomic_dec_and_test(&pool->nr_running) &&
  22202. - !list_empty(&pool->worklist))
  22203. - to_wakeup = first_idle_worker(pool);
  22204. - return to_wakeup ? to_wakeup->task : NULL;
  22205. + !list_empty(&pool->worklist)) {
  22206. + sched_lock_idle_list(pool);
  22207. + wake_up_worker(pool);
  22208. + sched_unlock_idle_list(pool);
  22209. + }
  22210. }
  22211. /**
  22212. @@ -1099,12 +1125,14 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
  22213. {
  22214. if (pwq) {
  22215. /*
  22216. - * As both pwqs and pools are sched-RCU protected, the
  22217. + * As both pwqs and pools are RCU protected, the
  22218. * following lock operations are safe.
  22219. */
  22220. - spin_lock_irq(&pwq->pool->lock);
  22221. + rcu_read_lock();
  22222. + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock);
  22223. put_pwq(pwq);
  22224. - spin_unlock_irq(&pwq->pool->lock);
  22225. + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock);
  22226. + rcu_read_unlock();
  22227. }
  22228. }
  22229. @@ -1208,7 +1236,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
  22230. struct worker_pool *pool;
  22231. struct pool_workqueue *pwq;
  22232. - local_irq_save(*flags);
  22233. + local_lock_irqsave(pendingb_lock, *flags);
  22234. /* try to steal the timer if it exists */
  22235. if (is_dwork) {
  22236. @@ -1227,6 +1255,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
  22237. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
  22238. return 0;
  22239. + rcu_read_lock();
  22240. /*
  22241. * The queueing is in progress, or it is already queued. Try to
  22242. * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
  22243. @@ -1265,14 +1294,16 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
  22244. set_work_pool_and_keep_pending(work, pool->id);
  22245. spin_unlock(&pool->lock);
  22246. + rcu_read_unlock();
  22247. return 1;
  22248. }
  22249. spin_unlock(&pool->lock);
  22250. fail:
  22251. - local_irq_restore(*flags);
  22252. + rcu_read_unlock();
  22253. + local_unlock_irqrestore(pendingb_lock, *flags);
  22254. if (work_is_canceling(work))
  22255. return -ENOENT;
  22256. - cpu_relax();
  22257. + cpu_chill();
  22258. return -EAGAIN;
  22259. }
  22260. @@ -1374,7 +1405,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
  22261. * queued or lose PENDING. Grabbing PENDING and queueing should
  22262. * happen with IRQ disabled.
  22263. */
  22264. - WARN_ON_ONCE(!irqs_disabled());
  22265. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  22266. debug_work_activate(work);
  22267. @@ -1382,6 +1413,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
  22268. if (unlikely(wq->flags & __WQ_DRAINING) &&
  22269. WARN_ON_ONCE(!is_chained_work(wq)))
  22270. return;
  22271. + rcu_read_lock();
  22272. retry:
  22273. if (req_cpu == WORK_CPU_UNBOUND)
  22274. cpu = wq_select_unbound_cpu(raw_smp_processor_id());
  22275. @@ -1438,10 +1470,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
  22276. /* pwq determined, queue */
  22277. trace_workqueue_queue_work(req_cpu, pwq, work);
  22278. - if (WARN_ON(!list_empty(&work->entry))) {
  22279. - spin_unlock(&pwq->pool->lock);
  22280. - return;
  22281. - }
  22282. + if (WARN_ON(!list_empty(&work->entry)))
  22283. + goto out;
  22284. pwq->nr_in_flight[pwq->work_color]++;
  22285. work_flags = work_color_to_flags(pwq->work_color);
  22286. @@ -1459,7 +1489,9 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
  22287. insert_work(pwq, work, worklist, work_flags);
  22288. +out:
  22289. spin_unlock(&pwq->pool->lock);
  22290. + rcu_read_unlock();
  22291. }
  22292. /**
  22293. @@ -1479,14 +1511,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
  22294. bool ret = false;
  22295. unsigned long flags;
  22296. - local_irq_save(flags);
  22297. + local_lock_irqsave(pendingb_lock,flags);
  22298. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
  22299. __queue_work(cpu, wq, work);
  22300. ret = true;
  22301. }
  22302. - local_irq_restore(flags);
  22303. + local_unlock_irqrestore(pendingb_lock, flags);
  22304. return ret;
  22305. }
  22306. EXPORT_SYMBOL(queue_work_on);
  22307. @@ -1554,14 +1586,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
  22308. unsigned long flags;
  22309. /* read the comment in __queue_work() */
  22310. - local_irq_save(flags);
  22311. + local_lock_irqsave(pendingb_lock, flags);
  22312. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
  22313. __queue_delayed_work(cpu, wq, dwork, delay);
  22314. ret = true;
  22315. }
  22316. - local_irq_restore(flags);
  22317. + local_unlock_irqrestore(pendingb_lock, flags);
  22318. return ret;
  22319. }
  22320. EXPORT_SYMBOL(queue_delayed_work_on);
  22321. @@ -1596,7 +1628,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
  22322. if (likely(ret >= 0)) {
  22323. __queue_delayed_work(cpu, wq, dwork, delay);
  22324. - local_irq_restore(flags);
  22325. + local_unlock_irqrestore(pendingb_lock, flags);
  22326. }
  22327. /* -ENOENT from try_to_grab_pending() becomes %true */
  22328. @@ -1629,7 +1661,9 @@ static void worker_enter_idle(struct worker *worker)
  22329. worker->last_active = jiffies;
  22330. /* idle_list is LIFO */
  22331. + rt_lock_idle_list(pool);
  22332. list_add(&worker->entry, &pool->idle_list);
  22333. + rt_unlock_idle_list(pool);
  22334. if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
  22335. mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
  22336. @@ -1662,7 +1696,9 @@ static void worker_leave_idle(struct worker *worker)
  22337. return;
  22338. worker_clr_flags(worker, WORKER_IDLE);
  22339. pool->nr_idle--;
  22340. + rt_lock_idle_list(pool);
  22341. list_del_init(&worker->entry);
  22342. + rt_unlock_idle_list(pool);
  22343. }
  22344. static struct worker *alloc_worker(int node)
  22345. @@ -1828,7 +1864,9 @@ static void destroy_worker(struct worker *worker)
  22346. pool->nr_workers--;
  22347. pool->nr_idle--;
  22348. + rt_lock_idle_list(pool);
  22349. list_del_init(&worker->entry);
  22350. + rt_unlock_idle_list(pool);
  22351. worker->flags |= WORKER_DIE;
  22352. wake_up_process(worker->task);
  22353. }
  22354. @@ -2780,14 +2818,14 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
  22355. might_sleep();
  22356. - local_irq_disable();
  22357. + rcu_read_lock();
  22358. pool = get_work_pool(work);
  22359. if (!pool) {
  22360. - local_irq_enable();
  22361. + rcu_read_unlock();
  22362. return false;
  22363. }
  22364. - spin_lock(&pool->lock);
  22365. + spin_lock_irq(&pool->lock);
  22366. /* see the comment in try_to_grab_pending() with the same code */
  22367. pwq = get_work_pwq(work);
  22368. if (pwq) {
  22369. @@ -2816,10 +2854,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
  22370. else
  22371. lock_map_acquire_read(&pwq->wq->lockdep_map);
  22372. lock_map_release(&pwq->wq->lockdep_map);
  22373. -
  22374. + rcu_read_unlock();
  22375. return true;
  22376. already_gone:
  22377. spin_unlock_irq(&pool->lock);
  22378. + rcu_read_unlock();
  22379. return false;
  22380. }
  22381. @@ -2906,7 +2945,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
  22382. /* tell other tasks trying to grab @work to back off */
  22383. mark_work_canceling(work);
  22384. - local_irq_restore(flags);
  22385. + local_unlock_irqrestore(pendingb_lock, flags);
  22386. flush_work(work);
  22387. clear_work_data(work);
  22388. @@ -2961,10 +3000,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync);
  22389. */
  22390. bool flush_delayed_work(struct delayed_work *dwork)
  22391. {
  22392. - local_irq_disable();
  22393. + local_lock_irq(pendingb_lock);
  22394. if (del_timer_sync(&dwork->timer))
  22395. __queue_work(dwork->cpu, dwork->wq, &dwork->work);
  22396. - local_irq_enable();
  22397. + local_unlock_irq(pendingb_lock);
  22398. return flush_work(&dwork->work);
  22399. }
  22400. EXPORT_SYMBOL(flush_delayed_work);
  22401. @@ -2982,7 +3021,7 @@ static bool __cancel_work(struct work_struct *work, bool is_dwork)
  22402. return false;
  22403. set_work_pool_and_clear_pending(work, get_work_pool_id(work));
  22404. - local_irq_restore(flags);
  22405. + local_unlock_irqrestore(pendingb_lock, flags);
  22406. return ret;
  22407. }
  22408. @@ -3239,7 +3278,7 @@ static void rcu_free_pool(struct rcu_head *rcu)
  22409. * put_unbound_pool - put a worker_pool
  22410. * @pool: worker_pool to put
  22411. *
  22412. - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU
  22413. + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
  22414. * safe manner. get_unbound_pool() calls this function on its failure path
  22415. * and this function should be able to release pools which went through,
  22416. * successfully or not, init_worker_pool().
  22417. @@ -3293,8 +3332,8 @@ static void put_unbound_pool(struct worker_pool *pool)
  22418. del_timer_sync(&pool->idle_timer);
  22419. del_timer_sync(&pool->mayday_timer);
  22420. - /* sched-RCU protected to allow dereferences from get_work_pool() */
  22421. - call_rcu_sched(&pool->rcu, rcu_free_pool);
  22422. + /* RCU protected to allow dereferences from get_work_pool() */
  22423. + call_rcu(&pool->rcu, rcu_free_pool);
  22424. }
  22425. /**
  22426. @@ -3401,14 +3440,14 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
  22427. put_unbound_pool(pool);
  22428. mutex_unlock(&wq_pool_mutex);
  22429. - call_rcu_sched(&pwq->rcu, rcu_free_pwq);
  22430. + call_rcu(&pwq->rcu, rcu_free_pwq);
  22431. /*
  22432. * If we're the last pwq going away, @wq is already dead and no one
  22433. * is gonna access it anymore. Schedule RCU free.
  22434. */
  22435. if (is_last)
  22436. - call_rcu_sched(&wq->rcu, rcu_free_wq);
  22437. + call_rcu(&wq->rcu, rcu_free_wq);
  22438. }
  22439. /**
  22440. @@ -4072,7 +4111,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
  22441. * The base ref is never dropped on per-cpu pwqs. Directly
  22442. * schedule RCU free.
  22443. */
  22444. - call_rcu_sched(&wq->rcu, rcu_free_wq);
  22445. + call_rcu(&wq->rcu, rcu_free_wq);
  22446. } else {
  22447. /*
  22448. * We're the sole accessor of @wq at this point. Directly
  22449. @@ -4166,7 +4205,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
  22450. struct pool_workqueue *pwq;
  22451. bool ret;
  22452. - rcu_read_lock_sched();
  22453. + rcu_read_lock();
  22454. + preempt_disable();
  22455. if (cpu == WORK_CPU_UNBOUND)
  22456. cpu = smp_processor_id();
  22457. @@ -4177,7 +4217,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
  22458. pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
  22459. ret = !list_empty(&pwq->delayed_works);
  22460. - rcu_read_unlock_sched();
  22461. + preempt_enable();
  22462. + rcu_read_unlock();
  22463. return ret;
  22464. }
  22465. @@ -4203,15 +4244,15 @@ unsigned int work_busy(struct work_struct *work)
  22466. if (work_pending(work))
  22467. ret |= WORK_BUSY_PENDING;
  22468. - local_irq_save(flags);
  22469. + rcu_read_lock();
  22470. pool = get_work_pool(work);
  22471. if (pool) {
  22472. - spin_lock(&pool->lock);
  22473. + spin_lock_irqsave(&pool->lock, flags);
  22474. if (find_worker_executing_work(pool, work))
  22475. ret |= WORK_BUSY_RUNNING;
  22476. - spin_unlock(&pool->lock);
  22477. + spin_unlock_irqrestore(&pool->lock, flags);
  22478. }
  22479. - local_irq_restore(flags);
  22480. + rcu_read_unlock();
  22481. return ret;
  22482. }
  22483. @@ -4400,7 +4441,7 @@ void show_workqueue_state(void)
  22484. unsigned long flags;
  22485. int pi;
  22486. - rcu_read_lock_sched();
  22487. + rcu_read_lock();
  22488. pr_info("Showing busy workqueues and worker pools:\n");
  22489. @@ -4453,7 +4494,7 @@ void show_workqueue_state(void)
  22490. spin_unlock_irqrestore(&pool->lock, flags);
  22491. }
  22492. - rcu_read_unlock_sched();
  22493. + rcu_read_unlock();
  22494. }
  22495. /*
  22496. @@ -4791,16 +4832,16 @@ bool freeze_workqueues_busy(void)
  22497. * nr_active is monotonically decreasing. It's safe
  22498. * to peek without lock.
  22499. */
  22500. - rcu_read_lock_sched();
  22501. + rcu_read_lock();
  22502. for_each_pwq(pwq, wq) {
  22503. WARN_ON_ONCE(pwq->nr_active < 0);
  22504. if (pwq->nr_active) {
  22505. busy = true;
  22506. - rcu_read_unlock_sched();
  22507. + rcu_read_unlock();
  22508. goto out_unlock;
  22509. }
  22510. }
  22511. - rcu_read_unlock_sched();
  22512. + rcu_read_unlock();
  22513. }
  22514. out_unlock:
  22515. mutex_unlock(&wq_pool_mutex);
  22516. @@ -4990,7 +5031,8 @@ static ssize_t wq_pool_ids_show(struct device *dev,
  22517. const char *delim = "";
  22518. int node, written = 0;
  22519. - rcu_read_lock_sched();
  22520. + get_online_cpus();
  22521. + rcu_read_lock();
  22522. for_each_node(node) {
  22523. written += scnprintf(buf + written, PAGE_SIZE - written,
  22524. "%s%d:%d", delim, node,
  22525. @@ -4998,7 +5040,8 @@ static ssize_t wq_pool_ids_show(struct device *dev,
  22526. delim = " ";
  22527. }
  22528. written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
  22529. - rcu_read_unlock_sched();
  22530. + rcu_read_unlock();
  22531. + put_online_cpus();
  22532. return written;
  22533. }
  22534. diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
  22535. index 29fa81f0f51a..42d1e3974554 100644
  22536. --- a/kernel/workqueue_internal.h
  22537. +++ b/kernel/workqueue_internal.h
  22538. @@ -44,6 +44,7 @@ struct worker {
  22539. unsigned long last_active; /* L: last active timestamp */
  22540. unsigned int flags; /* X: flags */
  22541. int id; /* I: worker id */
  22542. + int sleeping; /* None */
  22543. /*
  22544. * Opaque string set with work_set_desc(). Printed out with task
  22545. @@ -69,7 +70,7 @@ static inline struct worker *current_wq_worker(void)
  22546. * Scheduler hooks for concurrency managed workqueue. Only to be used from
  22547. * sched/core.c and workqueue.c.
  22548. */
  22549. -void wq_worker_waking_up(struct task_struct *task, int cpu);
  22550. -struct task_struct *wq_worker_sleeping(struct task_struct *task);
  22551. +void wq_worker_running(struct task_struct *task);
  22552. +void wq_worker_sleeping(struct task_struct *task);
  22553. #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
  22554. diff --git a/lib/Kconfig b/lib/Kconfig
  22555. index 260a80e313b9..b06becb3f477 100644
  22556. --- a/lib/Kconfig
  22557. +++ b/lib/Kconfig
  22558. @@ -400,6 +400,7 @@ config CHECK_SIGNATURE
  22559. config CPUMASK_OFFSTACK
  22560. bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
  22561. + depends on !PREEMPT_RT_FULL
  22562. help
  22563. Use dynamic allocation for cpumask_var_t, instead of putting
  22564. them on the stack. This is a bit more expensive, but avoids
  22565. diff --git a/lib/debugobjects.c b/lib/debugobjects.c
  22566. index 056052dc8e91..d8494e126de8 100644
  22567. --- a/lib/debugobjects.c
  22568. +++ b/lib/debugobjects.c
  22569. @@ -308,7 +308,10 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
  22570. struct debug_obj *obj;
  22571. unsigned long flags;
  22572. - fill_pool();
  22573. +#ifdef CONFIG_PREEMPT_RT_FULL
  22574. + if (preempt_count() == 0 && !irqs_disabled())
  22575. +#endif
  22576. + fill_pool();
  22577. db = get_bucket((unsigned long) addr);
  22578. diff --git a/lib/idr.c b/lib/idr.c
  22579. index 6098336df267..9decbe914595 100644
  22580. --- a/lib/idr.c
  22581. +++ b/lib/idr.c
  22582. @@ -30,6 +30,7 @@
  22583. #include <linux/idr.h>
  22584. #include <linux/spinlock.h>
  22585. #include <linux/percpu.h>
  22586. +#include <linux/locallock.h>
  22587. #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1)
  22588. #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT)
  22589. @@ -45,6 +46,37 @@ static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head);
  22590. static DEFINE_PER_CPU(int, idr_preload_cnt);
  22591. static DEFINE_SPINLOCK(simple_ida_lock);
  22592. +#ifdef CONFIG_PREEMPT_RT_FULL
  22593. +static DEFINE_LOCAL_IRQ_LOCK(idr_lock);
  22594. +
  22595. +static inline void idr_preload_lock(void)
  22596. +{
  22597. + local_lock(idr_lock);
  22598. +}
  22599. +
  22600. +static inline void idr_preload_unlock(void)
  22601. +{
  22602. + local_unlock(idr_lock);
  22603. +}
  22604. +
  22605. +void idr_preload_end(void)
  22606. +{
  22607. + idr_preload_unlock();
  22608. +}
  22609. +EXPORT_SYMBOL(idr_preload_end);
  22610. +#else
  22611. +static inline void idr_preload_lock(void)
  22612. +{
  22613. + preempt_disable();
  22614. +}
  22615. +
  22616. +static inline void idr_preload_unlock(void)
  22617. +{
  22618. + preempt_enable();
  22619. +}
  22620. +#endif
  22621. +
  22622. +
  22623. /* the maximum ID which can be allocated given idr->layers */
  22624. static int idr_max(int layers)
  22625. {
  22626. @@ -115,14 +147,14 @@ static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr)
  22627. * context. See idr_preload() for details.
  22628. */
  22629. if (!in_interrupt()) {
  22630. - preempt_disable();
  22631. + idr_preload_lock();
  22632. new = __this_cpu_read(idr_preload_head);
  22633. if (new) {
  22634. __this_cpu_write(idr_preload_head, new->ary[0]);
  22635. __this_cpu_dec(idr_preload_cnt);
  22636. new->ary[0] = NULL;
  22637. }
  22638. - preempt_enable();
  22639. + idr_preload_unlock();
  22640. if (new)
  22641. return new;
  22642. }
  22643. @@ -366,7 +398,6 @@ static void idr_fill_slot(struct idr *idr, void *ptr, int id,
  22644. idr_mark_full(pa, id);
  22645. }
  22646. -
  22647. /**
  22648. * idr_preload - preload for idr_alloc()
  22649. * @gfp_mask: allocation mask to use for preloading
  22650. @@ -401,7 +432,7 @@ void idr_preload(gfp_t gfp_mask)
  22651. WARN_ON_ONCE(in_interrupt());
  22652. might_sleep_if(gfpflags_allow_blocking(gfp_mask));
  22653. - preempt_disable();
  22654. + idr_preload_lock();
  22655. /*
  22656. * idr_alloc() is likely to succeed w/o full idr_layer buffer and
  22657. @@ -413,9 +444,9 @@ void idr_preload(gfp_t gfp_mask)
  22658. while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) {
  22659. struct idr_layer *new;
  22660. - preempt_enable();
  22661. + idr_preload_unlock();
  22662. new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
  22663. - preempt_disable();
  22664. + idr_preload_lock();
  22665. if (!new)
  22666. break;
  22667. diff --git a/lib/irq_poll.c b/lib/irq_poll.c
  22668. index 1d6565e81030..b23a79761df7 100644
  22669. --- a/lib/irq_poll.c
  22670. +++ b/lib/irq_poll.c
  22671. @@ -36,6 +36,7 @@ void irq_poll_sched(struct irq_poll *iop)
  22672. list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
  22673. __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
  22674. local_irq_restore(flags);
  22675. + preempt_check_resched_rt();
  22676. }
  22677. EXPORT_SYMBOL(irq_poll_sched);
  22678. @@ -71,6 +72,7 @@ void irq_poll_complete(struct irq_poll *iop)
  22679. local_irq_save(flags);
  22680. __irq_poll_complete(iop);
  22681. local_irq_restore(flags);
  22682. + preempt_check_resched_rt();
  22683. }
  22684. EXPORT_SYMBOL(irq_poll_complete);
  22685. @@ -95,6 +97,7 @@ static void __latent_entropy irq_poll_softirq(struct softirq_action *h)
  22686. }
  22687. local_irq_enable();
  22688. + preempt_check_resched_rt();
  22689. /* Even though interrupts have been re-enabled, this
  22690. * access is safe because interrupts can only add new
  22691. @@ -132,6 +135,7 @@ static void __latent_entropy irq_poll_softirq(struct softirq_action *h)
  22692. __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
  22693. local_irq_enable();
  22694. + preempt_check_resched_rt();
  22695. }
  22696. /**
  22697. @@ -195,6 +199,7 @@ static int irq_poll_cpu_dead(unsigned int cpu)
  22698. this_cpu_ptr(&blk_cpu_iopoll));
  22699. __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
  22700. local_irq_enable();
  22701. + preempt_check_resched_rt();
  22702. return 0;
  22703. }
  22704. diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
  22705. index f3a217ea0388..4611b156ef79 100644
  22706. --- a/lib/locking-selftest.c
  22707. +++ b/lib/locking-selftest.c
  22708. @@ -590,6 +590,8 @@ GENERATE_TESTCASE(init_held_rsem)
  22709. #include "locking-selftest-spin-hardirq.h"
  22710. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin)
  22711. +#ifndef CONFIG_PREEMPT_RT_FULL
  22712. +
  22713. #include "locking-selftest-rlock-hardirq.h"
  22714. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
  22715. @@ -605,9 +607,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock)
  22716. #include "locking-selftest-wlock-softirq.h"
  22717. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
  22718. +#endif
  22719. +
  22720. #undef E1
  22721. #undef E2
  22722. +#ifndef CONFIG_PREEMPT_RT_FULL
  22723. /*
  22724. * Enabling hardirqs with a softirq-safe lock held:
  22725. */
  22726. @@ -640,6 +645,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
  22727. #undef E1
  22728. #undef E2
  22729. +#endif
  22730. +
  22731. /*
  22732. * Enabling irqs with an irq-safe lock held:
  22733. */
  22734. @@ -663,6 +670,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
  22735. #include "locking-selftest-spin-hardirq.h"
  22736. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin)
  22737. +#ifndef CONFIG_PREEMPT_RT_FULL
  22738. +
  22739. #include "locking-selftest-rlock-hardirq.h"
  22740. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
  22741. @@ -678,6 +687,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock)
  22742. #include "locking-selftest-wlock-softirq.h"
  22743. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
  22744. +#endif
  22745. +
  22746. #undef E1
  22747. #undef E2
  22748. @@ -709,6 +720,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
  22749. #include "locking-selftest-spin-hardirq.h"
  22750. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin)
  22751. +#ifndef CONFIG_PREEMPT_RT_FULL
  22752. +
  22753. #include "locking-selftest-rlock-hardirq.h"
  22754. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
  22755. @@ -724,6 +737,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock)
  22756. #include "locking-selftest-wlock-softirq.h"
  22757. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
  22758. +#endif
  22759. +
  22760. #undef E1
  22761. #undef E2
  22762. #undef E3
  22763. @@ -757,6 +772,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
  22764. #include "locking-selftest-spin-hardirq.h"
  22765. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin)
  22766. +#ifndef CONFIG_PREEMPT_RT_FULL
  22767. +
  22768. #include "locking-selftest-rlock-hardirq.h"
  22769. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
  22770. @@ -772,10 +789,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock)
  22771. #include "locking-selftest-wlock-softirq.h"
  22772. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
  22773. +#endif
  22774. +
  22775. #undef E1
  22776. #undef E2
  22777. #undef E3
  22778. +#ifndef CONFIG_PREEMPT_RT_FULL
  22779. +
  22780. /*
  22781. * read-lock / write-lock irq inversion.
  22782. *
  22783. @@ -838,6 +859,10 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock)
  22784. #undef E2
  22785. #undef E3
  22786. +#endif
  22787. +
  22788. +#ifndef CONFIG_PREEMPT_RT_FULL
  22789. +
  22790. /*
  22791. * read-lock / write-lock recursion that is actually safe.
  22792. */
  22793. @@ -876,6 +901,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
  22794. #undef E2
  22795. #undef E3
  22796. +#endif
  22797. +
  22798. /*
  22799. * read-lock / write-lock recursion that is unsafe.
  22800. */
  22801. @@ -1858,6 +1885,7 @@ void locking_selftest(void)
  22802. printk(" --------------------------------------------------------------------------\n");
  22803. +#ifndef CONFIG_PREEMPT_RT_FULL
  22804. /*
  22805. * irq-context testcases:
  22806. */
  22807. @@ -1870,6 +1898,28 @@ void locking_selftest(void)
  22808. DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
  22809. // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
  22810. +#else
  22811. + /* On -rt, we only do hardirq context test for raw spinlock */
  22812. + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12);
  22813. + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21);
  22814. +
  22815. + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12);
  22816. + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21);
  22817. +
  22818. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123);
  22819. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132);
  22820. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213);
  22821. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231);
  22822. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312);
  22823. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321);
  22824. +
  22825. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123);
  22826. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132);
  22827. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213);
  22828. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231);
  22829. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312);
  22830. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321);
  22831. +#endif
  22832. ww_tests();
  22833. diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
  22834. index 6d40944960de..822a2c027e72 100644
  22835. --- a/lib/percpu_ida.c
  22836. +++ b/lib/percpu_ida.c
  22837. @@ -26,6 +26,9 @@
  22838. #include <linux/string.h>
  22839. #include <linux/spinlock.h>
  22840. #include <linux/percpu_ida.h>
  22841. +#include <linux/locallock.h>
  22842. +
  22843. +static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock);
  22844. struct percpu_ida_cpu {
  22845. /*
  22846. @@ -148,13 +151,13 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
  22847. unsigned long flags;
  22848. int tag;
  22849. - local_irq_save(flags);
  22850. + local_lock_irqsave(irq_off_lock, flags);
  22851. tags = this_cpu_ptr(pool->tag_cpu);
  22852. /* Fastpath */
  22853. tag = alloc_local_tag(tags);
  22854. if (likely(tag >= 0)) {
  22855. - local_irq_restore(flags);
  22856. + local_unlock_irqrestore(irq_off_lock, flags);
  22857. return tag;
  22858. }
  22859. @@ -173,6 +176,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
  22860. if (!tags->nr_free)
  22861. alloc_global_tags(pool, tags);
  22862. +
  22863. if (!tags->nr_free)
  22864. steal_tags(pool, tags);
  22865. @@ -184,7 +188,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
  22866. }
  22867. spin_unlock(&pool->lock);
  22868. - local_irq_restore(flags);
  22869. + local_unlock_irqrestore(irq_off_lock, flags);
  22870. if (tag >= 0 || state == TASK_RUNNING)
  22871. break;
  22872. @@ -196,7 +200,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
  22873. schedule();
  22874. - local_irq_save(flags);
  22875. + local_lock_irqsave(irq_off_lock, flags);
  22876. tags = this_cpu_ptr(pool->tag_cpu);
  22877. }
  22878. if (state != TASK_RUNNING)
  22879. @@ -221,7 +225,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
  22880. BUG_ON(tag >= pool->nr_tags);
  22881. - local_irq_save(flags);
  22882. + local_lock_irqsave(irq_off_lock, flags);
  22883. tags = this_cpu_ptr(pool->tag_cpu);
  22884. spin_lock(&tags->lock);
  22885. @@ -253,7 +257,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
  22886. spin_unlock(&pool->lock);
  22887. }
  22888. - local_irq_restore(flags);
  22889. + local_unlock_irqrestore(irq_off_lock, flags);
  22890. }
  22891. EXPORT_SYMBOL_GPL(percpu_ida_free);
  22892. @@ -345,7 +349,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
  22893. struct percpu_ida_cpu *remote;
  22894. unsigned cpu, i, err = 0;
  22895. - local_irq_save(flags);
  22896. + local_lock_irqsave(irq_off_lock, flags);
  22897. for_each_possible_cpu(cpu) {
  22898. remote = per_cpu_ptr(pool->tag_cpu, cpu);
  22899. spin_lock(&remote->lock);
  22900. @@ -367,7 +371,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
  22901. }
  22902. spin_unlock(&pool->lock);
  22903. out:
  22904. - local_irq_restore(flags);
  22905. + local_unlock_irqrestore(irq_off_lock, flags);
  22906. return err;
  22907. }
  22908. EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
  22909. diff --git a/lib/radix-tree.c b/lib/radix-tree.c
  22910. index 8e6d552c40dd..741da5a77fd5 100644
  22911. --- a/lib/radix-tree.c
  22912. +++ b/lib/radix-tree.c
  22913. @@ -36,7 +36,7 @@
  22914. #include <linux/bitops.h>
  22915. #include <linux/rcupdate.h>
  22916. #include <linux/preempt.h> /* in_interrupt() */
  22917. -
  22918. +#include <linux/locallock.h>
  22919. /* Number of nodes in fully populated tree of given height */
  22920. static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;
  22921. @@ -68,6 +68,7 @@ struct radix_tree_preload {
  22922. struct radix_tree_node *nodes;
  22923. };
  22924. static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
  22925. +static DEFINE_LOCAL_IRQ_LOCK(radix_tree_preloads_lock);
  22926. static inline void *node_to_entry(void *ptr)
  22927. {
  22928. @@ -290,13 +291,14 @@ radix_tree_node_alloc(struct radix_tree_root *root)
  22929. * succeed in getting a node here (and never reach
  22930. * kmem_cache_alloc)
  22931. */
  22932. - rtp = this_cpu_ptr(&radix_tree_preloads);
  22933. + rtp = &get_locked_var(radix_tree_preloads_lock, radix_tree_preloads);
  22934. if (rtp->nr) {
  22935. ret = rtp->nodes;
  22936. rtp->nodes = ret->private_data;
  22937. ret->private_data = NULL;
  22938. rtp->nr--;
  22939. }
  22940. + put_locked_var(radix_tree_preloads_lock, radix_tree_preloads);
  22941. /*
  22942. * Update the allocation stack trace as this is more useful
  22943. * for debugging.
  22944. @@ -357,14 +359,14 @@ static int __radix_tree_preload(gfp_t gfp_mask, int nr)
  22945. */
  22946. gfp_mask &= ~__GFP_ACCOUNT;
  22947. - preempt_disable();
  22948. + local_lock(radix_tree_preloads_lock);
  22949. rtp = this_cpu_ptr(&radix_tree_preloads);
  22950. while (rtp->nr < nr) {
  22951. - preempt_enable();
  22952. + local_unlock(radix_tree_preloads_lock);
  22953. node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
  22954. if (node == NULL)
  22955. goto out;
  22956. - preempt_disable();
  22957. + local_lock(radix_tree_preloads_lock);
  22958. rtp = this_cpu_ptr(&radix_tree_preloads);
  22959. if (rtp->nr < nr) {
  22960. node->private_data = rtp->nodes;
  22961. @@ -406,7 +408,7 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
  22962. if (gfpflags_allow_blocking(gfp_mask))
  22963. return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
  22964. /* Preloading doesn't help anything with this gfp mask, skip it */
  22965. - preempt_disable();
  22966. + local_lock(radix_tree_preloads_lock);
  22967. return 0;
  22968. }
  22969. EXPORT_SYMBOL(radix_tree_maybe_preload);
  22970. @@ -422,7 +424,7 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
  22971. /* Preloading doesn't help anything with this gfp mask, skip it */
  22972. if (!gfpflags_allow_blocking(gfp_mask)) {
  22973. - preempt_disable();
  22974. + local_lock(radix_tree_preloads_lock);
  22975. return 0;
  22976. }
  22977. @@ -456,6 +458,12 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
  22978. return __radix_tree_preload(gfp_mask, nr_nodes);
  22979. }
  22980. +void radix_tree_preload_end(void)
  22981. +{
  22982. + local_unlock(radix_tree_preloads_lock);
  22983. +}
  22984. +EXPORT_SYMBOL(radix_tree_preload_end);
  22985. +
  22986. /*
  22987. * The maximum index which can be stored in a radix tree
  22988. */
  22989. diff --git a/lib/scatterlist.c b/lib/scatterlist.c
  22990. index 004fc70fc56a..ccc46992a517 100644
  22991. --- a/lib/scatterlist.c
  22992. +++ b/lib/scatterlist.c
  22993. @@ -620,7 +620,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
  22994. flush_kernel_dcache_page(miter->page);
  22995. if (miter->__flags & SG_MITER_ATOMIC) {
  22996. - WARN_ON_ONCE(preemptible());
  22997. + WARN_ON_ONCE(!pagefault_disabled());
  22998. kunmap_atomic(miter->addr);
  22999. } else
  23000. kunmap(miter->page);
  23001. @@ -664,7 +664,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
  23002. if (!sg_miter_skip(&miter, skip))
  23003. return false;
  23004. - local_irq_save(flags);
  23005. + local_irq_save_nort(flags);
  23006. while (sg_miter_next(&miter) && offset < buflen) {
  23007. unsigned int len;
  23008. @@ -681,7 +681,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
  23009. sg_miter_stop(&miter);
  23010. - local_irq_restore(flags);
  23011. + local_irq_restore_nort(flags);
  23012. return offset;
  23013. }
  23014. EXPORT_SYMBOL(sg_copy_buffer);
  23015. diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
  23016. index 1afec32de6f2..11fa431046a8 100644
  23017. --- a/lib/smp_processor_id.c
  23018. +++ b/lib/smp_processor_id.c
  23019. @@ -39,8 +39,9 @@ notrace static unsigned int check_preemption_disabled(const char *what1,
  23020. if (!printk_ratelimit())
  23021. goto out_enable;
  23022. - printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n",
  23023. - what1, what2, preempt_count() - 1, current->comm, current->pid);
  23024. + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x %08x] code: %s/%d\n",
  23025. + what1, what2, preempt_count() - 1, __migrate_disabled(current),
  23026. + current->comm, current->pid);
  23027. print_symbol("caller is %s\n", (long)__builtin_return_address(0));
  23028. dump_stack();
  23029. diff --git a/mm/Kconfig b/mm/Kconfig
  23030. index 86e3e0e74d20..77e5862a1ed2 100644
  23031. --- a/mm/Kconfig
  23032. +++ b/mm/Kconfig
  23033. @@ -410,7 +410,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
  23034. config TRANSPARENT_HUGEPAGE
  23035. bool "Transparent Hugepage Support"
  23036. - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
  23037. + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
  23038. select COMPACTION
  23039. select RADIX_TREE_MULTIORDER
  23040. help
  23041. diff --git a/mm/backing-dev.c b/mm/backing-dev.c
  23042. index 6ff2d7744223..b5a91dd53b5f 100644
  23043. --- a/mm/backing-dev.c
  23044. +++ b/mm/backing-dev.c
  23045. @@ -457,9 +457,9 @@ void wb_congested_put(struct bdi_writeback_congested *congested)
  23046. {
  23047. unsigned long flags;
  23048. - local_irq_save(flags);
  23049. + local_irq_save_nort(flags);
  23050. if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
  23051. - local_irq_restore(flags);
  23052. + local_irq_restore_nort(flags);
  23053. return;
  23054. }
  23055. diff --git a/mm/compaction.c b/mm/compaction.c
  23056. index 70e6bec46dc2..6678ed58b7c6 100644
  23057. --- a/mm/compaction.c
  23058. +++ b/mm/compaction.c
  23059. @@ -1593,10 +1593,12 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
  23060. block_start_pfn(cc->migrate_pfn, cc->order);
  23061. if (cc->last_migrated_pfn < current_block_start) {
  23062. - cpu = get_cpu();
  23063. + cpu = get_cpu_light();
  23064. + local_lock_irq(swapvec_lock);
  23065. lru_add_drain_cpu(cpu);
  23066. + local_unlock_irq(swapvec_lock);
  23067. drain_local_pages(zone);
  23068. - put_cpu();
  23069. + put_cpu_light();
  23070. /* No more flushing until we migrate again */
  23071. cc->last_migrated_pfn = 0;
  23072. }
  23073. diff --git a/mm/filemap.c b/mm/filemap.c
  23074. index edfb90e3830c..a8d2c7a73d54 100644
  23075. --- a/mm/filemap.c
  23076. +++ b/mm/filemap.c
  23077. @@ -159,9 +159,12 @@ static int page_cache_tree_insert(struct address_space *mapping,
  23078. * node->private_list is protected by
  23079. * mapping->tree_lock.
  23080. */
  23081. - if (!list_empty(&node->private_list))
  23082. - list_lru_del(&workingset_shadow_nodes,
  23083. + if (!list_empty(&node->private_list)) {
  23084. + local_lock(workingset_shadow_lock);
  23085. + list_lru_del(&__workingset_shadow_nodes,
  23086. &node->private_list);
  23087. + local_unlock(workingset_shadow_lock);
  23088. + }
  23089. }
  23090. return 0;
  23091. }
  23092. @@ -217,8 +220,10 @@ static void page_cache_tree_delete(struct address_space *mapping,
  23093. if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
  23094. list_empty(&node->private_list)) {
  23095. node->private_data = mapping;
  23096. - list_lru_add(&workingset_shadow_nodes,
  23097. - &node->private_list);
  23098. + local_lock(workingset_shadow_lock);
  23099. + list_lru_add(&__workingset_shadow_nodes,
  23100. + &node->private_list);
  23101. + local_unlock(workingset_shadow_lock);
  23102. }
  23103. }
  23104. diff --git a/mm/highmem.c b/mm/highmem.c
  23105. index 50b4ca6787f0..77518a3b35a1 100644
  23106. --- a/mm/highmem.c
  23107. +++ b/mm/highmem.c
  23108. @@ -29,10 +29,11 @@
  23109. #include <linux/kgdb.h>
  23110. #include <asm/tlbflush.h>
  23111. -
  23112. +#ifndef CONFIG_PREEMPT_RT_FULL
  23113. #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
  23114. DEFINE_PER_CPU(int, __kmap_atomic_idx);
  23115. #endif
  23116. +#endif
  23117. /*
  23118. * Virtual_count is not a pure "count".
  23119. @@ -107,8 +108,9 @@ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color)
  23120. unsigned long totalhigh_pages __read_mostly;
  23121. EXPORT_SYMBOL(totalhigh_pages);
  23122. -
  23123. +#ifndef CONFIG_PREEMPT_RT_FULL
  23124. EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
  23125. +#endif
  23126. unsigned int nr_free_highpages (void)
  23127. {
  23128. diff --git a/mm/memcontrol.c b/mm/memcontrol.c
  23129. index 2a800c4a39bd..c04403033aec 100644
  23130. --- a/mm/memcontrol.c
  23131. +++ b/mm/memcontrol.c
  23132. @@ -67,6 +67,7 @@
  23133. #include <net/sock.h>
  23134. #include <net/ip.h>
  23135. #include "slab.h"
  23136. +#include <linux/locallock.h>
  23137. #include <asm/uaccess.h>
  23138. @@ -92,6 +93,8 @@ int do_swap_account __read_mostly;
  23139. #define do_swap_account 0
  23140. #endif
  23141. +static DEFINE_LOCAL_IRQ_LOCK(event_lock);
  23142. +
  23143. /* Whether legacy memory+swap accounting is active */
  23144. static bool do_memsw_account(void)
  23145. {
  23146. @@ -1795,7 +1798,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
  23147. return;
  23148. /* Notify other cpus that system-wide "drain" is running */
  23149. get_online_cpus();
  23150. - curcpu = get_cpu();
  23151. + curcpu = get_cpu_light();
  23152. for_each_online_cpu(cpu) {
  23153. struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
  23154. struct mem_cgroup *memcg;
  23155. @@ -1812,7 +1815,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
  23156. schedule_work_on(cpu, &stock->work);
  23157. }
  23158. }
  23159. - put_cpu();
  23160. + put_cpu_light();
  23161. put_online_cpus();
  23162. mutex_unlock(&percpu_charge_mutex);
  23163. }
  23164. @@ -4558,12 +4561,12 @@ static int mem_cgroup_move_account(struct page *page,
  23165. ret = 0;
  23166. - local_irq_disable();
  23167. + local_lock_irq(event_lock);
  23168. mem_cgroup_charge_statistics(to, page, compound, nr_pages);
  23169. memcg_check_events(to, page);
  23170. mem_cgroup_charge_statistics(from, page, compound, -nr_pages);
  23171. memcg_check_events(from, page);
  23172. - local_irq_enable();
  23173. + local_unlock_irq(event_lock);
  23174. out_unlock:
  23175. unlock_page(page);
  23176. out:
  23177. @@ -5438,10 +5441,10 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
  23178. commit_charge(page, memcg, lrucare);
  23179. - local_irq_disable();
  23180. + local_lock_irq(event_lock);
  23181. mem_cgroup_charge_statistics(memcg, page, compound, nr_pages);
  23182. memcg_check_events(memcg, page);
  23183. - local_irq_enable();
  23184. + local_unlock_irq(event_lock);
  23185. if (do_memsw_account() && PageSwapCache(page)) {
  23186. swp_entry_t entry = { .val = page_private(page) };
  23187. @@ -5497,14 +5500,14 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
  23188. memcg_oom_recover(memcg);
  23189. }
  23190. - local_irq_save(flags);
  23191. + local_lock_irqsave(event_lock, flags);
  23192. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
  23193. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
  23194. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
  23195. __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
  23196. __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
  23197. memcg_check_events(memcg, dummy_page);
  23198. - local_irq_restore(flags);
  23199. + local_unlock_irqrestore(event_lock, flags);
  23200. if (!mem_cgroup_is_root(memcg))
  23201. css_put_many(&memcg->css, nr_pages);
  23202. @@ -5659,10 +5662,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
  23203. commit_charge(newpage, memcg, false);
  23204. - local_irq_save(flags);
  23205. + local_lock_irqsave(event_lock, flags);
  23206. mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages);
  23207. memcg_check_events(memcg, newpage);
  23208. - local_irq_restore(flags);
  23209. + local_unlock_irqrestore(event_lock, flags);
  23210. }
  23211. DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
  23212. @@ -5853,6 +5856,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
  23213. {
  23214. struct mem_cgroup *memcg, *swap_memcg;
  23215. unsigned short oldid;
  23216. + unsigned long flags;
  23217. VM_BUG_ON_PAGE(PageLRU(page), page);
  23218. VM_BUG_ON_PAGE(page_count(page), page);
  23219. @@ -5893,12 +5897,16 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
  23220. * important here to have the interrupts disabled because it is the
  23221. * only synchronisation we have for udpating the per-CPU variables.
  23222. */
  23223. + local_lock_irqsave(event_lock, flags);
  23224. +#ifndef CONFIG_PREEMPT_RT_BASE
  23225. VM_BUG_ON(!irqs_disabled());
  23226. +#endif
  23227. mem_cgroup_charge_statistics(memcg, page, false, -1);
  23228. memcg_check_events(memcg, page);
  23229. if (!mem_cgroup_is_root(memcg))
  23230. css_put(&memcg->css);
  23231. + local_unlock_irqrestore(event_lock, flags);
  23232. }
  23233. /*
  23234. diff --git a/mm/mmu_context.c b/mm/mmu_context.c
  23235. index 6f4d27c5bb32..5cd25c745a8f 100644
  23236. --- a/mm/mmu_context.c
  23237. +++ b/mm/mmu_context.c
  23238. @@ -23,6 +23,7 @@ void use_mm(struct mm_struct *mm)
  23239. struct task_struct *tsk = current;
  23240. task_lock(tsk);
  23241. + preempt_disable_rt();
  23242. active_mm = tsk->active_mm;
  23243. if (active_mm != mm) {
  23244. atomic_inc(&mm->mm_count);
  23245. @@ -30,6 +31,7 @@ void use_mm(struct mm_struct *mm)
  23246. }
  23247. tsk->mm = mm;
  23248. switch_mm(active_mm, mm, tsk);
  23249. + preempt_enable_rt();
  23250. task_unlock(tsk);
  23251. #ifdef finish_arch_post_lock_switch
  23252. finish_arch_post_lock_switch();
  23253. diff --git a/mm/page_alloc.c b/mm/page_alloc.c
  23254. index fbc38888252b..1cb08e1406ea 100644
  23255. --- a/mm/page_alloc.c
  23256. +++ b/mm/page_alloc.c
  23257. @@ -61,6 +61,7 @@
  23258. #include <linux/page_ext.h>
  23259. #include <linux/hugetlb.h>
  23260. #include <linux/sched/rt.h>
  23261. +#include <linux/locallock.h>
  23262. #include <linux/page_owner.h>
  23263. #include <linux/kthread.h>
  23264. #include <linux/memcontrol.h>
  23265. @@ -281,6 +282,18 @@ EXPORT_SYMBOL(nr_node_ids);
  23266. EXPORT_SYMBOL(nr_online_nodes);
  23267. #endif
  23268. +static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
  23269. +
  23270. +#ifdef CONFIG_PREEMPT_RT_BASE
  23271. +# define cpu_lock_irqsave(cpu, flags) \
  23272. + local_lock_irqsave_on(pa_lock, flags, cpu)
  23273. +# define cpu_unlock_irqrestore(cpu, flags) \
  23274. + local_unlock_irqrestore_on(pa_lock, flags, cpu)
  23275. +#else
  23276. +# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
  23277. +# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
  23278. +#endif
  23279. +
  23280. int page_group_by_mobility_disabled __read_mostly;
  23281. #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
  23282. @@ -1092,7 +1105,7 @@ static bool bulkfree_pcp_prepare(struct page *page)
  23283. #endif /* CONFIG_DEBUG_VM */
  23284. /*
  23285. - * Frees a number of pages from the PCP lists
  23286. + * Frees a number of pages which have been collected from the pcp lists.
  23287. * Assumes all pages on list are in same zone, and of same order.
  23288. * count is the number of pages to free.
  23289. *
  23290. @@ -1103,19 +1116,58 @@ static bool bulkfree_pcp_prepare(struct page *page)
  23291. * pinned" detection logic.
  23292. */
  23293. static void free_pcppages_bulk(struct zone *zone, int count,
  23294. - struct per_cpu_pages *pcp)
  23295. + struct list_head *list)
  23296. {
  23297. - int migratetype = 0;
  23298. - int batch_free = 0;
  23299. unsigned long nr_scanned;
  23300. bool isolated_pageblocks;
  23301. + unsigned long flags;
  23302. +
  23303. + spin_lock_irqsave(&zone->lock, flags);
  23304. - spin_lock(&zone->lock);
  23305. isolated_pageblocks = has_isolate_pageblock(zone);
  23306. nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
  23307. if (nr_scanned)
  23308. __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
  23309. + while (!list_empty(list)) {
  23310. + struct page *page;
  23311. + int mt; /* migratetype of the to-be-freed page */
  23312. +
  23313. + page = list_first_entry(list, struct page, lru);
  23314. + /* must delete as __free_one_page list manipulates */
  23315. + list_del(&page->lru);
  23316. +
  23317. + mt = get_pcppage_migratetype(page);
  23318. + /* MIGRATE_ISOLATE page should not go to pcplists */
  23319. + VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
  23320. + /* Pageblock could have been isolated meanwhile */
  23321. + if (unlikely(isolated_pageblocks))
  23322. + mt = get_pageblock_migratetype(page);
  23323. +
  23324. + if (bulkfree_pcp_prepare(page))
  23325. + continue;
  23326. +
  23327. + __free_one_page(page, page_to_pfn(page), zone, 0, mt);
  23328. + trace_mm_page_pcpu_drain(page, 0, mt);
  23329. + count--;
  23330. + }
  23331. + WARN_ON(count != 0);
  23332. + spin_unlock_irqrestore(&zone->lock, flags);
  23333. +}
  23334. +
  23335. +/*
  23336. + * Moves a number of pages from the PCP lists to free list which
  23337. + * is freed outside of the locked region.
  23338. + *
  23339. + * Assumes all pages on list are in same zone, and of same order.
  23340. + * count is the number of pages to free.
  23341. + */
  23342. +static void isolate_pcp_pages(int count, struct per_cpu_pages *src,
  23343. + struct list_head *dst)
  23344. +{
  23345. + int migratetype = 0;
  23346. + int batch_free = 0;
  23347. +
  23348. while (count) {
  23349. struct page *page;
  23350. struct list_head *list;
  23351. @@ -1131,7 +1183,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
  23352. batch_free++;
  23353. if (++migratetype == MIGRATE_PCPTYPES)
  23354. migratetype = 0;
  23355. - list = &pcp->lists[migratetype];
  23356. + list = &src->lists[migratetype];
  23357. } while (list_empty(list));
  23358. /* This is the only non-empty list. Free them all. */
  23359. @@ -1139,27 +1191,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
  23360. batch_free = count;
  23361. do {
  23362. - int mt; /* migratetype of the to-be-freed page */
  23363. -
  23364. page = list_last_entry(list, struct page, lru);
  23365. - /* must delete as __free_one_page list manipulates */
  23366. list_del(&page->lru);
  23367. - mt = get_pcppage_migratetype(page);
  23368. - /* MIGRATE_ISOLATE page should not go to pcplists */
  23369. - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
  23370. - /* Pageblock could have been isolated meanwhile */
  23371. - if (unlikely(isolated_pageblocks))
  23372. - mt = get_pageblock_migratetype(page);
  23373. -
  23374. - if (bulkfree_pcp_prepare(page))
  23375. - continue;
  23376. -
  23377. - __free_one_page(page, page_to_pfn(page), zone, 0, mt);
  23378. - trace_mm_page_pcpu_drain(page, 0, mt);
  23379. + list_add(&page->lru, dst);
  23380. } while (--count && --batch_free && !list_empty(list));
  23381. }
  23382. - spin_unlock(&zone->lock);
  23383. }
  23384. static void free_one_page(struct zone *zone,
  23385. @@ -1168,7 +1205,9 @@ static void free_one_page(struct zone *zone,
  23386. int migratetype)
  23387. {
  23388. unsigned long nr_scanned;
  23389. - spin_lock(&zone->lock);
  23390. + unsigned long flags;
  23391. +
  23392. + spin_lock_irqsave(&zone->lock, flags);
  23393. nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
  23394. if (nr_scanned)
  23395. __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
  23396. @@ -1178,7 +1217,7 @@ static void free_one_page(struct zone *zone,
  23397. migratetype = get_pfnblock_migratetype(page, pfn);
  23398. }
  23399. __free_one_page(page, pfn, zone, order, migratetype);
  23400. - spin_unlock(&zone->lock);
  23401. + spin_unlock_irqrestore(&zone->lock, flags);
  23402. }
  23403. static void __meminit __init_single_page(struct page *page, unsigned long pfn,
  23404. @@ -1264,10 +1303,10 @@ static void __free_pages_ok(struct page *page, unsigned int order)
  23405. return;
  23406. migratetype = get_pfnblock_migratetype(page, pfn);
  23407. - local_irq_save(flags);
  23408. + local_lock_irqsave(pa_lock, flags);
  23409. __count_vm_events(PGFREE, 1 << order);
  23410. free_one_page(page_zone(page), page, pfn, order, migratetype);
  23411. - local_irq_restore(flags);
  23412. + local_unlock_irqrestore(pa_lock, flags);
  23413. }
  23414. static void __init __free_pages_boot_core(struct page *page, unsigned int order)
  23415. @@ -2282,16 +2321,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
  23416. void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
  23417. {
  23418. unsigned long flags;
  23419. + LIST_HEAD(dst);
  23420. int to_drain, batch;
  23421. - local_irq_save(flags);
  23422. + local_lock_irqsave(pa_lock, flags);
  23423. batch = READ_ONCE(pcp->batch);
  23424. to_drain = min(pcp->count, batch);
  23425. if (to_drain > 0) {
  23426. - free_pcppages_bulk(zone, to_drain, pcp);
  23427. + isolate_pcp_pages(to_drain, pcp, &dst);
  23428. pcp->count -= to_drain;
  23429. }
  23430. - local_irq_restore(flags);
  23431. + local_unlock_irqrestore(pa_lock, flags);
  23432. + free_pcppages_bulk(zone, to_drain, &dst);
  23433. }
  23434. #endif
  23435. @@ -2307,16 +2348,21 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
  23436. unsigned long flags;
  23437. struct per_cpu_pageset *pset;
  23438. struct per_cpu_pages *pcp;
  23439. + LIST_HEAD(dst);
  23440. + int count;
  23441. - local_irq_save(flags);
  23442. + cpu_lock_irqsave(cpu, flags);
  23443. pset = per_cpu_ptr(zone->pageset, cpu);
  23444. pcp = &pset->pcp;
  23445. - if (pcp->count) {
  23446. - free_pcppages_bulk(zone, pcp->count, pcp);
  23447. + count = pcp->count;
  23448. + if (count) {
  23449. + isolate_pcp_pages(count, pcp, &dst);
  23450. pcp->count = 0;
  23451. }
  23452. - local_irq_restore(flags);
  23453. + cpu_unlock_irqrestore(cpu, flags);
  23454. + if (count)
  23455. + free_pcppages_bulk(zone, count, &dst);
  23456. }
  23457. /*
  23458. @@ -2402,8 +2448,17 @@ void drain_all_pages(struct zone *zone)
  23459. else
  23460. cpumask_clear_cpu(cpu, &cpus_with_pcps);
  23461. }
  23462. +#ifndef CONFIG_PREEMPT_RT_BASE
  23463. on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
  23464. zone, 1);
  23465. +#else
  23466. + for_each_cpu(cpu, &cpus_with_pcps) {
  23467. + if (zone)
  23468. + drain_pages_zone(cpu, zone);
  23469. + else
  23470. + drain_pages(cpu);
  23471. + }
  23472. +#endif
  23473. }
  23474. #ifdef CONFIG_HIBERNATION
  23475. @@ -2463,7 +2518,7 @@ void free_hot_cold_page(struct page *page, bool cold)
  23476. migratetype = get_pfnblock_migratetype(page, pfn);
  23477. set_pcppage_migratetype(page, migratetype);
  23478. - local_irq_save(flags);
  23479. + local_lock_irqsave(pa_lock, flags);
  23480. __count_vm_event(PGFREE);
  23481. /*
  23482. @@ -2489,12 +2544,17 @@ void free_hot_cold_page(struct page *page, bool cold)
  23483. pcp->count++;
  23484. if (pcp->count >= pcp->high) {
  23485. unsigned long batch = READ_ONCE(pcp->batch);
  23486. - free_pcppages_bulk(zone, batch, pcp);
  23487. + LIST_HEAD(dst);
  23488. +
  23489. + isolate_pcp_pages(batch, pcp, &dst);
  23490. pcp->count -= batch;
  23491. + local_unlock_irqrestore(pa_lock, flags);
  23492. + free_pcppages_bulk(zone, batch, &dst);
  23493. + return;
  23494. }
  23495. out:
  23496. - local_irq_restore(flags);
  23497. + local_unlock_irqrestore(pa_lock, flags);
  23498. }
  23499. /*
  23500. @@ -2629,7 +2689,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
  23501. struct per_cpu_pages *pcp;
  23502. struct list_head *list;
  23503. - local_irq_save(flags);
  23504. + local_lock_irqsave(pa_lock, flags);
  23505. do {
  23506. pcp = &this_cpu_ptr(zone->pageset)->pcp;
  23507. list = &pcp->lists[migratetype];
  23508. @@ -2656,7 +2716,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
  23509. * allocate greater than order-1 page units with __GFP_NOFAIL.
  23510. */
  23511. WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
  23512. - spin_lock_irqsave(&zone->lock, flags);
  23513. + local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
  23514. do {
  23515. page = NULL;
  23516. @@ -2668,22 +2728,24 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
  23517. if (!page)
  23518. page = __rmqueue(zone, order, migratetype);
  23519. } while (page && check_new_pages(page, order));
  23520. - spin_unlock(&zone->lock);
  23521. - if (!page)
  23522. + if (!page) {
  23523. + spin_unlock(&zone->lock);
  23524. goto failed;
  23525. + }
  23526. __mod_zone_freepage_state(zone, -(1 << order),
  23527. get_pcppage_migratetype(page));
  23528. + spin_unlock(&zone->lock);
  23529. }
  23530. __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
  23531. zone_statistics(preferred_zone, zone, gfp_flags);
  23532. - local_irq_restore(flags);
  23533. + local_unlock_irqrestore(pa_lock, flags);
  23534. VM_BUG_ON_PAGE(bad_range(zone, page), page);
  23535. return page;
  23536. failed:
  23537. - local_irq_restore(flags);
  23538. + local_unlock_irqrestore(pa_lock, flags);
  23539. return NULL;
  23540. }
  23541. @@ -6561,7 +6623,9 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
  23542. int cpu = (unsigned long)hcpu;
  23543. if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
  23544. + local_lock_irq_on(swapvec_lock, cpu);
  23545. lru_add_drain_cpu(cpu);
  23546. + local_unlock_irq_on(swapvec_lock, cpu);
  23547. drain_pages(cpu);
  23548. /*
  23549. @@ -6587,6 +6651,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
  23550. void __init page_alloc_init(void)
  23551. {
  23552. hotcpu_notifier(page_alloc_cpu_notify, 0);
  23553. + local_irq_lock_init(pa_lock);
  23554. }
  23555. /*
  23556. @@ -7422,7 +7487,7 @@ void zone_pcp_reset(struct zone *zone)
  23557. struct per_cpu_pageset *pset;
  23558. /* avoid races with drain_pages() */
  23559. - local_irq_save(flags);
  23560. + local_lock_irqsave(pa_lock, flags);
  23561. if (zone->pageset != &boot_pageset) {
  23562. for_each_online_cpu(cpu) {
  23563. pset = per_cpu_ptr(zone->pageset, cpu);
  23564. @@ -7431,7 +7496,7 @@ void zone_pcp_reset(struct zone *zone)
  23565. free_percpu(zone->pageset);
  23566. zone->pageset = &boot_pageset;
  23567. }
  23568. - local_irq_restore(flags);
  23569. + local_unlock_irqrestore(pa_lock, flags);
  23570. }
  23571. #ifdef CONFIG_MEMORY_HOTREMOVE
  23572. diff --git a/mm/percpu.c b/mm/percpu.c
  23573. index f014cebbf405..4e739fcf91bf 100644
  23574. --- a/mm/percpu.c
  23575. +++ b/mm/percpu.c
  23576. @@ -1283,18 +1283,7 @@ void free_percpu(void __percpu *ptr)
  23577. }
  23578. EXPORT_SYMBOL_GPL(free_percpu);
  23579. -/**
  23580. - * is_kernel_percpu_address - test whether address is from static percpu area
  23581. - * @addr: address to test
  23582. - *
  23583. - * Test whether @addr belongs to in-kernel static percpu area. Module
  23584. - * static percpu areas are not considered. For those, use
  23585. - * is_module_percpu_address().
  23586. - *
  23587. - * RETURNS:
  23588. - * %true if @addr is from in-kernel static percpu area, %false otherwise.
  23589. - */
  23590. -bool is_kernel_percpu_address(unsigned long addr)
  23591. +bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr)
  23592. {
  23593. #ifdef CONFIG_SMP
  23594. const size_t static_size = __per_cpu_end - __per_cpu_start;
  23595. @@ -1303,15 +1292,38 @@ bool is_kernel_percpu_address(unsigned long addr)
  23596. for_each_possible_cpu(cpu) {
  23597. void *start = per_cpu_ptr(base, cpu);
  23598. + void *va = (void *)addr;
  23599. - if ((void *)addr >= start && (void *)addr < start + static_size)
  23600. + if (va >= start && va < start + static_size) {
  23601. + if (can_addr) {
  23602. + *can_addr = (unsigned long) (va - start);
  23603. + *can_addr += (unsigned long)
  23604. + per_cpu_ptr(base, get_boot_cpu_id());
  23605. + }
  23606. return true;
  23607. - }
  23608. + }
  23609. + }
  23610. #endif
  23611. /* on UP, can't distinguish from other static vars, always false */
  23612. return false;
  23613. }
  23614. +/**
  23615. + * is_kernel_percpu_address - test whether address is from static percpu area
  23616. + * @addr: address to test
  23617. + *
  23618. + * Test whether @addr belongs to in-kernel static percpu area. Module
  23619. + * static percpu areas are not considered. For those, use
  23620. + * is_module_percpu_address().
  23621. + *
  23622. + * RETURNS:
  23623. + * %true if @addr is from in-kernel static percpu area, %false otherwise.
  23624. + */
  23625. +bool is_kernel_percpu_address(unsigned long addr)
  23626. +{
  23627. + return __is_kernel_percpu_address(addr, NULL);
  23628. +}
  23629. +
  23630. /**
  23631. * per_cpu_ptr_to_phys - convert translated percpu address to physical address
  23632. * @addr: the address to be converted to physical address
  23633. diff --git a/mm/slab.h b/mm/slab.h
  23634. index ceb7d70cdb76..dfd281e43fbe 100644
  23635. --- a/mm/slab.h
  23636. +++ b/mm/slab.h
  23637. @@ -426,7 +426,11 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
  23638. * The slab lists for all objects.
  23639. */
  23640. struct kmem_cache_node {
  23641. +#ifdef CONFIG_SLUB
  23642. + raw_spinlock_t list_lock;
  23643. +#else
  23644. spinlock_t list_lock;
  23645. +#endif
  23646. #ifdef CONFIG_SLAB
  23647. struct list_head slabs_partial; /* partial list first, better asm code */
  23648. diff --git a/mm/slub.c b/mm/slub.c
  23649. index edc79ca3c6d5..67eb368b9314 100644
  23650. --- a/mm/slub.c
  23651. +++ b/mm/slub.c
  23652. @@ -1144,7 +1144,7 @@ static noinline int free_debug_processing(
  23653. unsigned long uninitialized_var(flags);
  23654. int ret = 0;
  23655. - spin_lock_irqsave(&n->list_lock, flags);
  23656. + raw_spin_lock_irqsave(&n->list_lock, flags);
  23657. slab_lock(page);
  23658. if (s->flags & SLAB_CONSISTENCY_CHECKS) {
  23659. @@ -1179,7 +1179,7 @@ static noinline int free_debug_processing(
  23660. bulk_cnt, cnt);
  23661. slab_unlock(page);
  23662. - spin_unlock_irqrestore(&n->list_lock, flags);
  23663. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  23664. if (!ret)
  23665. slab_fix(s, "Object at 0x%p not freed", object);
  23666. return ret;
  23667. @@ -1307,6 +1307,12 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node,
  23668. #endif /* CONFIG_SLUB_DEBUG */
  23669. +struct slub_free_list {
  23670. + raw_spinlock_t lock;
  23671. + struct list_head list;
  23672. +};
  23673. +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
  23674. +
  23675. /*
  23676. * Hooks for other subsystems that check memory allocations. In a typical
  23677. * production configuration these hooks all should produce no code at all.
  23678. @@ -1530,10 +1536,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
  23679. void *start, *p;
  23680. int idx, order;
  23681. bool shuffle;
  23682. + bool enableirqs = false;
  23683. flags &= gfp_allowed_mask;
  23684. if (gfpflags_allow_blocking(flags))
  23685. + enableirqs = true;
  23686. +#ifdef CONFIG_PREEMPT_RT_FULL
  23687. + if (system_state == SYSTEM_RUNNING)
  23688. + enableirqs = true;
  23689. +#endif
  23690. + if (enableirqs)
  23691. local_irq_enable();
  23692. flags |= s->allocflags;
  23693. @@ -1608,7 +1621,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
  23694. page->frozen = 1;
  23695. out:
  23696. - if (gfpflags_allow_blocking(flags))
  23697. + if (enableirqs)
  23698. local_irq_disable();
  23699. if (!page)
  23700. return NULL;
  23701. @@ -1667,6 +1680,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
  23702. __free_pages(page, order);
  23703. }
  23704. +static void free_delayed(struct list_head *h)
  23705. +{
  23706. + while(!list_empty(h)) {
  23707. + struct page *page = list_first_entry(h, struct page, lru);
  23708. +
  23709. + list_del(&page->lru);
  23710. + __free_slab(page->slab_cache, page);
  23711. + }
  23712. +}
  23713. +
  23714. #define need_reserve_slab_rcu \
  23715. (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
  23716. @@ -1698,6 +1721,12 @@ static void free_slab(struct kmem_cache *s, struct page *page)
  23717. }
  23718. call_rcu(head, rcu_free_slab);
  23719. + } else if (irqs_disabled()) {
  23720. + struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
  23721. +
  23722. + raw_spin_lock(&f->lock);
  23723. + list_add(&page->lru, &f->list);
  23724. + raw_spin_unlock(&f->lock);
  23725. } else
  23726. __free_slab(s, page);
  23727. }
  23728. @@ -1805,7 +1834,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
  23729. if (!n || !n->nr_partial)
  23730. return NULL;
  23731. - spin_lock(&n->list_lock);
  23732. + raw_spin_lock(&n->list_lock);
  23733. list_for_each_entry_safe(page, page2, &n->partial, lru) {
  23734. void *t;
  23735. @@ -1830,7 +1859,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
  23736. break;
  23737. }
  23738. - spin_unlock(&n->list_lock);
  23739. + raw_spin_unlock(&n->list_lock);
  23740. return object;
  23741. }
  23742. @@ -2076,7 +2105,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
  23743. * that acquire_slab() will see a slab page that
  23744. * is frozen
  23745. */
  23746. - spin_lock(&n->list_lock);
  23747. + raw_spin_lock(&n->list_lock);
  23748. }
  23749. } else {
  23750. m = M_FULL;
  23751. @@ -2087,7 +2116,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
  23752. * slabs from diagnostic functions will not see
  23753. * any frozen slabs.
  23754. */
  23755. - spin_lock(&n->list_lock);
  23756. + raw_spin_lock(&n->list_lock);
  23757. }
  23758. }
  23759. @@ -2122,7 +2151,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
  23760. goto redo;
  23761. if (lock)
  23762. - spin_unlock(&n->list_lock);
  23763. + raw_spin_unlock(&n->list_lock);
  23764. if (m == M_FREE) {
  23765. stat(s, DEACTIVATE_EMPTY);
  23766. @@ -2154,10 +2183,10 @@ static void unfreeze_partials(struct kmem_cache *s,
  23767. n2 = get_node(s, page_to_nid(page));
  23768. if (n != n2) {
  23769. if (n)
  23770. - spin_unlock(&n->list_lock);
  23771. + raw_spin_unlock(&n->list_lock);
  23772. n = n2;
  23773. - spin_lock(&n->list_lock);
  23774. + raw_spin_lock(&n->list_lock);
  23775. }
  23776. do {
  23777. @@ -2186,7 +2215,7 @@ static void unfreeze_partials(struct kmem_cache *s,
  23778. }
  23779. if (n)
  23780. - spin_unlock(&n->list_lock);
  23781. + raw_spin_unlock(&n->list_lock);
  23782. while (discard_page) {
  23783. page = discard_page;
  23784. @@ -2225,14 +2254,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
  23785. pobjects = oldpage->pobjects;
  23786. pages = oldpage->pages;
  23787. if (drain && pobjects > s->cpu_partial) {
  23788. + struct slub_free_list *f;
  23789. unsigned long flags;
  23790. + LIST_HEAD(tofree);
  23791. /*
  23792. * partial array is full. Move the existing
  23793. * set to the per node partial list.
  23794. */
  23795. local_irq_save(flags);
  23796. unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
  23797. + f = this_cpu_ptr(&slub_free_list);
  23798. + raw_spin_lock(&f->lock);
  23799. + list_splice_init(&f->list, &tofree);
  23800. + raw_spin_unlock(&f->lock);
  23801. local_irq_restore(flags);
  23802. + free_delayed(&tofree);
  23803. oldpage = NULL;
  23804. pobjects = 0;
  23805. pages = 0;
  23806. @@ -2304,7 +2340,22 @@ static bool has_cpu_slab(int cpu, void *info)
  23807. static void flush_all(struct kmem_cache *s)
  23808. {
  23809. + LIST_HEAD(tofree);
  23810. + int cpu;
  23811. +
  23812. on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
  23813. + for_each_online_cpu(cpu) {
  23814. + struct slub_free_list *f;
  23815. +
  23816. + if (!has_cpu_slab(cpu, s))
  23817. + continue;
  23818. +
  23819. + f = &per_cpu(slub_free_list, cpu);
  23820. + raw_spin_lock_irq(&f->lock);
  23821. + list_splice_init(&f->list, &tofree);
  23822. + raw_spin_unlock_irq(&f->lock);
  23823. + free_delayed(&tofree);
  23824. + }
  23825. }
  23826. /*
  23827. @@ -2359,10 +2410,10 @@ static unsigned long count_partial(struct kmem_cache_node *n,
  23828. unsigned long x = 0;
  23829. struct page *page;
  23830. - spin_lock_irqsave(&n->list_lock, flags);
  23831. + raw_spin_lock_irqsave(&n->list_lock, flags);
  23832. list_for_each_entry(page, &n->partial, lru)
  23833. x += get_count(page);
  23834. - spin_unlock_irqrestore(&n->list_lock, flags);
  23835. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  23836. return x;
  23837. }
  23838. #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
  23839. @@ -2500,8 +2551,10 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
  23840. * already disabled (which is the case for bulk allocation).
  23841. */
  23842. static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  23843. - unsigned long addr, struct kmem_cache_cpu *c)
  23844. + unsigned long addr, struct kmem_cache_cpu *c,
  23845. + struct list_head *to_free)
  23846. {
  23847. + struct slub_free_list *f;
  23848. void *freelist;
  23849. struct page *page;
  23850. @@ -2561,6 +2614,13 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  23851. VM_BUG_ON(!c->page->frozen);
  23852. c->freelist = get_freepointer(s, freelist);
  23853. c->tid = next_tid(c->tid);
  23854. +
  23855. +out:
  23856. + f = this_cpu_ptr(&slub_free_list);
  23857. + raw_spin_lock(&f->lock);
  23858. + list_splice_init(&f->list, to_free);
  23859. + raw_spin_unlock(&f->lock);
  23860. +
  23861. return freelist;
  23862. new_slab:
  23863. @@ -2592,7 +2652,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  23864. deactivate_slab(s, page, get_freepointer(s, freelist));
  23865. c->page = NULL;
  23866. c->freelist = NULL;
  23867. - return freelist;
  23868. + goto out;
  23869. }
  23870. /*
  23871. @@ -2604,6 +2664,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  23872. {
  23873. void *p;
  23874. unsigned long flags;
  23875. + LIST_HEAD(tofree);
  23876. local_irq_save(flags);
  23877. #ifdef CONFIG_PREEMPT
  23878. @@ -2615,8 +2676,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  23879. c = this_cpu_ptr(s->cpu_slab);
  23880. #endif
  23881. - p = ___slab_alloc(s, gfpflags, node, addr, c);
  23882. + p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
  23883. local_irq_restore(flags);
  23884. + free_delayed(&tofree);
  23885. return p;
  23886. }
  23887. @@ -2802,7 +2864,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
  23888. do {
  23889. if (unlikely(n)) {
  23890. - spin_unlock_irqrestore(&n->list_lock, flags);
  23891. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  23892. n = NULL;
  23893. }
  23894. prior = page->freelist;
  23895. @@ -2834,7 +2896,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
  23896. * Otherwise the list_lock will synchronize with
  23897. * other processors updating the list of slabs.
  23898. */
  23899. - spin_lock_irqsave(&n->list_lock, flags);
  23900. + raw_spin_lock_irqsave(&n->list_lock, flags);
  23901. }
  23902. }
  23903. @@ -2876,7 +2938,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
  23904. add_partial(n, page, DEACTIVATE_TO_TAIL);
  23905. stat(s, FREE_ADD_PARTIAL);
  23906. }
  23907. - spin_unlock_irqrestore(&n->list_lock, flags);
  23908. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  23909. return;
  23910. slab_empty:
  23911. @@ -2891,7 +2953,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
  23912. remove_full(s, n, page);
  23913. }
  23914. - spin_unlock_irqrestore(&n->list_lock, flags);
  23915. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  23916. stat(s, FREE_SLAB);
  23917. discard_slab(s, page);
  23918. }
  23919. @@ -3096,6 +3158,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
  23920. void **p)
  23921. {
  23922. struct kmem_cache_cpu *c;
  23923. + LIST_HEAD(to_free);
  23924. int i;
  23925. /* memcg and kmem_cache debug support */
  23926. @@ -3119,7 +3182,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
  23927. * of re-populating per CPU c->freelist
  23928. */
  23929. p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
  23930. - _RET_IP_, c);
  23931. + _RET_IP_, c, &to_free);
  23932. if (unlikely(!p[i]))
  23933. goto error;
  23934. @@ -3131,6 +3194,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
  23935. }
  23936. c->tid = next_tid(c->tid);
  23937. local_irq_enable();
  23938. + free_delayed(&to_free);
  23939. /* Clear memory outside IRQ disabled fastpath loop */
  23940. if (unlikely(flags & __GFP_ZERO)) {
  23941. @@ -3278,7 +3342,7 @@ static void
  23942. init_kmem_cache_node(struct kmem_cache_node *n)
  23943. {
  23944. n->nr_partial = 0;
  23945. - spin_lock_init(&n->list_lock);
  23946. + raw_spin_lock_init(&n->list_lock);
  23947. INIT_LIST_HEAD(&n->partial);
  23948. #ifdef CONFIG_SLUB_DEBUG
  23949. atomic_long_set(&n->nr_slabs, 0);
  23950. @@ -3622,6 +3686,10 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
  23951. const char *text)
  23952. {
  23953. #ifdef CONFIG_SLUB_DEBUG
  23954. +#ifdef CONFIG_PREEMPT_RT_BASE
  23955. + /* XXX move out of irq-off section */
  23956. + slab_err(s, page, text, s->name);
  23957. +#else
  23958. void *addr = page_address(page);
  23959. void *p;
  23960. unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
  23961. @@ -3642,6 +3710,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
  23962. slab_unlock(page);
  23963. kfree(map);
  23964. #endif
  23965. +#endif
  23966. }
  23967. /*
  23968. @@ -3655,7 +3724,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
  23969. struct page *page, *h;
  23970. BUG_ON(irqs_disabled());
  23971. - spin_lock_irq(&n->list_lock);
  23972. + raw_spin_lock_irq(&n->list_lock);
  23973. list_for_each_entry_safe(page, h, &n->partial, lru) {
  23974. if (!page->inuse) {
  23975. remove_partial(n, page);
  23976. @@ -3665,7 +3734,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
  23977. "Objects remaining in %s on __kmem_cache_shutdown()");
  23978. }
  23979. }
  23980. - spin_unlock_irq(&n->list_lock);
  23981. + raw_spin_unlock_irq(&n->list_lock);
  23982. list_for_each_entry_safe(page, h, &discard, lru)
  23983. discard_slab(s, page);
  23984. @@ -3908,7 +3977,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
  23985. for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
  23986. INIT_LIST_HEAD(promote + i);
  23987. - spin_lock_irqsave(&n->list_lock, flags);
  23988. + raw_spin_lock_irqsave(&n->list_lock, flags);
  23989. /*
  23990. * Build lists of slabs to discard or promote.
  23991. @@ -3939,7 +4008,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
  23992. for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
  23993. list_splice(promote + i, &n->partial);
  23994. - spin_unlock_irqrestore(&n->list_lock, flags);
  23995. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  23996. /* Release empty slabs */
  23997. list_for_each_entry_safe(page, t, &discard, lru)
  23998. @@ -4115,6 +4184,12 @@ void __init kmem_cache_init(void)
  23999. {
  24000. static __initdata struct kmem_cache boot_kmem_cache,
  24001. boot_kmem_cache_node;
  24002. + int cpu;
  24003. +
  24004. + for_each_possible_cpu(cpu) {
  24005. + raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
  24006. + INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
  24007. + }
  24008. if (debug_guardpage_minorder())
  24009. slub_max_order = 0;
  24010. @@ -4323,7 +4398,7 @@ static int validate_slab_node(struct kmem_cache *s,
  24011. struct page *page;
  24012. unsigned long flags;
  24013. - spin_lock_irqsave(&n->list_lock, flags);
  24014. + raw_spin_lock_irqsave(&n->list_lock, flags);
  24015. list_for_each_entry(page, &n->partial, lru) {
  24016. validate_slab_slab(s, page, map);
  24017. @@ -4345,7 +4420,7 @@ static int validate_slab_node(struct kmem_cache *s,
  24018. s->name, count, atomic_long_read(&n->nr_slabs));
  24019. out:
  24020. - spin_unlock_irqrestore(&n->list_lock, flags);
  24021. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  24022. return count;
  24023. }
  24024. @@ -4533,12 +4608,12 @@ static int list_locations(struct kmem_cache *s, char *buf,
  24025. if (!atomic_long_read(&n->nr_slabs))
  24026. continue;
  24027. - spin_lock_irqsave(&n->list_lock, flags);
  24028. + raw_spin_lock_irqsave(&n->list_lock, flags);
  24029. list_for_each_entry(page, &n->partial, lru)
  24030. process_slab(&t, s, page, alloc, map);
  24031. list_for_each_entry(page, &n->full, lru)
  24032. process_slab(&t, s, page, alloc, map);
  24033. - spin_unlock_irqrestore(&n->list_lock, flags);
  24034. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  24035. }
  24036. for (i = 0; i < t.count; i++) {
  24037. diff --git a/mm/swap.c b/mm/swap.c
  24038. index 4dcf852e1e6d..69c3a5b24060 100644
  24039. --- a/mm/swap.c
  24040. +++ b/mm/swap.c
  24041. @@ -32,6 +32,7 @@
  24042. #include <linux/memcontrol.h>
  24043. #include <linux/gfp.h>
  24044. #include <linux/uio.h>
  24045. +#include <linux/locallock.h>
  24046. #include <linux/hugetlb.h>
  24047. #include <linux/page_idle.h>
  24048. @@ -50,6 +51,8 @@ static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
  24049. #ifdef CONFIG_SMP
  24050. static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
  24051. #endif
  24052. +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
  24053. +DEFINE_LOCAL_IRQ_LOCK(swapvec_lock);
  24054. /*
  24055. * This path almost never happens for VM activity - pages are normally
  24056. @@ -240,11 +243,11 @@ void rotate_reclaimable_page(struct page *page)
  24057. unsigned long flags;
  24058. get_page(page);
  24059. - local_irq_save(flags);
  24060. + local_lock_irqsave(rotate_lock, flags);
  24061. pvec = this_cpu_ptr(&lru_rotate_pvecs);
  24062. if (!pagevec_add(pvec, page) || PageCompound(page))
  24063. pagevec_move_tail(pvec);
  24064. - local_irq_restore(flags);
  24065. + local_unlock_irqrestore(rotate_lock, flags);
  24066. }
  24067. }
  24068. @@ -294,12 +297,13 @@ void activate_page(struct page *page)
  24069. {
  24070. page = compound_head(page);
  24071. if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  24072. - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
  24073. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  24074. + activate_page_pvecs);
  24075. get_page(page);
  24076. if (!pagevec_add(pvec, page) || PageCompound(page))
  24077. pagevec_lru_move_fn(pvec, __activate_page, NULL);
  24078. - put_cpu_var(activate_page_pvecs);
  24079. + put_locked_var(swapvec_lock, activate_page_pvecs);
  24080. }
  24081. }
  24082. @@ -326,7 +330,7 @@ void activate_page(struct page *page)
  24083. static void __lru_cache_activate_page(struct page *page)
  24084. {
  24085. - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  24086. + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
  24087. int i;
  24088. /*
  24089. @@ -348,7 +352,7 @@ static void __lru_cache_activate_page(struct page *page)
  24090. }
  24091. }
  24092. - put_cpu_var(lru_add_pvec);
  24093. + put_locked_var(swapvec_lock, lru_add_pvec);
  24094. }
  24095. /*
  24096. @@ -390,12 +394,12 @@ EXPORT_SYMBOL(mark_page_accessed);
  24097. static void __lru_cache_add(struct page *page)
  24098. {
  24099. - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  24100. + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
  24101. get_page(page);
  24102. if (!pagevec_add(pvec, page) || PageCompound(page))
  24103. __pagevec_lru_add(pvec);
  24104. - put_cpu_var(lru_add_pvec);
  24105. + put_locked_var(swapvec_lock, lru_add_pvec);
  24106. }
  24107. /**
  24108. @@ -593,9 +597,15 @@ void lru_add_drain_cpu(int cpu)
  24109. unsigned long flags;
  24110. /* No harm done if a racing interrupt already did this */
  24111. - local_irq_save(flags);
  24112. +#ifdef CONFIG_PREEMPT_RT_BASE
  24113. + local_lock_irqsave_on(rotate_lock, flags, cpu);
  24114. pagevec_move_tail(pvec);
  24115. - local_irq_restore(flags);
  24116. + local_unlock_irqrestore_on(rotate_lock, flags, cpu);
  24117. +#else
  24118. + local_lock_irqsave(rotate_lock, flags);
  24119. + pagevec_move_tail(pvec);
  24120. + local_unlock_irqrestore(rotate_lock, flags);
  24121. +#endif
  24122. }
  24123. pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
  24124. @@ -627,11 +637,12 @@ void deactivate_file_page(struct page *page)
  24125. return;
  24126. if (likely(get_page_unless_zero(page))) {
  24127. - struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
  24128. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  24129. + lru_deactivate_file_pvecs);
  24130. if (!pagevec_add(pvec, page) || PageCompound(page))
  24131. pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
  24132. - put_cpu_var(lru_deactivate_file_pvecs);
  24133. + put_locked_var(swapvec_lock, lru_deactivate_file_pvecs);
  24134. }
  24135. }
  24136. @@ -646,27 +657,31 @@ void deactivate_file_page(struct page *page)
  24137. void deactivate_page(struct page *page)
  24138. {
  24139. if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
  24140. - struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
  24141. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  24142. + lru_deactivate_pvecs);
  24143. get_page(page);
  24144. if (!pagevec_add(pvec, page) || PageCompound(page))
  24145. pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
  24146. - put_cpu_var(lru_deactivate_pvecs);
  24147. + put_locked_var(swapvec_lock, lru_deactivate_pvecs);
  24148. }
  24149. }
  24150. void lru_add_drain(void)
  24151. {
  24152. - lru_add_drain_cpu(get_cpu());
  24153. - put_cpu();
  24154. + lru_add_drain_cpu(local_lock_cpu(swapvec_lock));
  24155. + local_unlock_cpu(swapvec_lock);
  24156. }
  24157. -static void lru_add_drain_per_cpu(struct work_struct *dummy)
  24158. +#ifdef CONFIG_PREEMPT_RT_BASE
  24159. +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
  24160. {
  24161. - lru_add_drain();
  24162. + local_lock_on(swapvec_lock, cpu);
  24163. + lru_add_drain_cpu(cpu);
  24164. + local_unlock_on(swapvec_lock, cpu);
  24165. }
  24166. -static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
  24167. +#else
  24168. /*
  24169. * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
  24170. @@ -686,6 +701,22 @@ static int __init lru_init(void)
  24171. }
  24172. early_initcall(lru_init);
  24173. +static void lru_add_drain_per_cpu(struct work_struct *dummy)
  24174. +{
  24175. + lru_add_drain();
  24176. +}
  24177. +
  24178. +static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
  24179. +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
  24180. +{
  24181. + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
  24182. +
  24183. + INIT_WORK(work, lru_add_drain_per_cpu);
  24184. + queue_work_on(cpu, lru_add_drain_wq, work);
  24185. + cpumask_set_cpu(cpu, has_work);
  24186. +}
  24187. +#endif
  24188. +
  24189. void lru_add_drain_all(void)
  24190. {
  24191. static DEFINE_MUTEX(lock);
  24192. @@ -697,21 +728,18 @@ void lru_add_drain_all(void)
  24193. cpumask_clear(&has_work);
  24194. for_each_online_cpu(cpu) {
  24195. - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
  24196. -
  24197. if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
  24198. pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
  24199. pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
  24200. pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
  24201. - need_activate_page_drain(cpu)) {
  24202. - INIT_WORK(work, lru_add_drain_per_cpu);
  24203. - queue_work_on(cpu, lru_add_drain_wq, work);
  24204. - cpumask_set_cpu(cpu, &has_work);
  24205. - }
  24206. + need_activate_page_drain(cpu))
  24207. + remote_lru_add_drain(cpu, &has_work);
  24208. }
  24209. +#ifndef CONFIG_PREEMPT_RT_BASE
  24210. for_each_cpu(cpu, &has_work)
  24211. flush_work(&per_cpu(lru_add_drain_work, cpu));
  24212. +#endif
  24213. put_online_cpus();
  24214. mutex_unlock(&lock);
  24215. diff --git a/mm/truncate.c b/mm/truncate.c
  24216. index 9c809e7d73c3..b7681e888ba0 100644
  24217. --- a/mm/truncate.c
  24218. +++ b/mm/truncate.c
  24219. @@ -62,9 +62,12 @@ static void clear_exceptional_entry(struct address_space *mapping,
  24220. * protected by mapping->tree_lock.
  24221. */
  24222. if (!workingset_node_shadows(node) &&
  24223. - !list_empty(&node->private_list))
  24224. - list_lru_del(&workingset_shadow_nodes,
  24225. + !list_empty(&node->private_list)) {
  24226. + local_lock(workingset_shadow_lock);
  24227. + list_lru_del(&__workingset_shadow_nodes,
  24228. &node->private_list);
  24229. + local_unlock(workingset_shadow_lock);
  24230. + }
  24231. __radix_tree_delete_node(&mapping->page_tree, node);
  24232. unlock:
  24233. spin_unlock_irq(&mapping->tree_lock);
  24234. diff --git a/mm/vmalloc.c b/mm/vmalloc.c
  24235. index 195de42bea1f..b46cb686fde7 100644
  24236. --- a/mm/vmalloc.c
  24237. +++ b/mm/vmalloc.c
  24238. @@ -855,7 +855,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
  24239. struct vmap_block *vb;
  24240. struct vmap_area *va;
  24241. unsigned long vb_idx;
  24242. - int node, err;
  24243. + int node, err, cpu;
  24244. void *vaddr;
  24245. node = numa_node_id();
  24246. @@ -898,11 +898,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
  24247. BUG_ON(err);
  24248. radix_tree_preload_end();
  24249. - vbq = &get_cpu_var(vmap_block_queue);
  24250. + cpu = get_cpu_light();
  24251. + vbq = this_cpu_ptr(&vmap_block_queue);
  24252. spin_lock(&vbq->lock);
  24253. list_add_tail_rcu(&vb->free_list, &vbq->free);
  24254. spin_unlock(&vbq->lock);
  24255. - put_cpu_var(vmap_block_queue);
  24256. + put_cpu_light();
  24257. return vaddr;
  24258. }
  24259. @@ -971,6 +972,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
  24260. struct vmap_block *vb;
  24261. void *vaddr = NULL;
  24262. unsigned int order;
  24263. + int cpu;
  24264. BUG_ON(offset_in_page(size));
  24265. BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
  24266. @@ -985,7 +987,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
  24267. order = get_order(size);
  24268. rcu_read_lock();
  24269. - vbq = &get_cpu_var(vmap_block_queue);
  24270. + cpu = get_cpu_light();
  24271. + vbq = this_cpu_ptr(&vmap_block_queue);
  24272. list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  24273. unsigned long pages_off;
  24274. @@ -1008,7 +1011,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
  24275. break;
  24276. }
  24277. - put_cpu_var(vmap_block_queue);
  24278. + put_cpu_light();
  24279. rcu_read_unlock();
  24280. /* Allocate new block if nothing was found */
  24281. diff --git a/mm/vmstat.c b/mm/vmstat.c
  24282. index 6a088df04b29..abda95be88b4 100644
  24283. --- a/mm/vmstat.c
  24284. +++ b/mm/vmstat.c
  24285. @@ -245,6 +245,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
  24286. long x;
  24287. long t;
  24288. + preempt_disable_rt();
  24289. x = delta + __this_cpu_read(*p);
  24290. t = __this_cpu_read(pcp->stat_threshold);
  24291. @@ -254,6 +255,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
  24292. x = 0;
  24293. }
  24294. __this_cpu_write(*p, x);
  24295. + preempt_enable_rt();
  24296. }
  24297. EXPORT_SYMBOL(__mod_zone_page_state);
  24298. @@ -265,6 +267,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
  24299. long x;
  24300. long t;
  24301. + preempt_disable_rt();
  24302. x = delta + __this_cpu_read(*p);
  24303. t = __this_cpu_read(pcp->stat_threshold);
  24304. @@ -274,6 +277,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
  24305. x = 0;
  24306. }
  24307. __this_cpu_write(*p, x);
  24308. + preempt_enable_rt();
  24309. }
  24310. EXPORT_SYMBOL(__mod_node_page_state);
  24311. @@ -306,6 +310,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
  24312. s8 __percpu *p = pcp->vm_stat_diff + item;
  24313. s8 v, t;
  24314. + preempt_disable_rt();
  24315. v = __this_cpu_inc_return(*p);
  24316. t = __this_cpu_read(pcp->stat_threshold);
  24317. if (unlikely(v > t)) {
  24318. @@ -314,6 +319,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
  24319. zone_page_state_add(v + overstep, zone, item);
  24320. __this_cpu_write(*p, -overstep);
  24321. }
  24322. + preempt_enable_rt();
  24323. }
  24324. void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
  24325. @@ -322,6 +328,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
  24326. s8 __percpu *p = pcp->vm_node_stat_diff + item;
  24327. s8 v, t;
  24328. + preempt_disable_rt();
  24329. v = __this_cpu_inc_return(*p);
  24330. t = __this_cpu_read(pcp->stat_threshold);
  24331. if (unlikely(v > t)) {
  24332. @@ -330,6 +337,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
  24333. node_page_state_add(v + overstep, pgdat, item);
  24334. __this_cpu_write(*p, -overstep);
  24335. }
  24336. + preempt_enable_rt();
  24337. }
  24338. void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
  24339. @@ -350,6 +358,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
  24340. s8 __percpu *p = pcp->vm_stat_diff + item;
  24341. s8 v, t;
  24342. + preempt_disable_rt();
  24343. v = __this_cpu_dec_return(*p);
  24344. t = __this_cpu_read(pcp->stat_threshold);
  24345. if (unlikely(v < - t)) {
  24346. @@ -358,6 +367,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
  24347. zone_page_state_add(v - overstep, zone, item);
  24348. __this_cpu_write(*p, overstep);
  24349. }
  24350. + preempt_enable_rt();
  24351. }
  24352. void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
  24353. @@ -366,6 +376,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
  24354. s8 __percpu *p = pcp->vm_node_stat_diff + item;
  24355. s8 v, t;
  24356. + preempt_disable_rt();
  24357. v = __this_cpu_dec_return(*p);
  24358. t = __this_cpu_read(pcp->stat_threshold);
  24359. if (unlikely(v < - t)) {
  24360. @@ -374,6 +385,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
  24361. node_page_state_add(v - overstep, pgdat, item);
  24362. __this_cpu_write(*p, overstep);
  24363. }
  24364. + preempt_enable_rt();
  24365. }
  24366. void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
  24367. diff --git a/mm/workingset.c b/mm/workingset.c
  24368. index 4c4f05655e6e..b97b1e87b54c 100644
  24369. --- a/mm/workingset.c
  24370. +++ b/mm/workingset.c
  24371. @@ -334,7 +334,8 @@ void workingset_activation(struct page *page)
  24372. * point where they would still be useful.
  24373. */
  24374. -struct list_lru workingset_shadow_nodes;
  24375. +struct list_lru __workingset_shadow_nodes;
  24376. +DEFINE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
  24377. static unsigned long count_shadow_nodes(struct shrinker *shrinker,
  24378. struct shrink_control *sc)
  24379. @@ -344,9 +345,9 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
  24380. unsigned long pages;
  24381. /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
  24382. - local_irq_disable();
  24383. - shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc);
  24384. - local_irq_enable();
  24385. + local_lock_irq(workingset_shadow_lock);
  24386. + shadow_nodes = list_lru_shrink_count(&__workingset_shadow_nodes, sc);
  24387. + local_unlock_irq(workingset_shadow_lock);
  24388. if (sc->memcg) {
  24389. pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid,
  24390. @@ -438,9 +439,9 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
  24391. spin_unlock(&mapping->tree_lock);
  24392. ret = LRU_REMOVED_RETRY;
  24393. out:
  24394. - local_irq_enable();
  24395. + local_unlock_irq(workingset_shadow_lock);
  24396. cond_resched();
  24397. - local_irq_disable();
  24398. + local_lock_irq(workingset_shadow_lock);
  24399. spin_lock(lru_lock);
  24400. return ret;
  24401. }
  24402. @@ -451,10 +452,10 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
  24403. unsigned long ret;
  24404. /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
  24405. - local_irq_disable();
  24406. - ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc,
  24407. + local_lock_irq(workingset_shadow_lock);
  24408. + ret = list_lru_shrink_walk(&__workingset_shadow_nodes, sc,
  24409. shadow_lru_isolate, NULL);
  24410. - local_irq_enable();
  24411. + local_unlock_irq(workingset_shadow_lock);
  24412. return ret;
  24413. }
  24414. @@ -492,7 +493,7 @@ static int __init workingset_init(void)
  24415. pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
  24416. timestamp_bits, max_order, bucket_order);
  24417. - ret = __list_lru_init(&workingset_shadow_nodes, true, &shadow_nodes_key);
  24418. + ret = __list_lru_init(&__workingset_shadow_nodes, true, &shadow_nodes_key);
  24419. if (ret)
  24420. goto err;
  24421. ret = register_shrinker(&workingset_shadow_shrinker);
  24422. @@ -500,7 +501,7 @@ static int __init workingset_init(void)
  24423. goto err_list_lru;
  24424. return 0;
  24425. err_list_lru:
  24426. - list_lru_destroy(&workingset_shadow_nodes);
  24427. + list_lru_destroy(&__workingset_shadow_nodes);
  24428. err:
  24429. return ret;
  24430. }
  24431. diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
  24432. index d3548c48369f..8894f0749d8d 100644
  24433. --- a/mm/zsmalloc.c
  24434. +++ b/mm/zsmalloc.c
  24435. @@ -53,6 +53,7 @@
  24436. #include <linux/mount.h>
  24437. #include <linux/migrate.h>
  24438. #include <linux/pagemap.h>
  24439. +#include <linux/locallock.h>
  24440. #define ZSPAGE_MAGIC 0x58
  24441. @@ -70,9 +71,22 @@
  24442. */
  24443. #define ZS_MAX_ZSPAGE_ORDER 2
  24444. #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
  24445. -
  24446. #define ZS_HANDLE_SIZE (sizeof(unsigned long))
  24447. +#ifdef CONFIG_PREEMPT_RT_FULL
  24448. +
  24449. +struct zsmalloc_handle {
  24450. + unsigned long addr;
  24451. + struct mutex lock;
  24452. +};
  24453. +
  24454. +#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
  24455. +
  24456. +#else
  24457. +
  24458. +#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
  24459. +#endif
  24460. +
  24461. /*
  24462. * Object location (<PFN>, <obj_idx>) is encoded as
  24463. * as single (unsigned long) handle value.
  24464. @@ -327,7 +341,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
  24465. static int create_cache(struct zs_pool *pool)
  24466. {
  24467. - pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
  24468. + pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
  24469. 0, 0, NULL);
  24470. if (!pool->handle_cachep)
  24471. return 1;
  24472. @@ -351,10 +365,27 @@ static void destroy_cache(struct zs_pool *pool)
  24473. static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
  24474. {
  24475. - return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
  24476. - gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
  24477. + void *p;
  24478. +
  24479. + p = kmem_cache_alloc(pool->handle_cachep,
  24480. + gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
  24481. +#ifdef CONFIG_PREEMPT_RT_FULL
  24482. + if (p) {
  24483. + struct zsmalloc_handle *zh = p;
  24484. +
  24485. + mutex_init(&zh->lock);
  24486. + }
  24487. +#endif
  24488. + return (unsigned long)p;
  24489. }
  24490. +#ifdef CONFIG_PREEMPT_RT_FULL
  24491. +static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
  24492. +{
  24493. + return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1));
  24494. +}
  24495. +#endif
  24496. +
  24497. static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
  24498. {
  24499. kmem_cache_free(pool->handle_cachep, (void *)handle);
  24500. @@ -373,12 +404,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
  24501. static void record_obj(unsigned long handle, unsigned long obj)
  24502. {
  24503. +#ifdef CONFIG_PREEMPT_RT_FULL
  24504. + struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
  24505. +
  24506. + WRITE_ONCE(zh->addr, obj);
  24507. +#else
  24508. /*
  24509. * lsb of @obj represents handle lock while other bits
  24510. * represent object value the handle is pointing so
  24511. * updating shouldn't do store tearing.
  24512. */
  24513. WRITE_ONCE(*(unsigned long *)handle, obj);
  24514. +#endif
  24515. }
  24516. /* zpool driver */
  24517. @@ -467,6 +504,7 @@ MODULE_ALIAS("zpool-zsmalloc");
  24518. /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
  24519. static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
  24520. +static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock);
  24521. static bool is_zspage_isolated(struct zspage *zspage)
  24522. {
  24523. @@ -902,7 +940,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx)
  24524. static unsigned long handle_to_obj(unsigned long handle)
  24525. {
  24526. +#ifdef CONFIG_PREEMPT_RT_FULL
  24527. + struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
  24528. +
  24529. + return zh->addr;
  24530. +#else
  24531. return *(unsigned long *)handle;
  24532. +#endif
  24533. }
  24534. static unsigned long obj_to_head(struct page *page, void *obj)
  24535. @@ -916,22 +960,46 @@ static unsigned long obj_to_head(struct page *page, void *obj)
  24536. static inline int testpin_tag(unsigned long handle)
  24537. {
  24538. +#ifdef CONFIG_PREEMPT_RT_FULL
  24539. + struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
  24540. +
  24541. + return mutex_is_locked(&zh->lock);
  24542. +#else
  24543. return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
  24544. +#endif
  24545. }
  24546. static inline int trypin_tag(unsigned long handle)
  24547. {
  24548. +#ifdef CONFIG_PREEMPT_RT_FULL
  24549. + struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
  24550. +
  24551. + return mutex_trylock(&zh->lock);
  24552. +#else
  24553. return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
  24554. +#endif
  24555. }
  24556. static void pin_tag(unsigned long handle)
  24557. {
  24558. +#ifdef CONFIG_PREEMPT_RT_FULL
  24559. + struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
  24560. +
  24561. + return mutex_lock(&zh->lock);
  24562. +#else
  24563. bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
  24564. +#endif
  24565. }
  24566. static void unpin_tag(unsigned long handle)
  24567. {
  24568. +#ifdef CONFIG_PREEMPT_RT_FULL
  24569. + struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
  24570. +
  24571. + return mutex_unlock(&zh->lock);
  24572. +#else
  24573. bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
  24574. +#endif
  24575. }
  24576. static void reset_page(struct page *page)
  24577. @@ -1423,7 +1491,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
  24578. class = pool->size_class[class_idx];
  24579. off = (class->size * obj_idx) & ~PAGE_MASK;
  24580. - area = &get_cpu_var(zs_map_area);
  24581. + area = &get_locked_var(zs_map_area_lock, zs_map_area);
  24582. area->vm_mm = mm;
  24583. if (off + class->size <= PAGE_SIZE) {
  24584. /* this object is contained entirely within a page */
  24585. @@ -1477,7 +1545,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
  24586. __zs_unmap_object(area, pages, off, class->size);
  24587. }
  24588. - put_cpu_var(zs_map_area);
  24589. + put_locked_var(zs_map_area_lock, zs_map_area);
  24590. migrate_read_unlock(zspage);
  24591. unpin_tag(handle);
  24592. diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
  24593. index c88a6007e643..5de85b55a821 100644
  24594. --- a/net/bluetooth/hci_sock.c
  24595. +++ b/net/bluetooth/hci_sock.c
  24596. @@ -251,15 +251,13 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
  24597. }
  24598. /* Send frame to sockets with specific channel */
  24599. -void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
  24600. - int flag, struct sock *skip_sk)
  24601. +static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
  24602. + int flag, struct sock *skip_sk)
  24603. {
  24604. struct sock *sk;
  24605. BT_DBG("channel %u len %d", channel, skb->len);
  24606. - read_lock(&hci_sk_list.lock);
  24607. -
  24608. sk_for_each(sk, &hci_sk_list.head) {
  24609. struct sk_buff *nskb;
  24610. @@ -285,6 +283,13 @@ void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
  24611. kfree_skb(nskb);
  24612. }
  24613. +}
  24614. +
  24615. +void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
  24616. + int flag, struct sock *skip_sk)
  24617. +{
  24618. + read_lock(&hci_sk_list.lock);
  24619. + __hci_send_to_channel(channel, skb, flag, skip_sk);
  24620. read_unlock(&hci_sk_list.lock);
  24621. }
  24622. @@ -388,8 +393,8 @@ void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event,
  24623. hdr->index = index;
  24624. hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
  24625. - hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
  24626. - HCI_SOCK_TRUSTED, NULL);
  24627. + __hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
  24628. + HCI_SOCK_TRUSTED, NULL);
  24629. kfree_skb(skb);
  24630. }
  24631. diff --git a/net/core/dev.c b/net/core/dev.c
  24632. index 09007a71c8dd..6cb279747408 100644
  24633. --- a/net/core/dev.c
  24634. +++ b/net/core/dev.c
  24635. @@ -190,6 +190,7 @@ static unsigned int napi_gen_id = NR_CPUS;
  24636. static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
  24637. static seqcount_t devnet_rename_seq;
  24638. +static DEFINE_MUTEX(devnet_rename_mutex);
  24639. static inline void dev_base_seq_inc(struct net *net)
  24640. {
  24641. @@ -211,14 +212,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
  24642. static inline void rps_lock(struct softnet_data *sd)
  24643. {
  24644. #ifdef CONFIG_RPS
  24645. - spin_lock(&sd->input_pkt_queue.lock);
  24646. + raw_spin_lock(&sd->input_pkt_queue.raw_lock);
  24647. #endif
  24648. }
  24649. static inline void rps_unlock(struct softnet_data *sd)
  24650. {
  24651. #ifdef CONFIG_RPS
  24652. - spin_unlock(&sd->input_pkt_queue.lock);
  24653. + raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
  24654. #endif
  24655. }
  24656. @@ -888,7 +889,8 @@ int netdev_get_name(struct net *net, char *name, int ifindex)
  24657. strcpy(name, dev->name);
  24658. rcu_read_unlock();
  24659. if (read_seqcount_retry(&devnet_rename_seq, seq)) {
  24660. - cond_resched();
  24661. + mutex_lock(&devnet_rename_mutex);
  24662. + mutex_unlock(&devnet_rename_mutex);
  24663. goto retry;
  24664. }
  24665. @@ -1157,20 +1159,17 @@ int dev_change_name(struct net_device *dev, const char *newname)
  24666. if (dev->flags & IFF_UP)
  24667. return -EBUSY;
  24668. - write_seqcount_begin(&devnet_rename_seq);
  24669. + mutex_lock(&devnet_rename_mutex);
  24670. + __raw_write_seqcount_begin(&devnet_rename_seq);
  24671. - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
  24672. - write_seqcount_end(&devnet_rename_seq);
  24673. - return 0;
  24674. - }
  24675. + if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
  24676. + goto outunlock;
  24677. memcpy(oldname, dev->name, IFNAMSIZ);
  24678. err = dev_get_valid_name(net, dev, newname);
  24679. - if (err < 0) {
  24680. - write_seqcount_end(&devnet_rename_seq);
  24681. - return err;
  24682. - }
  24683. + if (err < 0)
  24684. + goto outunlock;
  24685. if (oldname[0] && !strchr(oldname, '%'))
  24686. netdev_info(dev, "renamed from %s\n", oldname);
  24687. @@ -1183,11 +1182,12 @@ int dev_change_name(struct net_device *dev, const char *newname)
  24688. if (ret) {
  24689. memcpy(dev->name, oldname, IFNAMSIZ);
  24690. dev->name_assign_type = old_assign_type;
  24691. - write_seqcount_end(&devnet_rename_seq);
  24692. - return ret;
  24693. + err = ret;
  24694. + goto outunlock;
  24695. }
  24696. - write_seqcount_end(&devnet_rename_seq);
  24697. + __raw_write_seqcount_end(&devnet_rename_seq);
  24698. + mutex_unlock(&devnet_rename_mutex);
  24699. netdev_adjacent_rename_links(dev, oldname);
  24700. @@ -1208,7 +1208,8 @@ int dev_change_name(struct net_device *dev, const char *newname)
  24701. /* err >= 0 after dev_alloc_name() or stores the first errno */
  24702. if (err >= 0) {
  24703. err = ret;
  24704. - write_seqcount_begin(&devnet_rename_seq);
  24705. + mutex_lock(&devnet_rename_mutex);
  24706. + __raw_write_seqcount_begin(&devnet_rename_seq);
  24707. memcpy(dev->name, oldname, IFNAMSIZ);
  24708. memcpy(oldname, newname, IFNAMSIZ);
  24709. dev->name_assign_type = old_assign_type;
  24710. @@ -1221,6 +1222,11 @@ int dev_change_name(struct net_device *dev, const char *newname)
  24711. }
  24712. return err;
  24713. +
  24714. +outunlock:
  24715. + __raw_write_seqcount_end(&devnet_rename_seq);
  24716. + mutex_unlock(&devnet_rename_mutex);
  24717. + return err;
  24718. }
  24719. /**
  24720. @@ -2287,6 +2293,7 @@ static void __netif_reschedule(struct Qdisc *q)
  24721. sd->output_queue_tailp = &q->next_sched;
  24722. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  24723. local_irq_restore(flags);
  24724. + preempt_check_resched_rt();
  24725. }
  24726. void __netif_schedule(struct Qdisc *q)
  24727. @@ -2371,6 +2378,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
  24728. __this_cpu_write(softnet_data.completion_queue, skb);
  24729. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  24730. local_irq_restore(flags);
  24731. + preempt_check_resched_rt();
  24732. }
  24733. EXPORT_SYMBOL(__dev_kfree_skb_irq);
  24734. @@ -3112,7 +3120,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
  24735. * This permits qdisc->running owner to get the lock more
  24736. * often and dequeue packets faster.
  24737. */
  24738. +#ifdef CONFIG_PREEMPT_RT_FULL
  24739. + contended = true;
  24740. +#else
  24741. contended = qdisc_is_running(q);
  24742. +#endif
  24743. if (unlikely(contended))
  24744. spin_lock(&q->busylock);
  24745. @@ -3175,8 +3187,10 @@ static void skb_update_prio(struct sk_buff *skb)
  24746. #define skb_update_prio(skb)
  24747. #endif
  24748. +#ifndef CONFIG_PREEMPT_RT_FULL
  24749. DEFINE_PER_CPU(int, xmit_recursion);
  24750. EXPORT_SYMBOL(xmit_recursion);
  24751. +#endif
  24752. /**
  24753. * dev_loopback_xmit - loop back @skb
  24754. @@ -3410,8 +3424,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
  24755. int cpu = smp_processor_id(); /* ok because BHs are off */
  24756. if (txq->xmit_lock_owner != cpu) {
  24757. - if (unlikely(__this_cpu_read(xmit_recursion) >
  24758. - XMIT_RECURSION_LIMIT))
  24759. + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT))
  24760. goto recursion_alert;
  24761. skb = validate_xmit_skb(skb, dev);
  24762. @@ -3421,9 +3434,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
  24763. HARD_TX_LOCK(dev, txq, cpu);
  24764. if (!netif_xmit_stopped(txq)) {
  24765. - __this_cpu_inc(xmit_recursion);
  24766. + xmit_rec_inc();
  24767. skb = dev_hard_start_xmit(skb, dev, txq, &rc);
  24768. - __this_cpu_dec(xmit_recursion);
  24769. + xmit_rec_dec();
  24770. if (dev_xmit_complete(rc)) {
  24771. HARD_TX_UNLOCK(dev, txq);
  24772. goto out;
  24773. @@ -3797,6 +3810,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
  24774. rps_unlock(sd);
  24775. local_irq_restore(flags);
  24776. + preempt_check_resched_rt();
  24777. atomic_long_inc(&skb->dev->rx_dropped);
  24778. kfree_skb(skb);
  24779. @@ -3815,7 +3829,7 @@ static int netif_rx_internal(struct sk_buff *skb)
  24780. struct rps_dev_flow voidflow, *rflow = &voidflow;
  24781. int cpu;
  24782. - preempt_disable();
  24783. + migrate_disable();
  24784. rcu_read_lock();
  24785. cpu = get_rps_cpu(skb->dev, skb, &rflow);
  24786. @@ -3825,13 +3839,13 @@ static int netif_rx_internal(struct sk_buff *skb)
  24787. ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
  24788. rcu_read_unlock();
  24789. - preempt_enable();
  24790. + migrate_enable();
  24791. } else
  24792. #endif
  24793. {
  24794. unsigned int qtail;
  24795. - ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
  24796. - put_cpu();
  24797. + ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
  24798. + put_cpu_light();
  24799. }
  24800. return ret;
  24801. }
  24802. @@ -3865,11 +3879,9 @@ int netif_rx_ni(struct sk_buff *skb)
  24803. trace_netif_rx_ni_entry(skb);
  24804. - preempt_disable();
  24805. + local_bh_disable();
  24806. err = netif_rx_internal(skb);
  24807. - if (local_softirq_pending())
  24808. - do_softirq();
  24809. - preempt_enable();
  24810. + local_bh_enable();
  24811. return err;
  24812. }
  24813. @@ -4348,7 +4360,7 @@ static void flush_backlog(struct work_struct *work)
  24814. skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
  24815. if (skb->dev->reg_state == NETREG_UNREGISTERING) {
  24816. __skb_unlink(skb, &sd->input_pkt_queue);
  24817. - kfree_skb(skb);
  24818. + __skb_queue_tail(&sd->tofree_queue, skb);
  24819. input_queue_head_incr(sd);
  24820. }
  24821. }
  24822. @@ -4358,11 +4370,14 @@ static void flush_backlog(struct work_struct *work)
  24823. skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
  24824. if (skb->dev->reg_state == NETREG_UNREGISTERING) {
  24825. __skb_unlink(skb, &sd->process_queue);
  24826. - kfree_skb(skb);
  24827. + __skb_queue_tail(&sd->tofree_queue, skb);
  24828. input_queue_head_incr(sd);
  24829. }
  24830. }
  24831. + if (!skb_queue_empty(&sd->tofree_queue))
  24832. + raise_softirq_irqoff(NET_RX_SOFTIRQ);
  24833. local_bh_enable();
  24834. +
  24835. }
  24836. static void flush_all_backlogs(void)
  24837. @@ -4853,6 +4868,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
  24838. sd->rps_ipi_list = NULL;
  24839. local_irq_enable();
  24840. + preempt_check_resched_rt();
  24841. /* Send pending IPI's to kick RPS processing on remote cpus. */
  24842. while (remsd) {
  24843. @@ -4866,6 +4882,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
  24844. } else
  24845. #endif
  24846. local_irq_enable();
  24847. + preempt_check_resched_rt();
  24848. }
  24849. static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
  24850. @@ -4895,7 +4912,9 @@ static int process_backlog(struct napi_struct *napi, int quota)
  24851. while (again) {
  24852. struct sk_buff *skb;
  24853. + local_irq_disable();
  24854. while ((skb = __skb_dequeue(&sd->process_queue))) {
  24855. + local_irq_enable();
  24856. rcu_read_lock();
  24857. __netif_receive_skb(skb);
  24858. rcu_read_unlock();
  24859. @@ -4903,9 +4922,9 @@ static int process_backlog(struct napi_struct *napi, int quota)
  24860. if (++work >= quota)
  24861. return work;
  24862. + local_irq_disable();
  24863. }
  24864. - local_irq_disable();
  24865. rps_lock(sd);
  24866. if (skb_queue_empty(&sd->input_pkt_queue)) {
  24867. /*
  24868. @@ -4943,9 +4962,11 @@ void __napi_schedule(struct napi_struct *n)
  24869. local_irq_save(flags);
  24870. ____napi_schedule(this_cpu_ptr(&softnet_data), n);
  24871. local_irq_restore(flags);
  24872. + preempt_check_resched_rt();
  24873. }
  24874. EXPORT_SYMBOL(__napi_schedule);
  24875. +#ifndef CONFIG_PREEMPT_RT_FULL
  24876. /**
  24877. * __napi_schedule_irqoff - schedule for receive
  24878. * @n: entry to schedule
  24879. @@ -4957,6 +4978,7 @@ void __napi_schedule_irqoff(struct napi_struct *n)
  24880. ____napi_schedule(this_cpu_ptr(&softnet_data), n);
  24881. }
  24882. EXPORT_SYMBOL(__napi_schedule_irqoff);
  24883. +#endif
  24884. void __napi_complete(struct napi_struct *n)
  24885. {
  24886. @@ -5246,13 +5268,21 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
  24887. struct softnet_data *sd = this_cpu_ptr(&softnet_data);
  24888. unsigned long time_limit = jiffies + 2;
  24889. int budget = netdev_budget;
  24890. + struct sk_buff_head tofree_q;
  24891. + struct sk_buff *skb;
  24892. LIST_HEAD(list);
  24893. LIST_HEAD(repoll);
  24894. + __skb_queue_head_init(&tofree_q);
  24895. +
  24896. local_irq_disable();
  24897. + skb_queue_splice_init(&sd->tofree_queue, &tofree_q);
  24898. list_splice_init(&sd->poll_list, &list);
  24899. local_irq_enable();
  24900. + while ((skb = __skb_dequeue(&tofree_q)))
  24901. + kfree_skb(skb);
  24902. +
  24903. for (;;) {
  24904. struct napi_struct *n;
  24905. @@ -5283,7 +5313,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
  24906. list_splice_tail(&repoll, &list);
  24907. list_splice(&list, &sd->poll_list);
  24908. if (!list_empty(&sd->poll_list))
  24909. - __raise_softirq_irqoff(NET_RX_SOFTIRQ);
  24910. + __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ);
  24911. net_rps_action_and_irq_enable(sd);
  24912. }
  24913. @@ -8045,16 +8075,20 @@ static int dev_cpu_callback(struct notifier_block *nfb,
  24914. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  24915. local_irq_enable();
  24916. + preempt_check_resched_rt();
  24917. /* Process offline CPU's input_pkt_queue */
  24918. while ((skb = __skb_dequeue(&oldsd->process_queue))) {
  24919. netif_rx_ni(skb);
  24920. input_queue_head_incr(oldsd);
  24921. }
  24922. - while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
  24923. + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
  24924. netif_rx_ni(skb);
  24925. input_queue_head_incr(oldsd);
  24926. }
  24927. + while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
  24928. + kfree_skb(skb);
  24929. + }
  24930. return NOTIFY_OK;
  24931. }
  24932. @@ -8359,8 +8393,9 @@ static int __init net_dev_init(void)
  24933. INIT_WORK(flush, flush_backlog);
  24934. - skb_queue_head_init(&sd->input_pkt_queue);
  24935. - skb_queue_head_init(&sd->process_queue);
  24936. + skb_queue_head_init_raw(&sd->input_pkt_queue);
  24937. + skb_queue_head_init_raw(&sd->process_queue);
  24938. + skb_queue_head_init_raw(&sd->tofree_queue);
  24939. INIT_LIST_HEAD(&sd->poll_list);
  24940. sd->output_queue_tailp = &sd->output_queue;
  24941. #ifdef CONFIG_RPS
  24942. diff --git a/net/core/filter.c b/net/core/filter.c
  24943. index 4eb4ce0aeef4..4f09d6a57217 100644
  24944. --- a/net/core/filter.c
  24945. +++ b/net/core/filter.c
  24946. @@ -1645,7 +1645,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
  24947. {
  24948. int ret;
  24949. - if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
  24950. + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT)) {
  24951. net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
  24952. kfree_skb(skb);
  24953. return -ENETDOWN;
  24954. @@ -1653,9 +1653,9 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
  24955. skb->dev = dev;
  24956. - __this_cpu_inc(xmit_recursion);
  24957. + xmit_rec_inc();
  24958. ret = dev_queue_xmit(skb);
  24959. - __this_cpu_dec(xmit_recursion);
  24960. + xmit_rec_dec();
  24961. return ret;
  24962. }
  24963. diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
  24964. index cad8e791f28e..2a9364fe62a5 100644
  24965. --- a/net/core/gen_estimator.c
  24966. +++ b/net/core/gen_estimator.c
  24967. @@ -84,7 +84,7 @@ struct gen_estimator
  24968. struct gnet_stats_basic_packed *bstats;
  24969. struct gnet_stats_rate_est64 *rate_est;
  24970. spinlock_t *stats_lock;
  24971. - seqcount_t *running;
  24972. + net_seqlock_t *running;
  24973. int ewma_log;
  24974. u32 last_packets;
  24975. unsigned long avpps;
  24976. @@ -213,7 +213,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
  24977. struct gnet_stats_basic_cpu __percpu *cpu_bstats,
  24978. struct gnet_stats_rate_est64 *rate_est,
  24979. spinlock_t *stats_lock,
  24980. - seqcount_t *running,
  24981. + net_seqlock_t *running,
  24982. struct nlattr *opt)
  24983. {
  24984. struct gen_estimator *est;
  24985. @@ -309,7 +309,7 @@ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
  24986. struct gnet_stats_basic_cpu __percpu *cpu_bstats,
  24987. struct gnet_stats_rate_est64 *rate_est,
  24988. spinlock_t *stats_lock,
  24989. - seqcount_t *running, struct nlattr *opt)
  24990. + net_seqlock_t *running, struct nlattr *opt)
  24991. {
  24992. gen_kill_estimator(bstats, rate_est);
  24993. return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, running, opt);
  24994. diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
  24995. index 508e051304fb..bc3b17b78c94 100644
  24996. --- a/net/core/gen_stats.c
  24997. +++ b/net/core/gen_stats.c
  24998. @@ -130,7 +130,7 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
  24999. }
  25000. void
  25001. -__gnet_stats_copy_basic(const seqcount_t *running,
  25002. +__gnet_stats_copy_basic(net_seqlock_t *running,
  25003. struct gnet_stats_basic_packed *bstats,
  25004. struct gnet_stats_basic_cpu __percpu *cpu,
  25005. struct gnet_stats_basic_packed *b)
  25006. @@ -143,10 +143,10 @@ __gnet_stats_copy_basic(const seqcount_t *running,
  25007. }
  25008. do {
  25009. if (running)
  25010. - seq = read_seqcount_begin(running);
  25011. + seq = net_seq_begin(running);
  25012. bstats->bytes = b->bytes;
  25013. bstats->packets = b->packets;
  25014. - } while (running && read_seqcount_retry(running, seq));
  25015. + } while (running && net_seq_retry(running, seq));
  25016. }
  25017. EXPORT_SYMBOL(__gnet_stats_copy_basic);
  25018. @@ -164,7 +164,7 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic);
  25019. * if the room in the socket buffer was not sufficient.
  25020. */
  25021. int
  25022. -gnet_stats_copy_basic(const seqcount_t *running,
  25023. +gnet_stats_copy_basic(net_seqlock_t *running,
  25024. struct gnet_dump *d,
  25025. struct gnet_stats_basic_cpu __percpu *cpu,
  25026. struct gnet_stats_basic_packed *b)
  25027. diff --git a/net/core/skbuff.c b/net/core/skbuff.c
  25028. index a64515583bc1..fec448d29f42 100644
  25029. --- a/net/core/skbuff.c
  25030. +++ b/net/core/skbuff.c
  25031. @@ -64,6 +64,7 @@
  25032. #include <linux/errqueue.h>
  25033. #include <linux/prefetch.h>
  25034. #include <linux/if_vlan.h>
  25035. +#include <linux/locallock.h>
  25036. #include <net/protocol.h>
  25037. #include <net/dst.h>
  25038. @@ -360,6 +361,8 @@ struct napi_alloc_cache {
  25039. static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
  25040. static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
  25041. +static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
  25042. +static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock);
  25043. static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  25044. {
  25045. @@ -367,10 +370,10 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  25046. unsigned long flags;
  25047. void *data;
  25048. - local_irq_save(flags);
  25049. + local_lock_irqsave(netdev_alloc_lock, flags);
  25050. nc = this_cpu_ptr(&netdev_alloc_cache);
  25051. data = __alloc_page_frag(nc, fragsz, gfp_mask);
  25052. - local_irq_restore(flags);
  25053. + local_unlock_irqrestore(netdev_alloc_lock, flags);
  25054. return data;
  25055. }
  25056. @@ -389,9 +392,13 @@ EXPORT_SYMBOL(netdev_alloc_frag);
  25057. static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  25058. {
  25059. - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  25060. + struct napi_alloc_cache *nc;
  25061. + void *data;
  25062. - return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
  25063. + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  25064. + data = __alloc_page_frag(&nc->page, fragsz, gfp_mask);
  25065. + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  25066. + return data;
  25067. }
  25068. void *napi_alloc_frag(unsigned int fragsz)
  25069. @@ -438,13 +445,13 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
  25070. if (sk_memalloc_socks())
  25071. gfp_mask |= __GFP_MEMALLOC;
  25072. - local_irq_save(flags);
  25073. + local_lock_irqsave(netdev_alloc_lock, flags);
  25074. nc = this_cpu_ptr(&netdev_alloc_cache);
  25075. data = __alloc_page_frag(nc, len, gfp_mask);
  25076. pfmemalloc = nc->pfmemalloc;
  25077. - local_irq_restore(flags);
  25078. + local_unlock_irqrestore(netdev_alloc_lock, flags);
  25079. if (unlikely(!data))
  25080. return NULL;
  25081. @@ -485,9 +492,10 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
  25082. struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
  25083. gfp_t gfp_mask)
  25084. {
  25085. - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  25086. + struct napi_alloc_cache *nc;
  25087. struct sk_buff *skb;
  25088. void *data;
  25089. + bool pfmemalloc;
  25090. len += NET_SKB_PAD + NET_IP_ALIGN;
  25091. @@ -505,7 +513,10 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
  25092. if (sk_memalloc_socks())
  25093. gfp_mask |= __GFP_MEMALLOC;
  25094. + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  25095. data = __alloc_page_frag(&nc->page, len, gfp_mask);
  25096. + pfmemalloc = nc->page.pfmemalloc;
  25097. + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  25098. if (unlikely(!data))
  25099. return NULL;
  25100. @@ -516,7 +527,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
  25101. }
  25102. /* use OR instead of assignment to avoid clearing of bits in mask */
  25103. - if (nc->page.pfmemalloc)
  25104. + if (pfmemalloc)
  25105. skb->pfmemalloc = 1;
  25106. skb->head_frag = 1;
  25107. @@ -760,23 +771,26 @@ EXPORT_SYMBOL(consume_skb);
  25108. void __kfree_skb_flush(void)
  25109. {
  25110. - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  25111. + struct napi_alloc_cache *nc;
  25112. + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  25113. /* flush skb_cache if containing objects */
  25114. if (nc->skb_count) {
  25115. kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
  25116. nc->skb_cache);
  25117. nc->skb_count = 0;
  25118. }
  25119. + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  25120. }
  25121. static inline void _kfree_skb_defer(struct sk_buff *skb)
  25122. {
  25123. - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  25124. + struct napi_alloc_cache *nc;
  25125. /* drop skb->head and call any destructors for packet */
  25126. skb_release_all(skb);
  25127. + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  25128. /* record skb to CPU local list */
  25129. nc->skb_cache[nc->skb_count++] = skb;
  25130. @@ -791,6 +805,7 @@ static inline void _kfree_skb_defer(struct sk_buff *skb)
  25131. nc->skb_cache);
  25132. nc->skb_count = 0;
  25133. }
  25134. + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  25135. }
  25136. void __kfree_skb_defer(struct sk_buff *skb)
  25137. {
  25138. diff --git a/net/core/sock.c b/net/core/sock.c
  25139. index e3b60460dc9c..8d15848c3a22 100644
  25140. --- a/net/core/sock.c
  25141. +++ b/net/core/sock.c
  25142. @@ -2493,12 +2493,11 @@ void lock_sock_nested(struct sock *sk, int subclass)
  25143. if (sk->sk_lock.owned)
  25144. __lock_sock(sk);
  25145. sk->sk_lock.owned = 1;
  25146. - spin_unlock(&sk->sk_lock.slock);
  25147. + spin_unlock_bh(&sk->sk_lock.slock);
  25148. /*
  25149. * The sk_lock has mutex_lock() semantics here:
  25150. */
  25151. mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
  25152. - local_bh_enable();
  25153. }
  25154. EXPORT_SYMBOL(lock_sock_nested);
  25155. diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
  25156. index 31f17f0bbd1c..c9525356823c 100644
  25157. --- a/net/ipv4/icmp.c
  25158. +++ b/net/ipv4/icmp.c
  25159. @@ -69,6 +69,7 @@
  25160. #include <linux/jiffies.h>
  25161. #include <linux/kernel.h>
  25162. #include <linux/fcntl.h>
  25163. +#include <linux/sysrq.h>
  25164. #include <linux/socket.h>
  25165. #include <linux/in.h>
  25166. #include <linux/inet.h>
  25167. @@ -77,6 +78,7 @@
  25168. #include <linux/string.h>
  25169. #include <linux/netfilter_ipv4.h>
  25170. #include <linux/slab.h>
  25171. +#include <linux/locallock.h>
  25172. #include <net/snmp.h>
  25173. #include <net/ip.h>
  25174. #include <net/route.h>
  25175. @@ -204,6 +206,8 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
  25176. *
  25177. * On SMP we have one ICMP socket per-cpu.
  25178. */
  25179. +static DEFINE_LOCAL_IRQ_LOCK(icmp_sk_lock);
  25180. +
  25181. static struct sock *icmp_sk(struct net *net)
  25182. {
  25183. return *this_cpu_ptr(net->ipv4.icmp_sk);
  25184. @@ -215,12 +219,18 @@ static inline struct sock *icmp_xmit_lock(struct net *net)
  25185. local_bh_disable();
  25186. + if (!local_trylock(icmp_sk_lock)) {
  25187. + local_bh_enable();
  25188. + return NULL;
  25189. + }
  25190. +
  25191. sk = icmp_sk(net);
  25192. if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
  25193. /* This can happen if the output path signals a
  25194. * dst_link_failure() for an outgoing ICMP packet.
  25195. */
  25196. + local_unlock(icmp_sk_lock);
  25197. local_bh_enable();
  25198. return NULL;
  25199. }
  25200. @@ -230,6 +240,7 @@ static inline struct sock *icmp_xmit_lock(struct net *net)
  25201. static inline void icmp_xmit_unlock(struct sock *sk)
  25202. {
  25203. spin_unlock_bh(&sk->sk_lock.slock);
  25204. + local_unlock(icmp_sk_lock);
  25205. }
  25206. int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
  25207. @@ -358,6 +369,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
  25208. struct sock *sk;
  25209. struct sk_buff *skb;
  25210. + local_lock(icmp_sk_lock);
  25211. sk = icmp_sk(dev_net((*rt)->dst.dev));
  25212. if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
  25213. icmp_param->data_len+icmp_param->head_len,
  25214. @@ -380,6 +392,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
  25215. skb->ip_summed = CHECKSUM_NONE;
  25216. ip_push_pending_frames(sk, fl4);
  25217. }
  25218. + local_unlock(icmp_sk_lock);
  25219. }
  25220. /*
  25221. @@ -899,6 +912,30 @@ static bool icmp_redirect(struct sk_buff *skb)
  25222. return true;
  25223. }
  25224. +/*
  25225. + * 32bit and 64bit have different timestamp length, so we check for
  25226. + * the cookie at offset 20 and verify it is repeated at offset 50
  25227. + */
  25228. +#define CO_POS0 20
  25229. +#define CO_POS1 50
  25230. +#define CO_SIZE sizeof(int)
  25231. +#define ICMP_SYSRQ_SIZE 57
  25232. +
  25233. +/*
  25234. + * We got a ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
  25235. + * pattern and if it matches send the next byte as a trigger to sysrq.
  25236. + */
  25237. +static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
  25238. +{
  25239. + int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
  25240. + char *p = skb->data;
  25241. +
  25242. + if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
  25243. + !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
  25244. + p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
  25245. + handle_sysrq(p[CO_POS0 + CO_SIZE]);
  25246. +}
  25247. +
  25248. /*
  25249. * Handle ICMP_ECHO ("ping") requests.
  25250. *
  25251. @@ -926,6 +963,11 @@ static bool icmp_echo(struct sk_buff *skb)
  25252. icmp_param.data_len = skb->len;
  25253. icmp_param.head_len = sizeof(struct icmphdr);
  25254. icmp_reply(&icmp_param, skb);
  25255. +
  25256. + if (skb->len == ICMP_SYSRQ_SIZE &&
  25257. + net->ipv4.sysctl_icmp_echo_sysrq) {
  25258. + icmp_check_sysrq(net, skb);
  25259. + }
  25260. }
  25261. /* should there be an ICMP stat for ignored echos? */
  25262. return true;
  25263. diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
  25264. index 566cfc50f7cf..4b8551d78a3b 100644
  25265. --- a/net/ipv4/sysctl_net_ipv4.c
  25266. +++ b/net/ipv4/sysctl_net_ipv4.c
  25267. @@ -680,6 +680,13 @@ static struct ctl_table ipv4_net_table[] = {
  25268. .mode = 0644,
  25269. .proc_handler = proc_dointvec
  25270. },
  25271. + {
  25272. + .procname = "icmp_echo_sysrq",
  25273. + .data = &init_net.ipv4.sysctl_icmp_echo_sysrq,
  25274. + .maxlen = sizeof(int),
  25275. + .mode = 0644,
  25276. + .proc_handler = proc_dointvec
  25277. + },
  25278. {
  25279. .procname = "icmp_ignore_bogus_error_responses",
  25280. .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
  25281. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
  25282. index b3960738464e..17699390a324 100644
  25283. --- a/net/ipv4/tcp_ipv4.c
  25284. +++ b/net/ipv4/tcp_ipv4.c
  25285. @@ -62,6 +62,7 @@
  25286. #include <linux/init.h>
  25287. #include <linux/times.h>
  25288. #include <linux/slab.h>
  25289. +#include <linux/locallock.h>
  25290. #include <net/net_namespace.h>
  25291. #include <net/icmp.h>
  25292. @@ -568,6 +569,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
  25293. }
  25294. EXPORT_SYMBOL(tcp_v4_send_check);
  25295. +static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock);
  25296. /*
  25297. * This routine will send an RST to the other tcp.
  25298. *
  25299. @@ -695,7 +697,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
  25300. offsetof(struct inet_timewait_sock, tw_bound_dev_if));
  25301. arg.tos = ip_hdr(skb)->tos;
  25302. +
  25303. local_bh_disable();
  25304. + local_lock(tcp_sk_lock);
  25305. ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
  25306. skb, &TCP_SKB_CB(skb)->header.h4.opt,
  25307. ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
  25308. @@ -703,6 +707,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
  25309. __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
  25310. __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
  25311. + local_unlock(tcp_sk_lock);
  25312. local_bh_enable();
  25313. #ifdef CONFIG_TCP_MD5SIG
  25314. @@ -780,12 +785,14 @@ static void tcp_v4_send_ack(struct net *net,
  25315. arg.bound_dev_if = oif;
  25316. arg.tos = tos;
  25317. local_bh_disable();
  25318. + local_lock(tcp_sk_lock);
  25319. ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
  25320. skb, &TCP_SKB_CB(skb)->header.h4.opt,
  25321. ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
  25322. &arg, arg.iov[0].iov_len);
  25323. __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
  25324. + local_unlock(tcp_sk_lock);
  25325. local_bh_enable();
  25326. }
  25327. diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
  25328. index 439e597fd374..ca0daeaff370 100644
  25329. --- a/net/mac80211/rx.c
  25330. +++ b/net/mac80211/rx.c
  25331. @@ -4229,7 +4229,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
  25332. struct ieee80211_supported_band *sband;
  25333. struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
  25334. - WARN_ON_ONCE(softirq_count() == 0);
  25335. + WARN_ON_ONCE_NONRT(softirq_count() == 0);
  25336. if (WARN_ON(status->band >= NUM_NL80211_BANDS))
  25337. goto drop;
  25338. diff --git a/net/netfilter/core.c b/net/netfilter/core.c
  25339. index d869ea50623e..5cafa87b030b 100644
  25340. --- a/net/netfilter/core.c
  25341. +++ b/net/netfilter/core.c
  25342. @@ -22,12 +22,18 @@
  25343. #include <linux/proc_fs.h>
  25344. #include <linux/mutex.h>
  25345. #include <linux/slab.h>
  25346. +#include <linux/locallock.h>
  25347. #include <linux/rcupdate.h>
  25348. #include <net/net_namespace.h>
  25349. #include <net/sock.h>
  25350. #include "nf_internals.h"
  25351. +#ifdef CONFIG_PREEMPT_RT_BASE
  25352. +DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
  25353. +EXPORT_PER_CPU_SYMBOL(xt_write_lock);
  25354. +#endif
  25355. +
  25356. static DEFINE_MUTEX(afinfo_mutex);
  25357. const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
  25358. diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
  25359. index 267db0d603bc..00994de54d57 100644
  25360. --- a/net/packet/af_packet.c
  25361. +++ b/net/packet/af_packet.c
  25362. @@ -63,6 +63,7 @@
  25363. #include <linux/if_packet.h>
  25364. #include <linux/wireless.h>
  25365. #include <linux/kernel.h>
  25366. +#include <linux/delay.h>
  25367. #include <linux/kmod.h>
  25368. #include <linux/slab.h>
  25369. #include <linux/vmalloc.h>
  25370. @@ -694,7 +695,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data)
  25371. if (BLOCK_NUM_PKTS(pbd)) {
  25372. while (atomic_read(&pkc->blk_fill_in_prog)) {
  25373. /* Waiting for skb_copy_bits to finish... */
  25374. - cpu_relax();
  25375. + cpu_chill();
  25376. }
  25377. }
  25378. @@ -956,7 +957,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
  25379. if (!(status & TP_STATUS_BLK_TMO)) {
  25380. while (atomic_read(&pkc->blk_fill_in_prog)) {
  25381. /* Waiting for skb_copy_bits to finish... */
  25382. - cpu_relax();
  25383. + cpu_chill();
  25384. }
  25385. }
  25386. prb_close_block(pkc, pbd, po, status);
  25387. diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
  25388. index 977f69886c00..f3e7a36b0396 100644
  25389. --- a/net/rds/ib_rdma.c
  25390. +++ b/net/rds/ib_rdma.c
  25391. @@ -34,6 +34,7 @@
  25392. #include <linux/slab.h>
  25393. #include <linux/rculist.h>
  25394. #include <linux/llist.h>
  25395. +#include <linux/delay.h>
  25396. #include "rds_single_path.h"
  25397. #include "ib_mr.h"
  25398. @@ -210,7 +211,7 @@ static inline void wait_clean_list_grace(void)
  25399. for_each_online_cpu(cpu) {
  25400. flag = &per_cpu(clean_list_grace, cpu);
  25401. while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
  25402. - cpu_relax();
  25403. + cpu_chill();
  25404. }
  25405. }
  25406. diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
  25407. index 7d921e56e715..13df56a738e5 100644
  25408. --- a/net/rxrpc/security.c
  25409. +++ b/net/rxrpc/security.c
  25410. @@ -19,9 +19,6 @@
  25411. #include <keys/rxrpc-type.h>
  25412. #include "ar-internal.h"
  25413. -static LIST_HEAD(rxrpc_security_methods);
  25414. -static DECLARE_RWSEM(rxrpc_security_sem);
  25415. -
  25416. static const struct rxrpc_security *rxrpc_security_types[] = {
  25417. [RXRPC_SECURITY_NONE] = &rxrpc_no_security,
  25418. #ifdef CONFIG_RXKAD
  25419. diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
  25420. index ea13df1be067..76c20745b502 100644
  25421. --- a/net/sched/sch_api.c
  25422. +++ b/net/sched/sch_api.c
  25423. @@ -980,7 +980,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
  25424. rcu_assign_pointer(sch->stab, stab);
  25425. }
  25426. if (tca[TCA_RATE]) {
  25427. - seqcount_t *running;
  25428. + net_seqlock_t *running;
  25429. err = -EOPNOTSUPP;
  25430. if (sch->flags & TCQ_F_MQROOT)
  25431. diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
  25432. index 9016c8baf2aa..d925f0e63679 100644
  25433. --- a/net/sched/sch_generic.c
  25434. +++ b/net/sched/sch_generic.c
  25435. @@ -425,7 +425,11 @@ struct Qdisc noop_qdisc = {
  25436. .ops = &noop_qdisc_ops,
  25437. .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
  25438. .dev_queue = &noop_netdev_queue,
  25439. +#ifdef CONFIG_PREEMPT_RT_BASE
  25440. + .running = __SEQLOCK_UNLOCKED(noop_qdisc.running),
  25441. +#else
  25442. .running = SEQCNT_ZERO(noop_qdisc.running),
  25443. +#endif
  25444. .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
  25445. };
  25446. EXPORT_SYMBOL(noop_qdisc);
  25447. @@ -624,9 +628,17 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
  25448. lockdep_set_class(&sch->busylock,
  25449. dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
  25450. +#ifdef CONFIG_PREEMPT_RT_BASE
  25451. + seqlock_init(&sch->running);
  25452. + lockdep_set_class(&sch->running.seqcount,
  25453. + dev->qdisc_running_key ?: &qdisc_running_key);
  25454. + lockdep_set_class(&sch->running.lock,
  25455. + dev->qdisc_running_key ?: &qdisc_running_key);
  25456. +#else
  25457. seqcount_init(&sch->running);
  25458. lockdep_set_class(&sch->running,
  25459. dev->qdisc_running_key ?: &qdisc_running_key);
  25460. +#endif
  25461. sch->ops = ops;
  25462. sch->enqueue = ops->enqueue;
  25463. @@ -926,7 +938,7 @@ void dev_deactivate_many(struct list_head *head)
  25464. /* Wait for outstanding qdisc_run calls. */
  25465. list_for_each_entry(dev, head, close_list)
  25466. while (some_qdisc_is_busy(dev))
  25467. - yield();
  25468. + msleep(1);
  25469. }
  25470. void dev_deactivate(struct net_device *dev)
  25471. diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
  25472. index 9c9db55a0c1e..e6583b018a72 100644
  25473. --- a/net/sunrpc/svc_xprt.c
  25474. +++ b/net/sunrpc/svc_xprt.c
  25475. @@ -396,7 +396,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
  25476. goto out;
  25477. }
  25478. - cpu = get_cpu();
  25479. + cpu = get_cpu_light();
  25480. pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
  25481. atomic_long_inc(&pool->sp_stats.packets);
  25482. @@ -432,7 +432,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
  25483. atomic_long_inc(&pool->sp_stats.threads_woken);
  25484. wake_up_process(rqstp->rq_task);
  25485. - put_cpu();
  25486. + put_cpu_light();
  25487. goto out;
  25488. }
  25489. rcu_read_unlock();
  25490. @@ -453,7 +453,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
  25491. goto redo_search;
  25492. }
  25493. rqstp = NULL;
  25494. - put_cpu();
  25495. + put_cpu_light();
  25496. out:
  25497. trace_svc_xprt_do_enqueue(xprt, rqstp);
  25498. }
  25499. diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
  25500. index 6fdc97ef6023..523e0420d7f0 100755
  25501. --- a/scripts/mkcompile_h
  25502. +++ b/scripts/mkcompile_h
  25503. @@ -4,7 +4,8 @@ TARGET=$1
  25504. ARCH=$2
  25505. SMP=$3
  25506. PREEMPT=$4
  25507. -CC=$5
  25508. +RT=$5
  25509. +CC=$6
  25510. vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; }
  25511. @@ -57,6 +58,7 @@ UTS_VERSION="#$VERSION"
  25512. CONFIG_FLAGS=""
  25513. if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
  25514. if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
  25515. +if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi
  25516. UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
  25517. # Truncate to maximum length
  25518. diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
  25519. index 9d33c1e85c79..3d307bda86f9 100644
  25520. --- a/sound/core/pcm_native.c
  25521. +++ b/sound/core/pcm_native.c
  25522. @@ -135,7 +135,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock);
  25523. void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream)
  25524. {
  25525. if (!substream->pcm->nonatomic)
  25526. - local_irq_disable();
  25527. + local_irq_disable_nort();
  25528. snd_pcm_stream_lock(substream);
  25529. }
  25530. EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq);
  25531. @@ -150,7 +150,7 @@ void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream)
  25532. {
  25533. snd_pcm_stream_unlock(substream);
  25534. if (!substream->pcm->nonatomic)
  25535. - local_irq_enable();
  25536. + local_irq_enable_nort();
  25537. }
  25538. EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq);
  25539. @@ -158,7 +158,7 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream)
  25540. {
  25541. unsigned long flags = 0;
  25542. if (!substream->pcm->nonatomic)
  25543. - local_irq_save(flags);
  25544. + local_irq_save_nort(flags);
  25545. snd_pcm_stream_lock(substream);
  25546. return flags;
  25547. }
  25548. @@ -176,7 +176,7 @@ void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream,
  25549. {
  25550. snd_pcm_stream_unlock(substream);
  25551. if (!substream->pcm->nonatomic)
  25552. - local_irq_restore(flags);
  25553. + local_irq_restore_nort(flags);
  25554. }
  25555. EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore);