patch-realtime 788 KB

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521
715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742017421174221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631
786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741
857518576185771857818579185801858118582185831858418585185861858718588185891859018591185921859318594185951859618597185981859918600186011860218603186041860518606186071860818609186101861118612186131861418615186161861718618186191862018621186221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211902219023190241902519026190271902819029190301903119032190331903419035190361903719038190391904019041190421904319044190451904619047190481904919050190511905219053190541905519056190571905819059190601906119062190631906419065190661906719068190691907019071190721907319074190751907619077190781907919080190811908219083190841908519086190871908819089190901909119092190931909419095190961909719098190991910019101191021910319104191051910619107191081910919110191111911219113191141911519116191171911819119191201912119122191231912419125191261912719128191291913019131191321913319134191351913619137191381913919140191411914219143191441914519146191471914819149191501915119152191531915419155191561915719158191591916019161191621916319164191651916619167191681916919170191711917219173191741917519176191771917819179191801918119182191831918419185191861918719188191891919019191191921919319194191951919619197191981919919200192011920219203192041920519206192071920819209192101921119212192131921419215192161921719218192191922019221192221922319224192251922619227192281922919230192311923219233192341923519236192371923819239192401924119242192431924419245192461924719248192491925019251192521925319254192551925619257192581925919260192611926219263192641926519266192671926819269192701927119272192731927419275192761927719278192791928019281192821928319284192851
928619287192881928919290192911929219293192941929519296192971929819299193001930119302193031930419305193061930719308193091931019311193121931319314193151931619317193181931919320193211932219323193241932519326193271932819329193301933119332193331933419335193361933719338193391934019341193421934319344193451934619347193481934919350193511935219353193541935519356193571935819359193601936119362193631936419365193661936719368193691937019371193721937319374193751937619377193781937919380193811938219383193841938519386193871938819389193901939119392193931939419395193961939719398193991940019401194021940319404194051940619407194081940919410194111941219413194141941519416194171941819419194201942119422194231942419425194261942719428194291943019431194321943319434194351943619437194381943919440194411944219443194441944519446194471944819449194501945119452194531945419455194561945719458194591946019461194621946319464194651946619467194681946919470194711947219473194741947519476194771947819479194801948119482194831948419485194861948719488194891949019491194921949319494194951949619497194981949919500195011950219503195041950519506195071950819509195101951119512195131951419515195161951719518195191952019521195221952319524195251952619527195281952919530195311953219533195341953519536195371953819539195401954119542195431954419545195461954719548195491955019551195521955319554195551955619557195581955919560195611956219563195641956519566195671956819569195701957119572195731957419575195761957719578195791958019581195821958319584195851958619587195881958919590195911959219593195941959519596195971959819599196001960119602196031960419605196061960719608196091961019611196121961319614196151961619617196181961919620196211962219623196241962519626196271962819629196301963119632196331963419635196361963719638196391964019641196421964319644196451964619647196481964919650196511965219653196541965519656196571965819659196601966119662196631966419665196661966719668196691967019671196721967319674196751967619677196781967919680196811968219683196841968519686196871968819689196901969119692196931969419695196961969719698196991970019701197021970319704197051970619707197081970919710197111971219713197141971519716197171971819719197201972119722197231972419725197261972719728197291973019731197321973319734197351973619737197381973919740197411974219743197441974519746197471974819749197501975119752197531975419755197561975719758197591976019761197621976319764197651976619767197681976919770197711977219773197741977519776197771977819779197801978119782197831978419785197861978719788197891979019791197921979319794197951979619797197981979919800198011980219803198041980519806198071980819809198101981119812198131981419815198161981719818198191982019821198221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961
999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212022220223202242022520226202272022820229202302023120232202332023420235202362023720238202392024020241202422024320244202452024620247202482024920250202512025220253202542025520256202572025820259202602026120262202632026420265202662026720268202692027020271202722027320274202752027620277202782027920280202812028220283202842028520286202872028820289202902029120292202932029420295202962029720298202992030020301203022030320304203052030620307203082030920310203112031220313203142031520316203172031820319203202032120322203232032420325203262032720328203292033020331203322033320334203352033620337203382033920340203412034220343203442034520346203472034820349203502035120352203532035420355203562035720358203592036020361203622036320364203652036620367203682036920370203712037220373203742037520376203772037820379203802038120382203832038420385203862038720388203892039020391203922039320394203952039620397203982039920400204012040220403204042040520406204072040820409204102041120412204132041420415204162041720418204192042020421204222042320424204252042620427204282042920430204312043220433204342043520436204372043820439204402044120442204432044420445204462044720448204492045020451204522045320454204552045620457204582045920460204612046220463204642046520466204672046820469204702047120472204732047420475204762047720478204792048020481204822048320484204852048620487204882048920490204912049220493204942049520496204972049820499205002050120502205032050420505205062050720508205092051020511205122051320514205152051620517205182051920520205212052220523205242052520526205272052820529205302053120532205332053420535205362053720538205392054020541205422054320544205452054620547205482054920550205512055220553205542055520556205572055820559205602056120562205632056420565205662056720568205692057020571205722057320574205752057620577205782057920580205812058220583205842058520586205872058820589205902059120592205932059420595205962059720598205992060020601206022060320604206052060620607206082060920610206112061220613206142061520616206172061820619206202062120622206232062420625206262062720628206292063020631206322063320634206352063620637206382063920640206412064220643206442064520646206472064820649206502065120652206532065420655206562065720658206592066020661206622066320664206652066620667206682066920670206712067220673206742067520676206772067820679206802068120682206832068420685206862068720688206892069020691206922069320694206952069620697206982069920700207012070220703207042070520706207072
070820709207102071120712207132071420715207162071720718207192072020721207222072320724207252072620727207282072920730207312073220733207342073520736207372073820739207402074120742207432074420745207462074720748207492075020751207522075320754207552075620757207582075920760207612076220763207642076520766207672076820769207702077120772207732077420775207762077720778207792078020781207822078320784207852078620787207882078920790207912079220793207942079520796207972079820799208002080120802208032080420805208062080720808208092081020811208122081320814208152081620817208182081920820208212082220823208242082520826208272082820829208302083120832208332083420835208362083720838208392084020841208422084320844208452084620847208482084920850208512085220853208542085520856208572085820859208602086120862208632086420865208662086720868208692087020871208722087320874208752087620877208782087920880208812088220883208842088520886208872088820889208902089120892208932089420895208962089720898208992090020901209022090320904209052090620907209082090920910209112091220913209142091520916209172091820919209202092120922209232092420925209262092720928209292093020931209322093320934209352093620937209382093920940209412094220943209442094520946209472094820949209502095120952209532095420955209562095720958209592096020961209622096320964209652096620967209682096920970209712097220973209742097520976209772097820979209802098120982209832098420985209862098720988209892099020991209922099320994209952099620997209982099921000210012100221003210042100521006210072100821009210102101121012210132101421015210162101721018210192102021021210222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303213042130521306213072130821309213102131121312213132131421315213162131721318213192132021321213222132321324213252132621327213282132921330213312133221333213342133521336213372133821339213402134121342213432134421345213462134721348213492135021351213522135321354213552135621357213582135921360213612136221363213642136521366213672136821369213702137121372213732137421375213762137721378213792138021381213822138321384213852138621387213882138921390213912139221393213942139521396213972139821399214002140121402214032140421405214062140721408214092141021411214122141321414214152141621417214182
141921420214212142221423214242142521426214272142821429214302143121432214332143421435214362143721438214392144021441214422144321444214452144621447214482144921450214512145221453214542145521456214572145821459214602146121462214632146421465214662146721468214692147021471214722147321474214752147621477214782147921480214812148221483214842148521486214872148821489214902149121492214932149421495214962149721498214992150021501215022150321504215052150621507215082150921510215112151221513215142151521516215172151821519215202152121522215232152421525215262152721528215292153021531215322153321534215352153621537215382153921540215412154221543215442154521546215472154821549215502155121552215532155421555215562155721558215592156021561215622156321564215652156621567215682156921570215712157221573215742157521576215772157821579215802158121582215832158421585215862158721588215892159021591215922159321594215952159621597215982159921600216012160221603216042160521606216072160821609216102161121612216132161421615216162161721618216192162021621216222162321624216252162621627216282162921630216312163221633216342163521636216372163821639216402164121642216432164421645216462164721648216492165021651216522165321654216552165621657216582165921660216612166221663216642166521666216672166821669216702167121672216732167421675216762167721678216792168021681216822168321684216852168621687216882168921690216912169221693216942169521696216972169821699217002170121702217032170421705217062170721708217092171021711217122171321714217152171621717217182171921720217212172221723217242172521726217272172821729217302173121732217332173421735217362173721738217392174021741217422174321744217452174621747217482174921750217512175221753217542175521756217572175821759217602176121762217632176421765217662176721768217692177021771217722177321774217752177621777217782177921780217812178221783217842178521786217872178821789217902179121792217932179421795217962179721798217992180021801218022180321804218052180621807218082180921810218112181221813218142181521816218172181821819218202182121822218232182421825218262182721828218292183021831218322183321834218352183621837218382183921840218412184221843218442184521846218472184821849218502185121852218532185421855218562185721858218592186021861218622186321864218652186621867218682186921870218712187221873218742187521876218772187821879218802188121882218832188421885218862188721888218892189021891218922189321894218952189621897218982189921900219012190221903219042190521906219072190821909219102191121912219132191421915219162191721918219192192021921219222192321924219252192621927219282192921930219312193221933219342193521936219372193821939219402194121942219432194421945219462194721948219492195021951219522195321954219552195621957219582195921960219612196221963219642196521966219672196821969219702197121972219732197421975219762197721978219792198021981219822198321984219852198621987219882198921990219912199221993219942199521996219972199821999220002200122002220032200422005220062200722008220092201022011220122201322014220152201622017220182201922020220212202222023220242202522026220272202822029220302203122032220332203422035220362203722038220392204022041220422204322044220452204622047220482204922050220512205222053220542205522056220572205822059220602206122062220632206422065220662206722068220692207022071220722207322074220752207622077220782207922080220812208222083220842208522086220872208822089220902209122092220932209422095220962209722098220992210022101221022210322104221052210622107221082210922110221112211222113221142211522116221172211822119221202212122122221232212422125221262212722128221292
213022131221322213322134221352213622137221382213922140221412214222143221442214522146221472214822149221502215122152221532215422155221562215722158221592216022161221622216322164221652216622167221682216922170221712217222173221742217522176221772217822179221802218122182221832218422185221862218722188221892219022191221922219322194221952219622197221982219922200222012220222203222042220522206222072220822209222102221122212222132221422215222162221722218222192222022221222222222322224222252222622227222282222922230222312223222233222342223522236222372223822239222402224122242222432224422245222462224722248222492225022251222522225322254222552225622257222582225922260222612226222263222642226522266222672226822269222702227122272222732227422275222762227722278222792228022281222822228322284222852228622287222882228922290222912229222293222942229522296222972229822299223002230122302223032230422305223062230722308223092231022311223122231322314223152231622317223182231922320223212232222323223242232522326223272232822329223302233122332223332233422335223362233722338223392234022341223422234322344223452234622347223482234922350223512235222353223542235522356223572235822359223602236122362223632236422365223662236722368223692237022371223722237322374223752237622377223782237922380223812238222383223842238522386223872238822389223902239122392223932239422395223962239722398223992240022401224022240322404224052240622407224082240922410224112241222413224142241522416224172241822419224202242122422224232242422425224262242722428224292243022431224322243322434224352243622437224382243922440224412244222443224442244522446224472244822449224502245122452224532245422455224562245722458224592246022461224622246322464224652246622467224682246922470224712247222473224742247522476224772247822479224802248122482224832248422485224862248722488224892249022491224922249322494224952249622497224982249922500225012250222503225042250522506225072250822509225102251122512225132251422515225162251722518225192252022521225222252322524225252252622527225282252922530225312253222533225342253522536225372253822539225402254122542225432254422545225462254722548225492255022551225522255322554225552255622557225582255922560225612256222563225642256522566225672256822569225702257122572225732257422575225762257722578225792258022581225822258322584225852258622587225882258922590225912259222593225942259522596225972259822599226002260122602226032260422605226062260722608226092261022611226122261322614226152261622617226182261922620226212262222623226242262522626226272262822629226302263122632226332263422635226362263722638226392264022641226422264322644226452264622647226482264922650226512265222653226542265522656226572265822659226602266122662226632266422665226662266722668226692267022671226722267322674226752267622677226782267922680226812268222683226842268522686226872268822689226902269122692226932269422695226962269722698226992270022701227022270322704227052270622707227082270922710227112271222713227142271522716227172271822719227202272122722227232272422725227262272722728227292273022731227322273322734227352273622737227382273922740227412274222743227442274522746227472274822749227502275122752227532275422755227562275722758227592276022761227622276322764227652276622767227682276922770227712277222773227742277522776227772277822779227802278122782227832278422785227862278722788227892279022791227922279322794227952279622797227982279922800228012280222803228042280522806228072280822809228102281122812228132281422815228162281722818228192282022821228222282322824228252282622827228282282922830228312283222833228342283522836228372283822839228402
284122842228432284422845228462284722848228492285022851228522285322854228552285622857228582285922860228612286222863228642286522866228672286822869228702287122872228732287422875228762287722878228792288022881228822288322884228852288622887228882288922890228912289222893228942289522896228972289822899229002290122902229032290422905229062290722908229092291022911229122291322914229152291622917229182291922920229212292222923229242292522926229272292822929229302293122932229332293422935229362293722938229392294022941229422294322944229452294622947229482294922950229512295222953229542295522956229572295822959229602296122962229632296422965229662296722968229692297022971229722297322974229752297622977229782297922980229812298222983229842298522986229872298822989229902299122992229932299422995229962299722998229992300023001230022300323004230052300623007230082300923010230112301223013230142301523016230172301823019230202302123022230232302423025230262302723028230292303023031230322303323034230352303623037230382303923040230412304223043230442304523046230472304823049230502305123052230532305423055230562305723058230592306023061230622306323064230652306623067230682306923070230712307223073230742307523076230772307823079230802308123082230832308423085230862308723088230892309023091230922309323094230952309623097230982309923100231012310223103231042310523106231072310823109231102311123112231132311423115231162311723118231192312023121231222312323124231252312623127231282312923130231312313223133231342313523136231372313823139231402314123142231432314423145231462314723148231492315023151231522315323154231552315623157231582315923160231612316223163231642316523166231672316823169231702317123172231732317423175231762317723178231792318023181231822318323184231852318623187231882318923190231912319223193231942319523196231972319823199232002320123202232032320423205232062320723208232092321023211232122321323214232152321623217232182321923220232212322223223232242322523226232272322823229232302323123232232332323423235232362323723238232392324023241232422324323244232452324623247232482324923250232512325223253232542325523256232572325823259232602326123262232632326423265232662326723268232692327023271232722327323274232752327623277232782327923280232812328223283232842328523286232872328823289232902329123292232932329423295232962329723298232992330023301233022330323304233052330623307233082330923310233112331223313233142331523316233172331823319233202332123322233232332423325233262332723328233292333023331233322333323334233352333623337233382333923340233412334223343233442334523346233472334823349233502335123352233532335423355233562335723358233592336023361233622336323364233652336623367233682336923370233712337223373233742337523376233772337823379233802338123382233832338423385233862338723388233892339023391233922339323394233952339623397233982339923400234012340223403234042340523406234072340823409234102341123412234132341423415234162341723418234192342023421234222342323424234252342623427234282342923430234312343223433234342343523436234372343823439234402344123442234432344423445234462344723448234492345023451234522345323454234552345623457234582345923460234612346223463234642346523466234672346823469234702347123472234732347423475234762347723478234792348023481234822348323484234852348623487234882348923490234912349223493234942349523496234972349823499235002350123502235032350423505235062350723508235092351023511235122351323514235152351623517235182351923520235212352223523235242352523526235272352823529235302353123532235332353423535235362353723538235392354023541235422354323544235452354623547235482354923550235512
355223553235542355523556235572355823559235602356123562235632356423565235662356723568235692357023571235722357323574235752357623577235782357923580235812358223583235842358523586235872358823589235902359123592235932359423595235962359723598235992360023601236022360323604236052360623607236082360923610236112361223613236142361523616236172361823619236202362123622236232362423625236262362723628236292363023631236322363323634236352363623637236382363923640236412364223643236442364523646236472364823649236502365123652236532365423655236562365723658236592366023661236622366323664236652366623667236682366923670236712367223673236742367523676236772367823679236802368123682236832368423685236862368723688236892369023691236922369323694236952369623697236982369923700237012370223703237042370523706237072370823709237102371123712237132371423715237162371723718237192372023721237222372323724237252372623727237282372923730237312373223733237342373523736237372373823739237402374123742237432374423745237462374723748237492375023751237522375323754237552375623757237582375923760237612376223763237642376523766237672376823769237702377123772237732377423775237762377723778237792378023781237822378323784237852378623787237882378923790237912379223793237942379523796237972379823799238002380123802238032380423805238062380723808238092381023811238122381323814238152381623817238182381923820238212382223823238242382523826238272382823829238302383123832238332383423835238362383723838238392384023841238422384323844238452384623847238482384923850238512385223853238542385523856238572385823859238602386123862238632386423865238662386723868238692387023871238722387323874238752387623877238782387923880238812388223883238842388523886238872388823889238902389123892238932389423895238962389723898238992390023901239022390323904239052390623907239082390923910239112391223913239142391523916239172391823919239202392123922239232392423925239262392723928239292393023931239322393323934239352393623937239382393923940239412394223943239442394523946239472394823949239502395123952239532395423955239562395723958239592396023961239622396323964239652396623967239682396923970239712397223973239742397523976239772397823979239802398123982239832398423985239862398723988239892399023991239922399323994239952399623997239982399924000240012400224003240042400524006240072400824009240102401124012240132401424015240162401724018240192402024021240222402324024240252402624027240282402924030240312403224033240342403524036240372403824039240402404124042240432404424045240462404724048240492405024051240522405324054240552405624057240582405924060240612406224063240642406524066240672406824069240702407124072240732407424075240762407724078240792408024081240822408324084240852408624087240882408924090240912409224093240942409524096240972409824099241002410124102241032410424105241062410724108241092411024111241122411324114241152411624117241182411924120241212412224123241242412524126241272412824129241302413124132241332413424135241362413724138241392414024141241422414324144241452414624147241482414924150241512415224153241542415524156241572415824159241602416124162241632416424165241662416724168241692417024171241722417324174241752417624177241782417924180241812418224183241842418524186241872418824189241902419124192241932419424195241962419724198241992420024201242022420324204242052420624207242082420924210242112421224213242142421524216242172421824219242202422124222242232422424225242262422724228242292423024231242322423324234242352423624237242382423924240242412424224243242442424524246242472424824249242502425124252242532425424255242562425724258242592426024261242622
  1. diff --git a/Documentation/hwlat_detector.txt b/Documentation/hwlat_detector.txt
  2. new file mode 100644
  3. index 000000000000..cb61516483d3
  4. --- /dev/null
  5. +++ b/Documentation/hwlat_detector.txt
  6. @@ -0,0 +1,64 @@
  7. +Introduction:
  8. +-------------
  9. +
  10. +The module hwlat_detector is a special purpose kernel module that is used to
  11. +detect large system latencies induced by the behavior of certain underlying
  12. +hardware or firmware, independent of Linux itself. The code was developed
  13. +originally to detect SMIs (System Management Interrupts) on x86 systems;
  14. +however, there is nothing x86-specific about this patchset. It was
  15. +originally written for use by the "RT" patch since the Real Time
  16. +kernel is highly latency sensitive.
  17. +
  18. +SMIs are usually not serviced by the Linux kernel, which typically does not
  19. +even know that they are occurring. SMIs are instead set up by BIOS code
  20. +and are serviced by BIOS code, usually for "critical" events such as
  21. +management of thermal sensors and fans. Sometimes though, SMIs are used for
  22. +other tasks and those tasks can spend an inordinate amount of time in the
  23. +handler (sometimes measured in milliseconds). Obviously this is a problem if
  24. +you are trying to keep event service latencies down in the microsecond range.
  25. +
  26. +The hardware latency detector works by hogging all of the CPUs for configurable
  27. +amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter
  28. +for some period, then looking for gaps in the TSC data. Any gap indicates a
  29. +time when the polling was interrupted; since the machine is stopped and
  30. +interrupts are turned off, the only thing that could cause such a gap is an SMI.
  31. +
  32. +Note that the SMI detector should *NEVER* be used in a production environment.
  33. +It is intended to be run manually to determine if the hardware platform has a
  34. +problem with long system firmware service routines.
  35. +
  36. +Usage:
  37. +------
  38. +
  39. +Loading the module hwlat_detector with the parameter "enabled=1" (or by
  40. +toggling on the "enable" entry in the "hwlat_detector" debugfs directory) is
  41. +the only step required to start the hwlat_detector. It is possible to redefine the
  42. +threshold in microseconds (us) above which latency spikes will be taken
  43. +into account (parameter "threshold=").
  44. +
  45. +Example:
  46. +
  47. + # modprobe hwlat_detector enabled=1 threshold=100
  48. +
  49. +After the module is loaded, it creates a directory named "hwlat_detector" under
  50. +the debugfs mountpoint ("/debug/hwlat_detector" in this text). It is necessary
  51. +to have debugfs mounted, which might be on /sys/debug on your system.
  52. +
  53. +The /debug/hwlat_detector interface contains the following files:
  54. +
  55. +count - number of latency spikes observed since last reset
  56. +enable - a global enable/disable toggle (0/1), resets count
  57. +max - maximum hardware latency actually observed (usecs)
  58. +sample - a pipe from which to read current raw sample data
  59. + in the format <timestamp> <latency observed usecs>
  60. + (can be opened O_NONBLOCK for a single sample)
  61. +threshold - minimum latency value to be considered (usecs)
  62. +width - time period to sample with CPUs held (usecs)
  63. + must be less than the total window size (enforced)
  64. +window - total period of sampling, width being inside (usecs)
  65. +
  66. +By default we will set width to 500,000 and window to 1,000,000, meaning that
  67. +we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we
  68. +observe any latencies that exceed the threshold (initially 100 usecs),
  69. +then we write to a global sample ring buffer of 8K samples, which is
  70. +consumed by reading from the "sample" (pipe) debugfs file interface.
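
As a minimal illustration of consuming the "sample" pipe described above, the following C sketch blocks on the pipe and copies each "<timestamp> <latency observed usecs>" record to stdout. It is only a sketch: the path assumes debugfs is mounted at /debug, matching the example paths in this text. Opening the file with O_NONBLOCK instead would return at most a single sample, as noted above.

  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
          char buf[256];
          ssize_t n;
          /* Assumes debugfs is mounted at /debug, as in the paths above. */
          int fd = open("/debug/hwlat_detector/sample", O_RDONLY);

          if (fd < 0) {
                  perror("open sample");
                  return 1;
          }
          /* Each record read is "<timestamp> <latency observed usecs>". */
          while ((n = read(fd, buf, sizeof(buf))) > 0) {
                  if (write(STDOUT_FILENO, buf, n) < 0)
                          break;
          }
          close(fd);
          return 0;
  }
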
  71. diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
  72. index 13f5619b2203..f64d075ba647 100644
  73. --- a/Documentation/sysrq.txt
  74. +++ b/Documentation/sysrq.txt
  75. @@ -59,10 +59,17 @@ On PowerPC - Press 'ALT - Print Screen (or F13) - <command key>,
  76. On other - If you know of the key combos for other architectures, please
  77. let me know so I can add them to this section.
  78. -On all - write a character to /proc/sysrq-trigger. e.g.:
  79. -
  80. +On all - write a character to /proc/sysrq-trigger, e.g.:
  81. echo t > /proc/sysrq-trigger
  82. +On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g.
  83. + echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq
  84. + Send an ICMP echo request with this pattern plus the particular
  85. + SysRq command key. Example:
  86. + # ping -c1 -s57 -p0102030468
  87. + will trigger the SysRq-H (help) command.
  88. +
  89. +
  90. * What are the 'command' keys?
  91. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  92. 'b' - Will immediately reboot the system without syncing or unmounting
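
The pattern in the ping example above is simply the icmp_echo_sysrq cookie bytes followed by the ASCII code of the SysRq command key ('h' is 0x68, hence 0102030468 for SysRq-H). A small C sketch, assuming the example cookie 0x01020304, that prints the matching ping invocation for a given key:

  #include <stdio.h>

  int main(int argc, char **argv)
  {
          /* Cookie from the example above; must match icmp_echo_sysrq. */
          unsigned char pattern[5] = { 0x01, 0x02, 0x03, 0x04, 'h' };
          int i;

          if (argc > 1)
                  pattern[4] = argv[1][0]; /* SysRq command key, e.g. 'h' for help */

          printf("ping -c1 -s57 -p");
          for (i = 0; i < 5; i++)
                  printf("%02x", (unsigned int)pattern[i]);
          printf("\n");
          return 0;
  }
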
  93. diff --git a/Documentation/trace/histograms.txt b/Documentation/trace/histograms.txt
  94. new file mode 100644
  95. index 000000000000..6f2aeabf7faa
  96. --- /dev/null
  97. +++ b/Documentation/trace/histograms.txt
  98. @@ -0,0 +1,186 @@
  99. + Using the Linux Kernel Latency Histograms
  100. +
  101. +
  102. +This document gives a short explanation of how to enable, configure and use
  103. +latency histograms. Latency histograms are primarily relevant in the
  104. +context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT)
  105. +and are used in the quality management of the Linux real-time
  106. +capabilities.
  107. +
  108. +
  109. +* Purpose of latency histograms
  110. +
  111. +A latency histogram continuously accumulates the frequencies of latency
  112. +data. There are two types of histograms:
  113. +- potential sources of latencies
  114. +- effective latencies
  115. +
  116. +
  117. +* Potential sources of latencies
  118. +
  119. +Potential sources of latencies are code segments where interrupts,
  120. +preemption or both are disabled (aka critical sections). To create
  121. +histograms of potential sources of latency, the kernel stores the time
  122. +stamp at the start of a critical section, determines the time elapsed
  123. +when the end of the section is reached, and increments the frequency
  124. +counter of that latency value - irrespective of whether any concurrently
  125. +running process is affected by latency or not.
  126. +- Configuration items (in the Kernel hacking/Tracers submenu)
  127. + CONFIG_INTERRUPT_OFF_LATENCY
  128. + CONFIG_PREEMPT_OFF_LATENCY
  129. +
  130. +
  131. +* Effective latencies
  132. +
  133. +Effective latencies actually occur during the wakeup of a process. To
  134. +determine effective latencies, the kernel stores the time stamp when a
  135. +process is scheduled to be woken up, and determines the duration of the
  136. +wakeup time shortly before control is passed over to this process. Note
  137. +that the apparent latency in user space may be somewhat longer, since the
  138. +process may be interrupted after control is passed over to it but before
  139. +the execution in user space takes place. Simply measuring the interval
  140. +between enqueuing and wakeup may also not be appropriate in cases when a
  141. +process is scheduled as a result of a timer expiration. The timer may have
  142. +missed its deadline, e.g. due to disabled interrupts, but this latency
  143. +would not be registered. Therefore, the offsets of missed timers are
  144. +recorded in a separate histogram. If both wakeup latency and missed timer
  145. +offsets are configured and enabled, a third histogram may be enabled that
  146. +records the overall latency as a sum of the timer latency, if any, and the
  147. +wakeup latency. This histogram is called "timerandwakeup".
  148. +- Configuration items (in the Kernel hacking/Tracers submenu)
  149. + CONFIG_WAKEUP_LATENCY
  150. + CONFIG_MISSED_TIMER_OFFSETS
  151. +
  152. +
  153. +* Usage
  154. +
  155. +The interface to the administration of the latency histograms is located
  156. +in the debugfs file system. To mount it, either enter
  157. +
  158. +mount -t sysfs nodev /sys
  159. +mount -t debugfs nodev /sys/kernel/debug
  160. +
  161. +from the shell command line, or add
  162. +
  163. +nodev /sys sysfs defaults 0 0
  164. +nodev /sys/kernel/debug debugfs defaults 0 0
  165. +
  166. +to the file /etc/fstab. All latency histogram related files are then
  167. +available in the directory /sys/kernel/debug/tracing/latency_hist. A
  168. +particular histogram type is enabled by writing non-zero to the related
  169. +variable in the /sys/kernel/debug/tracing/latency_hist/enable directory.
  170. +Select "preemptirqsoff" for the histograms of potential sources of
  171. +latencies and "wakeup" for histograms of effective latencies etc. The
  172. +histogram data - one per CPU - are available in the files
  173. +
  174. +/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx
  175. +/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx
  176. +/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx
  177. +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx
  178. +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx
  179. +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx
  180. +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx
  181. +
  182. +The histograms are reset by writing non-zero to the file "reset" in a
  183. +particular latency directory. To reset all latency data, use
  184. +
  185. +#!/bin/sh
  186. +
  187. +TRACINGDIR=/sys/kernel/debug/tracing
  188. +HISTDIR=$TRACINGDIR/latency_hist
  189. +
  190. +if test -d $HISTDIR
  191. +then
  192. + cd $HISTDIR
  193. + for i in `find . | grep /reset$`
  194. + do
  195. + echo 1 >$i
  196. + done
  197. +fi
  198. +
  199. +
  200. +* Data format
  201. +
  202. +Latency data are stored with a resolution of one microsecond. The
  203. +maximum latency is 10,240 microseconds. The data are only valid if the
  204. +overflow register is empty. Every output line contains the latency in
  205. +microseconds in the first column and the number of samples in the second
  206. +column. To display only lines with a positive latency count, use, for
  207. +example,
  208. +
  209. +grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0
  210. +
  211. +#Minimum latency: 0 microseconds.
  212. +#Average latency: 0 microseconds.
  213. +#Maximum latency: 25 microseconds.
  214. +#Total samples: 3104770694
  215. +#There are 0 samples greater or equal than 10240 microseconds
  216. +#usecs samples
  217. + 0 2984486876
  218. + 1 49843506
  219. + 2 58219047
  220. + 3 5348126
  221. + 4 2187960
  222. + 5 3388262
  223. + 6 959289
  224. + 7 208294
  225. + 8 40420
  226. + 9 4485
  227. + 10 14918
  228. + 11 18340
  229. + 12 25052
  230. + 13 19455
  231. + 14 5602
  232. + 15 969
  233. + 16 47
  234. + 17 18
  235. + 18 14
  236. + 19 1
  237. + 20 3
  238. + 21 2
  239. + 22 5
  240. + 23 2
  241. + 25 1
  242. +
  243. +
  244. +* Wakeup latency of a selected process
  245. +
  246. +To only collect wakeup latency data of a particular process, write the
  247. +PID of the requested process to
  248. +
  249. +/sys/kernel/debug/tracing/latency_hist/wakeup/pid
  250. +
  251. +PIDs are not considered if this variable is set to 0.
  252. +
  253. +
  254. +* Details of the process with the highest wakeup latency so far
  255. +
  256. +Selected data of the process that suffered from the highest wakeup
  257. +latency that occurred on a particular CPU are available in the file
  258. +
  259. +/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx.
  260. +
  261. +In addition, other relevant system data at the time when the
  262. +latency occurred are given.
  263. +
  264. +The format of the data is (all in one line):
  265. +<PID> <Priority> <Latency> (<Timeroffset>) <Command> \
  266. +<- <PID> <Priority> <Command> <Timestamp>
  267. +
  268. +The value of <Timeroffset> is only relevant in the combined timer
  269. +and wakeup latency recording. In the wakeup recording, it is
  270. +always 0, in the missed_timer_offsets recording, it is the same
  271. +as <Latency>.
  272. +
  273. +When retrospectively searching for the origin of a latency while
  274. +tracing was not enabled, it may be helpful to know the name and
  275. +some basic data of the task that (finally) switched to the
  276. +late real-time task. In addition to the victim's data, the
  277. +data of the possible culprit are therefore displayed after the
  278. +"<-" symbol.
  279. +
  280. +Finally, the timestamp of when the latency occurred, given
  281. +in <seconds>.<microseconds> since the most recent system boot,
  282. +is provided.
  283. +
  284. +These data are also reset when the wakeup histogram is reset.
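
As a minimal sketch of parsing the per-CPU histogram format described above (comment lines beginning with '#', followed by "<usecs> <samples>" pairs), the following C program reads one example file and computes the total sample count and the weighted average latency. The path is only an example; any latency_hist CPUx file has the same layout.

  #include <stdio.h>

  int main(void)
  {
          /* Example path; any latency_hist CPUx file has the same format. */
          const char *path =
                  "/sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0";
          FILE *f = fopen(path, "r");
          char line[128];
          unsigned long long usecs, samples, total = 0;
          long double weighted = 0;

          if (!f) {
                  perror("fopen");
                  return 1;
          }
          while (fgets(line, sizeof(line), f)) {
                  if (line[0] == '#')     /* header: min/avg/max/total/overflow */
                          continue;
                  if (sscanf(line, "%llu %llu", &usecs, &samples) == 2) {
                          total += samples;
                          weighted += (long double)usecs * samples;
                  }
          }
          fclose(f);
          if (total)
                  printf("%llu samples, average %.3Lf usecs\n",
                         total, weighted / total);
          return 0;
  }
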
  285. diff --git a/arch/Kconfig b/arch/Kconfig
  286. index 81869a5e7e17..54cfbcbdaa02 100644
  287. --- a/arch/Kconfig
  288. +++ b/arch/Kconfig
  289. @@ -9,6 +9,7 @@ config OPROFILE
  290. tristate "OProfile system profiling"
  291. depends on PROFILING
  292. depends on HAVE_OPROFILE
  293. + depends on !PREEMPT_RT_FULL
  294. select RING_BUFFER
  295. select RING_BUFFER_ALLOW_SWAP
  296. help
  297. @@ -52,6 +53,7 @@ config KPROBES
  298. config JUMP_LABEL
  299. bool "Optimize very unlikely/likely branches"
  300. depends on HAVE_ARCH_JUMP_LABEL
  301. + depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST)
  302. help
  303. This option enables a transparent branch optimization that
  304. makes certain almost-always-true or almost-always-false branch
  305. diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
  306. index cdfa6c2b7626..fa885f5eb8c9 100644
  307. --- a/arch/arm/Kconfig
  308. +++ b/arch/arm/Kconfig
  309. @@ -35,7 +35,7 @@ config ARM
  310. select HARDIRQS_SW_RESEND
  311. select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
  312. select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
  313. - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
  314. + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT_BASE
  315. select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
  316. select HAVE_ARCH_MMAP_RND_BITS if MMU
  317. select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
  318. @@ -71,6 +71,7 @@ config ARM
  319. select HAVE_PERF_EVENTS
  320. select HAVE_PERF_REGS
  321. select HAVE_PERF_USER_STACK_DUMP
  322. + select HAVE_PREEMPT_LAZY
  323. select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
  324. select HAVE_REGS_AND_STACK_ACCESS_API
  325. select HAVE_SYSCALL_TRACEPOINTS
  326. diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h
  327. index 12ebfcc1d539..c962084605bc 100644
  328. --- a/arch/arm/include/asm/switch_to.h
  329. +++ b/arch/arm/include/asm/switch_to.h
  330. @@ -3,6 +3,13 @@
  331. #include <linux/thread_info.h>
  332. +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
  333. +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
  334. +#else
  335. +static inline void
  336. +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
  337. +#endif
  338. +
  339. /*
  340. * For v7 SMP cores running a preemptible kernel we may be pre-empted
  341. * during a TLB maintenance operation, so execute an inner-shareable dsb
  342. @@ -25,6 +32,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info
  343. #define switch_to(prev,next,last) \
  344. do { \
  345. __complete_pending_tlbi(); \
  346. + switch_kmaps(prev, next); \
  347. last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \
  348. } while (0)
  349. diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
  350. index 776757d1604a..1f36a4eccc72 100644
  351. --- a/arch/arm/include/asm/thread_info.h
  352. +++ b/arch/arm/include/asm/thread_info.h
  353. @@ -49,6 +49,7 @@ struct cpu_context_save {
  354. struct thread_info {
  355. unsigned long flags; /* low level flags */
  356. int preempt_count; /* 0 => preemptable, <0 => bug */
  357. + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
  358. mm_segment_t addr_limit; /* address limit */
  359. struct task_struct *task; /* main task structure */
  360. __u32 cpu; /* cpu */
  361. @@ -142,7 +143,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
  362. #define TIF_SYSCALL_TRACE 4 /* syscall trace active */
  363. #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
  364. #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
  365. -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */
  366. +#define TIF_SECCOMP 8 /* seccomp syscall filtering active */
  367. +#define TIF_NEED_RESCHED_LAZY 7
  368. #define TIF_NOHZ 12 /* in adaptive nohz mode */
  369. #define TIF_USING_IWMMXT 17
  370. @@ -152,6 +154,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
  371. #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
  372. #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
  373. #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
  374. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  375. #define _TIF_UPROBE (1 << TIF_UPROBE)
  376. #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
  377. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  378. @@ -167,7 +170,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
  379. * Change these and you break ASM code in entry-common.S
  380. */
  381. #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
  382. - _TIF_NOTIFY_RESUME | _TIF_UPROBE)
  383. + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
  384. + _TIF_NEED_RESCHED_LAZY)
  385. #endif /* __KERNEL__ */
  386. #endif /* __ASM_ARM_THREAD_INFO_H */
  387. diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
  388. index 27d05813ff09..5a3742215cde 100644
  389. --- a/arch/arm/kernel/asm-offsets.c
  390. +++ b/arch/arm/kernel/asm-offsets.c
  391. @@ -65,6 +65,7 @@ int main(void)
  392. BLANK();
  393. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  394. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  395. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  396. DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
  397. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  398. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  399. diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
  400. index e2550500486d..3125de9e9783 100644
  401. --- a/arch/arm/kernel/entry-armv.S
  402. +++ b/arch/arm/kernel/entry-armv.S
  403. @@ -215,11 +215,18 @@ ENDPROC(__dabt_svc)
  404. #ifdef CONFIG_PREEMPT
  405. get_thread_info tsk
  406. ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
  407. - ldr r0, [tsk, #TI_FLAGS] @ get flags
  408. teq r8, #0 @ if preempt count != 0
  409. + bne 1f @ return from exception
  410. + ldr r0, [tsk, #TI_FLAGS] @ get flags
  411. + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
  412. + blne svc_preempt @ preempt!
  413. +
  414. + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
  415. + teq r8, #0 @ if preempt lazy count != 0
  416. movne r0, #0 @ force flags to 0
  417. - tst r0, #_TIF_NEED_RESCHED
  418. + tst r0, #_TIF_NEED_RESCHED_LAZY
  419. blne svc_preempt
  420. +1:
  421. #endif
  422. svc_exit r5, irq = 1 @ return from exception
  423. @@ -234,8 +241,14 @@ ENDPROC(__irq_svc)
  424. 1: bl preempt_schedule_irq @ irq en/disable is done inside
  425. ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
  426. tst r0, #_TIF_NEED_RESCHED
  427. + bne 1b
  428. + tst r0, #_TIF_NEED_RESCHED_LAZY
  429. reteq r8 @ go again
  430. - b 1b
  431. + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
  432. + teq r0, #0 @ if preempt lazy count != 0
  433. + beq 1b
  434. + ret r8 @ go again
  435. +
  436. #endif
  437. __und_fault:
  438. diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
  439. index 30a7228eaceb..c3bd6cbfce4b 100644
  440. --- a/arch/arm/kernel/entry-common.S
  441. +++ b/arch/arm/kernel/entry-common.S
  442. @@ -36,7 +36,9 @@
  443. UNWIND(.cantunwind )
  444. disable_irq_notrace @ disable interrupts
  445. ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
  446. - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
  447. + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
  448. + bne fast_work_pending
  449. + tst r1, #_TIF_SECCOMP
  450. bne fast_work_pending
  451. /* perform architecture specific actions before user return */
  452. @@ -62,8 +64,11 @@ ENDPROC(ret_fast_syscall)
  453. str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
  454. disable_irq_notrace @ disable interrupts
  455. ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
  456. - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
  457. + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
  458. + bne do_slower_path
  459. + tst r1, #_TIF_SECCOMP
  460. beq no_work_pending
  461. +do_slower_path:
  462. UNWIND(.fnend )
  463. ENDPROC(ret_fast_syscall)
  464. diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
  465. index 4adfb46e3ee9..15f1d94b47c5 100644
  466. --- a/arch/arm/kernel/process.c
  467. +++ b/arch/arm/kernel/process.c
  468. @@ -319,6 +319,30 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
  469. }
  470. #ifdef CONFIG_MMU
  471. +/*
  472. + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not
  473. + * initialized by pgtable_page_ctor() then a coredump of the vector page will
  474. + * fail.
  475. + */
  476. +static int __init vectors_user_mapping_init_page(void)
  477. +{
  478. + struct page *page;
  479. + unsigned long addr = 0xffff0000;
  480. + pgd_t *pgd;
  481. + pud_t *pud;
  482. + pmd_t *pmd;
  483. +
  484. + pgd = pgd_offset_k(addr);
  485. + pud = pud_offset(pgd, addr);
  486. + pmd = pmd_offset(pud, addr);
  487. + page = pmd_page(*(pmd));
  488. +
  489. + pgtable_page_ctor(page);
  490. +
  491. + return 0;
  492. +}
  493. +late_initcall(vectors_user_mapping_init_page);
  494. +
  495. #ifdef CONFIG_KUSER_HELPERS
  496. /*
  497. * The vectors page is always readable from user space for the
  498. diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
  499. index 7b8f2141427b..96541e00b74a 100644
  500. --- a/arch/arm/kernel/signal.c
  501. +++ b/arch/arm/kernel/signal.c
  502. @@ -572,7 +572,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
  503. */
  504. trace_hardirqs_off();
  505. do {
  506. - if (likely(thread_flags & _TIF_NEED_RESCHED)) {
  507. + if (likely(thread_flags & (_TIF_NEED_RESCHED |
  508. + _TIF_NEED_RESCHED_LAZY))) {
  509. schedule();
  510. } else {
  511. if (unlikely(!user_mode(regs)))
  512. diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
  513. index baee70267f29..ad157ec494f9 100644
  514. --- a/arch/arm/kernel/smp.c
  515. +++ b/arch/arm/kernel/smp.c
  516. @@ -234,8 +234,6 @@ int __cpu_disable(void)
  517. flush_cache_louis();
  518. local_flush_tlb_all();
  519. - clear_tasks_mm_cpumask(cpu);
  520. -
  521. return 0;
  522. }
  523. @@ -251,6 +249,9 @@ void __cpu_die(unsigned int cpu)
  524. pr_err("CPU%u: cpu didn't die\n", cpu);
  525. return;
  526. }
  527. +
  528. + clear_tasks_mm_cpumask(cpu);
  529. +
  530. pr_notice("CPU%u: shutdown\n", cpu);
  531. /*
  532. diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
  533. index 0bee233fef9a..314cfb232a63 100644
  534. --- a/arch/arm/kernel/unwind.c
  535. +++ b/arch/arm/kernel/unwind.c
  536. @@ -93,7 +93,7 @@ extern const struct unwind_idx __start_unwind_idx[];
  537. static const struct unwind_idx *__origin_unwind_idx;
  538. extern const struct unwind_idx __stop_unwind_idx[];
  539. -static DEFINE_SPINLOCK(unwind_lock);
  540. +static DEFINE_RAW_SPINLOCK(unwind_lock);
  541. static LIST_HEAD(unwind_tables);
  542. /* Convert a prel31 symbol to an absolute address */
  543. @@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr)
  544. /* module unwind tables */
  545. struct unwind_table *table;
  546. - spin_lock_irqsave(&unwind_lock, flags);
  547. + raw_spin_lock_irqsave(&unwind_lock, flags);
  548. list_for_each_entry(table, &unwind_tables, list) {
  549. if (addr >= table->begin_addr &&
  550. addr < table->end_addr) {
  551. @@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr)
  552. break;
  553. }
  554. }
  555. - spin_unlock_irqrestore(&unwind_lock, flags);
  556. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  557. }
  558. pr_debug("%s: idx = %p\n", __func__, idx);
  559. @@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size,
  560. tab->begin_addr = text_addr;
  561. tab->end_addr = text_addr + text_size;
  562. - spin_lock_irqsave(&unwind_lock, flags);
  563. + raw_spin_lock_irqsave(&unwind_lock, flags);
  564. list_add_tail(&tab->list, &unwind_tables);
  565. - spin_unlock_irqrestore(&unwind_lock, flags);
  566. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  567. return tab;
  568. }
  569. @@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_table *tab)
  570. if (!tab)
  571. return;
  572. - spin_lock_irqsave(&unwind_lock, flags);
  573. + raw_spin_lock_irqsave(&unwind_lock, flags);
  574. list_del(&tab->list);
  575. - spin_unlock_irqrestore(&unwind_lock, flags);
  576. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  577. kfree(tab);
  578. }
  579. diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
  580. index 72b11d91ede2..34068c59d0c5 100644
  581. --- a/arch/arm/kvm/arm.c
  582. +++ b/arch/arm/kvm/arm.c
  583. @@ -582,7 +582,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
  584. * involves poking the GIC, which must be done in a
  585. * non-preemptible context.
  586. */
  587. - preempt_disable();
  588. + migrate_disable();
  589. kvm_pmu_flush_hwstate(vcpu);
  590. kvm_timer_flush_hwstate(vcpu);
  591. kvm_vgic_flush_hwstate(vcpu);
  592. @@ -603,7 +603,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
  593. kvm_pmu_sync_hwstate(vcpu);
  594. kvm_timer_sync_hwstate(vcpu);
  595. kvm_vgic_sync_hwstate(vcpu);
  596. - preempt_enable();
  597. + migrate_enable();
  598. continue;
  599. }
  600. @@ -659,7 +659,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
  601. kvm_vgic_sync_hwstate(vcpu);
  602. - preempt_enable();
  603. + migrate_enable();
  604. ret = handle_exit(vcpu, run, ret);
  605. }
  606. diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c
  607. index 85c3be63d644..8a950aec7834 100644
  608. --- a/arch/arm/mach-exynos/platsmp.c
  609. +++ b/arch/arm/mach-exynos/platsmp.c
  610. @@ -229,7 +229,7 @@ static void __iomem *scu_base_addr(void)
  611. return (void __iomem *)(S5P_VA_SCU);
  612. }
  613. -static DEFINE_SPINLOCK(boot_lock);
  614. +static DEFINE_RAW_SPINLOCK(boot_lock);
  615. static void exynos_secondary_init(unsigned int cpu)
  616. {
  617. @@ -242,8 +242,8 @@ static void exynos_secondary_init(unsigned int cpu)
  618. /*
  619. * Synchronise with the boot thread.
  620. */
  621. - spin_lock(&boot_lock);
  622. - spin_unlock(&boot_lock);
  623. + raw_spin_lock(&boot_lock);
  624. + raw_spin_unlock(&boot_lock);
  625. }
  626. int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr)
  627. @@ -307,7 +307,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
  628. * Set synchronisation state between this boot processor
  629. * and the secondary one
  630. */
  631. - spin_lock(&boot_lock);
  632. + raw_spin_lock(&boot_lock);
  633. /*
  634. * The secondary processor is waiting to be released from
  635. @@ -334,7 +334,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
  636. if (timeout == 0) {
  637. printk(KERN_ERR "cpu1 power enable failed");
  638. - spin_unlock(&boot_lock);
  639. + raw_spin_unlock(&boot_lock);
  640. return -ETIMEDOUT;
  641. }
  642. }
  643. @@ -380,7 +380,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
  644. * calibrations, then wait for it to finish
  645. */
  646. fail:
  647. - spin_unlock(&boot_lock);
  648. + raw_spin_unlock(&boot_lock);
  649. return pen_release != -1 ? ret : 0;
  650. }
  651. diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c
  652. index 4b653a8cb75c..b03d5a922cb1 100644
  653. --- a/arch/arm/mach-hisi/platmcpm.c
  654. +++ b/arch/arm/mach-hisi/platmcpm.c
  655. @@ -61,7 +61,7 @@
  656. static void __iomem *sysctrl, *fabric;
  657. static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
  658. -static DEFINE_SPINLOCK(boot_lock);
  659. +static DEFINE_RAW_SPINLOCK(boot_lock);
  660. static u32 fabric_phys_addr;
  661. /*
  662. * [0]: bootwrapper physical address
  663. @@ -113,7 +113,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle)
  664. if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
  665. return -EINVAL;
  666. - spin_lock_irq(&boot_lock);
  667. + raw_spin_lock_irq(&boot_lock);
  668. if (hip04_cpu_table[cluster][cpu])
  669. goto out;
  670. @@ -147,7 +147,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle)
  671. out:
  672. hip04_cpu_table[cluster][cpu]++;
  673. - spin_unlock_irq(&boot_lock);
  674. + raw_spin_unlock_irq(&boot_lock);
  675. return 0;
  676. }
  677. @@ -162,11 +162,11 @@ static void hip04_cpu_die(unsigned int l_cpu)
  678. cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
  679. cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
  680. - spin_lock(&boot_lock);
  681. + raw_spin_lock(&boot_lock);
  682. hip04_cpu_table[cluster][cpu]--;
  683. if (hip04_cpu_table[cluster][cpu] == 1) {
  684. /* A power_up request went ahead of us. */
  685. - spin_unlock(&boot_lock);
  686. + raw_spin_unlock(&boot_lock);
  687. return;
  688. } else if (hip04_cpu_table[cluster][cpu] > 1) {
  689. pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu);
  690. @@ -174,7 +174,7 @@ static void hip04_cpu_die(unsigned int l_cpu)
  691. }
  692. last_man = hip04_cluster_is_down(cluster);
  693. - spin_unlock(&boot_lock);
  694. + raw_spin_unlock(&boot_lock);
  695. if (last_man) {
  696. /* Since it's Cortex A15, disable L2 prefetching. */
  697. asm volatile(
  698. @@ -203,7 +203,7 @@ static int hip04_cpu_kill(unsigned int l_cpu)
  699. cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
  700. count = TIMEOUT_MSEC / POLL_MSEC;
  701. - spin_lock_irq(&boot_lock);
  702. + raw_spin_lock_irq(&boot_lock);
  703. for (tries = 0; tries < count; tries++) {
  704. if (hip04_cpu_table[cluster][cpu])
  705. goto err;
  706. @@ -211,10 +211,10 @@ static int hip04_cpu_kill(unsigned int l_cpu)
  707. data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
  708. if (data & CORE_WFI_STATUS(cpu))
  709. break;
  710. - spin_unlock_irq(&boot_lock);
  711. + raw_spin_unlock_irq(&boot_lock);
  712. /* Wait for clean L2 when the whole cluster is down. */
  713. msleep(POLL_MSEC);
  714. - spin_lock_irq(&boot_lock);
  715. + raw_spin_lock_irq(&boot_lock);
  716. }
  717. if (tries >= count)
  718. goto err;
  719. @@ -231,10 +231,10 @@ static int hip04_cpu_kill(unsigned int l_cpu)
  720. goto err;
  721. if (hip04_cluster_is_down(cluster))
  722. hip04_set_snoop_filter(cluster, 0);
  723. - spin_unlock_irq(&boot_lock);
  724. + raw_spin_unlock_irq(&boot_lock);
  725. return 1;
  726. err:
  727. - spin_unlock_irq(&boot_lock);
  728. + raw_spin_unlock_irq(&boot_lock);
  729. return 0;
  730. }
  731. #endif
  732. diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig
  733. index 8973fae25436..dd905b9602a0 100644
  734. --- a/arch/arm/mach-imx/Kconfig
  735. +++ b/arch/arm/mach-imx/Kconfig
  736. @@ -526,7 +526,7 @@ config SOC_IMX6Q
  737. bool "i.MX6 Quad/DualLite support"
  738. select ARM_ERRATA_764369 if SMP
  739. select HAVE_ARM_SCU if SMP
  740. - select HAVE_ARM_TWD if SMP
  741. + select HAVE_ARM_TWD
  742. select PCI_DOMAINS if PCI
  743. select PINCTRL_IMX6Q
  744. select SOC_IMX6
  745. diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
  746. index c625cc10d9f9..6cbad704cc3d 100644
  747. --- a/arch/arm/mach-omap2/omap-smp.c
  748. +++ b/arch/arm/mach-omap2/omap-smp.c
  749. @@ -43,7 +43,7 @@
  750. /* SCU base address */
  751. static void __iomem *scu_base;
  752. -static DEFINE_SPINLOCK(boot_lock);
  753. +static DEFINE_RAW_SPINLOCK(boot_lock);
  754. void __iomem *omap4_get_scu_base(void)
  755. {
  756. @@ -74,8 +74,8 @@ static void omap4_secondary_init(unsigned int cpu)
  757. /*
  758. * Synchronise with the boot thread.
  759. */
  760. - spin_lock(&boot_lock);
  761. - spin_unlock(&boot_lock);
  762. + raw_spin_lock(&boot_lock);
  763. + raw_spin_unlock(&boot_lock);
  764. }
  765. static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
  766. @@ -89,7 +89,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
  767. * Set synchronisation state between this boot processor
  768. * and the secondary one
  769. */
  770. - spin_lock(&boot_lock);
  771. + raw_spin_lock(&boot_lock);
  772. /*
  773. * Update the AuxCoreBoot0 with boot state for secondary core.
  774. @@ -166,7 +166,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
  775. * Now the secondary core is starting up let it run its
  776. * calibrations, then wait for it to finish
  777. */
  778. - spin_unlock(&boot_lock);
  779. + raw_spin_unlock(&boot_lock);
  780. return 0;
  781. }
  782. diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c
  783. index 0875b99add18..18b6d98d2581 100644
  784. --- a/arch/arm/mach-prima2/platsmp.c
  785. +++ b/arch/arm/mach-prima2/platsmp.c
  786. @@ -22,7 +22,7 @@
  787. static void __iomem *clk_base;
  788. -static DEFINE_SPINLOCK(boot_lock);
  789. +static DEFINE_RAW_SPINLOCK(boot_lock);
  790. static void sirfsoc_secondary_init(unsigned int cpu)
  791. {
  792. @@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsigned int cpu)
  793. /*
  794. * Synchronise with the boot thread.
  795. */
  796. - spin_lock(&boot_lock);
  797. - spin_unlock(&boot_lock);
  798. + raw_spin_lock(&boot_lock);
  799. + raw_spin_unlock(&boot_lock);
  800. }
  801. static const struct of_device_id clk_ids[] = {
  802. @@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle)
  803. /* make sure write buffer is drained */
  804. mb();
  805. - spin_lock(&boot_lock);
  806. + raw_spin_lock(&boot_lock);
  807. /*
  808. * The secondary processor is waiting to be released from
  809. @@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle)
  810. * now the secondary core is starting up let it run its
  811. * calibrations, then wait for it to finish
  812. */
  813. - spin_unlock(&boot_lock);
  814. + raw_spin_unlock(&boot_lock);
  815. return pen_release != -1 ? -ENOSYS : 0;
  816. }
  817. diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c
  818. index 5494c9e0c909..e8ce157d3548 100644
  819. --- a/arch/arm/mach-qcom/platsmp.c
  820. +++ b/arch/arm/mach-qcom/platsmp.c
  821. @@ -46,7 +46,7 @@
  822. extern void secondary_startup_arm(void);
  823. -static DEFINE_SPINLOCK(boot_lock);
  824. +static DEFINE_RAW_SPINLOCK(boot_lock);
  825. #ifdef CONFIG_HOTPLUG_CPU
  826. static void qcom_cpu_die(unsigned int cpu)
  827. @@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned int cpu)
  828. /*
  829. * Synchronise with the boot thread.
  830. */
  831. - spin_lock(&boot_lock);
  832. - spin_unlock(&boot_lock);
  833. + raw_spin_lock(&boot_lock);
  834. + raw_spin_unlock(&boot_lock);
  835. }
  836. static int scss_release_secondary(unsigned int cpu)
  837. @@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int))
  838. * set synchronisation state between this boot processor
  839. * and the secondary one
  840. */
  841. - spin_lock(&boot_lock);
  842. + raw_spin_lock(&boot_lock);
  843. /*
  844. * Send the secondary CPU a soft interrupt, thereby causing
  845. @@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int))
  846. * now the secondary core is starting up let it run its
  847. * calibrations, then wait for it to finish
  848. */
  849. - spin_unlock(&boot_lock);
  850. + raw_spin_unlock(&boot_lock);
  851. return ret;
  852. }
  853. diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c
  854. index 8d1e2d551786..7fa56cc78118 100644
  855. --- a/arch/arm/mach-spear/platsmp.c
  856. +++ b/arch/arm/mach-spear/platsmp.c
  857. @@ -32,7 +32,7 @@ static void write_pen_release(int val)
  858. sync_cache_w(&pen_release);
  859. }
  860. -static DEFINE_SPINLOCK(boot_lock);
  861. +static DEFINE_RAW_SPINLOCK(boot_lock);
  862. static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
  863. @@ -47,8 +47,8 @@ static void spear13xx_secondary_init(unsigned int cpu)
  864. /*
  865. * Synchronise with the boot thread.
  866. */
  867. - spin_lock(&boot_lock);
  868. - spin_unlock(&boot_lock);
  869. + raw_spin_lock(&boot_lock);
  870. + raw_spin_unlock(&boot_lock);
  871. }
  872. static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
  873. @@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
  874. * set synchronisation state between this boot processor
  875. * and the secondary one
  876. */
  877. - spin_lock(&boot_lock);
  878. + raw_spin_lock(&boot_lock);
  879. /*
  880. * The secondary processor is waiting to be released from
  881. @@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
  882. * now the secondary core is starting up let it run its
  883. * calibrations, then wait for it to finish
  884. */
  885. - spin_unlock(&boot_lock);
  886. + raw_spin_unlock(&boot_lock);
  887. return pen_release != -1 ? -ENOSYS : 0;
  888. }
  889. diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c
  890. index ea5a2277ee46..b988e081ac79 100644
  891. --- a/arch/arm/mach-sti/platsmp.c
  892. +++ b/arch/arm/mach-sti/platsmp.c
  893. @@ -35,7 +35,7 @@ static void write_pen_release(int val)
  894. sync_cache_w(&pen_release);
  895. }
  896. -static DEFINE_SPINLOCK(boot_lock);
  897. +static DEFINE_RAW_SPINLOCK(boot_lock);
  898. static void sti_secondary_init(unsigned int cpu)
  899. {
  900. @@ -48,8 +48,8 @@ static void sti_secondary_init(unsigned int cpu)
  901. /*
  902. * Synchronise with the boot thread.
  903. */
  904. - spin_lock(&boot_lock);
  905. - spin_unlock(&boot_lock);
  906. + raw_spin_lock(&boot_lock);
  907. + raw_spin_unlock(&boot_lock);
  908. }
  909. static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
  910. @@ -60,7 +60,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
  911. * set synchronisation state between this boot processor
  912. * and the secondary one
  913. */
  914. - spin_lock(&boot_lock);
  915. + raw_spin_lock(&boot_lock);
  916. /*
  917. * The secondary processor is waiting to be released from
  918. @@ -91,7 +91,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
  919. * now the secondary core is starting up let it run its
  920. * calibrations, then wait for it to finish
  921. */
  922. - spin_unlock(&boot_lock);
  923. + raw_spin_unlock(&boot_lock);
  924. return pen_release != -1 ? -ENOSYS : 0;
  925. }
  926. diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
  927. index ad5841856007..7678f0616a0a 100644
  928. --- a/arch/arm/mm/fault.c
  929. +++ b/arch/arm/mm/fault.c
  930. @@ -430,6 +430,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
  931. if (addr < TASK_SIZE)
  932. return do_page_fault(addr, fsr, regs);
  933. + if (interrupts_enabled(regs))
  934. + local_irq_enable();
  935. +
  936. if (user_mode(regs))
  937. goto bad_area;
  938. @@ -497,6 +500,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
  939. static int
  940. do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
  941. {
  942. + if (interrupts_enabled(regs))
  943. + local_irq_enable();
  944. +
  945. do_bad_area(addr, fsr, regs);
  946. return 0;
  947. }
  948. diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
  949. index d02f8187b1cc..542692dbd40a 100644
  950. --- a/arch/arm/mm/highmem.c
  951. +++ b/arch/arm/mm/highmem.c
  952. @@ -34,6 +34,11 @@ static inline pte_t get_fixmap_pte(unsigned long vaddr)
  953. return *ptep;
  954. }
  955. +static unsigned int fixmap_idx(int type)
  956. +{
  957. + return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  958. +}
  959. +
  960. void *kmap(struct page *page)
  961. {
  962. might_sleep();
  963. @@ -54,12 +59,13 @@ EXPORT_SYMBOL(kunmap);
  964. void *kmap_atomic(struct page *page)
  965. {
  966. + pte_t pte = mk_pte(page, kmap_prot);
  967. unsigned int idx;
  968. unsigned long vaddr;
  969. void *kmap;
  970. int type;
  971. - preempt_disable();
  972. + preempt_disable_nort();
  973. pagefault_disable();
  974. if (!PageHighMem(page))
  975. return page_address(page);
  976. @@ -79,7 +85,7 @@ void *kmap_atomic(struct page *page)
  977. type = kmap_atomic_idx_push();
  978. - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  979. + idx = fixmap_idx(type);
  980. vaddr = __fix_to_virt(idx);
  981. #ifdef CONFIG_DEBUG_HIGHMEM
  982. /*
  983. @@ -93,7 +99,10 @@ void *kmap_atomic(struct page *page)
  984. * in place, so the contained TLB flush ensures the TLB is updated
  985. * with the new mapping.
  986. */
  987. - set_fixmap_pte(idx, mk_pte(page, kmap_prot));
  988. +#ifdef CONFIG_PREEMPT_RT_FULL
  989. + current->kmap_pte[type] = pte;
  990. +#endif
  991. + set_fixmap_pte(idx, pte);
  992. return (void *)vaddr;
  993. }
  994. @@ -106,44 +115,75 @@ void __kunmap_atomic(void *kvaddr)
  995. if (kvaddr >= (void *)FIXADDR_START) {
  996. type = kmap_atomic_idx();
  997. - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  998. + idx = fixmap_idx(type);
  999. if (cache_is_vivt())
  1000. __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
  1001. +#ifdef CONFIG_PREEMPT_RT_FULL
  1002. + current->kmap_pte[type] = __pte(0);
  1003. +#endif
  1004. #ifdef CONFIG_DEBUG_HIGHMEM
  1005. BUG_ON(vaddr != __fix_to_virt(idx));
  1006. - set_fixmap_pte(idx, __pte(0));
  1007. #else
  1008. (void) idx; /* to kill a warning */
  1009. #endif
  1010. + set_fixmap_pte(idx, __pte(0));
  1011. kmap_atomic_idx_pop();
  1012. } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
  1013. /* this address was obtained through kmap_high_get() */
  1014. kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
  1015. }
  1016. pagefault_enable();
  1017. - preempt_enable();
  1018. + preempt_enable_nort();
  1019. }
  1020. EXPORT_SYMBOL(__kunmap_atomic);
  1021. void *kmap_atomic_pfn(unsigned long pfn)
  1022. {
  1023. + pte_t pte = pfn_pte(pfn, kmap_prot);
  1024. unsigned long vaddr;
  1025. int idx, type;
  1026. struct page *page = pfn_to_page(pfn);
  1027. - preempt_disable();
  1028. + preempt_disable_nort();
  1029. pagefault_disable();
  1030. if (!PageHighMem(page))
  1031. return page_address(page);
  1032. type = kmap_atomic_idx_push();
  1033. - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  1034. + idx = fixmap_idx(type);
  1035. vaddr = __fix_to_virt(idx);
  1036. #ifdef CONFIG_DEBUG_HIGHMEM
  1037. BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
  1038. #endif
  1039. - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
  1040. +#ifdef CONFIG_PREEMPT_RT_FULL
  1041. + current->kmap_pte[type] = pte;
  1042. +#endif
  1043. + set_fixmap_pte(idx, pte);
  1044. return (void *)vaddr;
  1045. }
  1046. +#if defined CONFIG_PREEMPT_RT_FULL
  1047. +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
  1048. +{
  1049. + int i;
  1050. +
  1051. + /*
  1052. + * Clear @prev's kmap_atomic mappings
  1053. + */
  1054. + for (i = 0; i < prev_p->kmap_idx; i++) {
  1055. + int idx = fixmap_idx(i);
  1056. +
  1057. + set_fixmap_pte(idx, __pte(0));
  1058. + }
  1059. + /*
  1060. + * Restore @next_p's kmap_atomic mappings
  1061. + */
  1062. + for (i = 0; i < next_p->kmap_idx; i++) {
  1063. + int idx = fixmap_idx(i);
  1064. +
  1065. + if (!pte_none(next_p->kmap_pte[i]))
  1066. + set_fixmap_pte(idx, next_p->kmap_pte[i]);
  1067. + }
  1068. +}
  1069. +#endif
  1070. diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c
  1071. index 53feb90c840c..b4a8d54fc3f3 100644
  1072. --- a/arch/arm/plat-versatile/platsmp.c
  1073. +++ b/arch/arm/plat-versatile/platsmp.c
  1074. @@ -30,7 +30,7 @@ static void write_pen_release(int val)
  1075. sync_cache_w(&pen_release);
  1076. }
  1077. -static DEFINE_SPINLOCK(boot_lock);
  1078. +static DEFINE_RAW_SPINLOCK(boot_lock);
  1079. void versatile_secondary_init(unsigned int cpu)
  1080. {
  1081. @@ -43,8 +43,8 @@ void versatile_secondary_init(unsigned int cpu)
  1082. /*
  1083. * Synchronise with the boot thread.
  1084. */
  1085. - spin_lock(&boot_lock);
  1086. - spin_unlock(&boot_lock);
  1087. + raw_spin_lock(&boot_lock);
  1088. + raw_spin_unlock(&boot_lock);
  1089. }
  1090. int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1091. @@ -55,7 +55,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1092. * Set synchronisation state between this boot processor
  1093. * and the secondary one
  1094. */
  1095. - spin_lock(&boot_lock);
  1096. + raw_spin_lock(&boot_lock);
  1097. /*
  1098. * This is really belt and braces; we hold unintended secondary
  1099. @@ -85,7 +85,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1100. * now the secondary core is starting up let it run its
  1101. * calibrations, then wait for it to finish
  1102. */
  1103. - spin_unlock(&boot_lock);
  1104. + raw_spin_unlock(&boot_lock);
  1105. return pen_release != -1 ? -ENOSYS : 0;
  1106. }
  1107. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
  1108. index 4f436220384f..0d7f54cebae3 100644
  1109. --- a/arch/arm64/Kconfig
  1110. +++ b/arch/arm64/Kconfig
  1111. @@ -81,6 +81,7 @@ config ARM64
  1112. select HAVE_PERF_REGS
  1113. select HAVE_PERF_USER_STACK_DUMP
  1114. select HAVE_RCU_TABLE_FREE
  1115. + select HAVE_PREEMPT_LAZY
  1116. select HAVE_SYSCALL_TRACEPOINTS
  1117. select IOMMU_DMA if IOMMU_SUPPORT
  1118. select IRQ_DOMAIN
  1119. @@ -624,7 +625,7 @@ config XEN_DOM0
  1120. config XEN
  1121. bool "Xen guest support on ARM64"
  1122. - depends on ARM64 && OF
  1123. + depends on ARM64 && OF && !PREEMPT_RT_FULL
  1124. select SWIOTLB_XEN
  1125. select PARAVIRT
  1126. help
  1127. diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
  1128. index abd64bd1f6d9..9170788ffa37 100644
  1129. --- a/arch/arm64/include/asm/thread_info.h
  1130. +++ b/arch/arm64/include/asm/thread_info.h
  1131. @@ -49,6 +49,7 @@ struct thread_info {
  1132. mm_segment_t addr_limit; /* address limit */
  1133. struct task_struct *task; /* main task structure */
  1134. int preempt_count; /* 0 => preemptable, <0 => bug */
  1135. + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
  1136. int cpu; /* cpu */
  1137. };
  1138. @@ -109,6 +110,7 @@ static inline struct thread_info *current_thread_info(void)
  1139. #define TIF_NEED_RESCHED 1
  1140. #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
  1141. #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
  1142. +#define TIF_NEED_RESCHED_LAZY 4
  1143. #define TIF_NOHZ 7
  1144. #define TIF_SYSCALL_TRACE 8
  1145. #define TIF_SYSCALL_AUDIT 9
  1146. @@ -124,6 +126,7 @@ static inline struct thread_info *current_thread_info(void)
  1147. #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
  1148. #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
  1149. #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
  1150. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  1151. #define _TIF_NOHZ (1 << TIF_NOHZ)
  1152. #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
  1153. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  1154. @@ -132,7 +135,8 @@ static inline struct thread_info *current_thread_info(void)
  1155. #define _TIF_32BIT (1 << TIF_32BIT)
  1156. #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
  1157. - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
  1158. + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
  1159. + _TIF_NEED_RESCHED_LAZY)
  1160. #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
  1161. _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
  1162. diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
  1163. index 1abcd8829f3b..8290a50d6dd3 100644
  1164. --- a/arch/arm64/kernel/asm-offsets.c
  1165. +++ b/arch/arm64/kernel/asm-offsets.c
  1166. @@ -36,6 +36,7 @@ int main(void)
  1167. BLANK();
  1168. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  1169. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  1170. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  1171. DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
  1172. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  1173. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  1174. diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
  1175. index 6c3b7345a6c4..d4ec679f7e1f 100644
  1176. --- a/arch/arm64/kernel/entry.S
  1177. +++ b/arch/arm64/kernel/entry.S
  1178. @@ -426,11 +426,16 @@ ENDPROC(el1_sync)
  1179. #ifdef CONFIG_PREEMPT
  1180. ldr w24, [tsk, #TI_PREEMPT] // get preempt count
  1181. - cbnz w24, 1f // preempt count != 0
  1182. + cbnz w24, 2f // preempt count != 0
  1183. ldr x0, [tsk, #TI_FLAGS] // get flags
  1184. - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
  1185. - bl el1_preempt
  1186. + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
  1187. +
  1188. + ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count
  1189. + cbnz w24, 2f // preempt lazy count != 0
  1190. + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling?
  1191. 1:
  1192. + bl el1_preempt
  1193. +2:
  1194. #endif
  1195. #ifdef CONFIG_TRACE_IRQFLAGS
  1196. bl trace_hardirqs_on
  1197. @@ -444,6 +449,7 @@ ENDPROC(el1_irq)
  1198. 1: bl preempt_schedule_irq // irq en/disable is done inside
  1199. ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
  1200. tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
  1201. + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling?
  1202. ret x24
  1203. #endif
  1204. @@ -690,6 +696,7 @@ ENDPROC(cpu_switch_to)
  1205. */
  1206. work_pending:
  1207. tbnz x1, #TIF_NEED_RESCHED, work_resched
  1208. + tbnz x1, #TIF_NEED_RESCHED_LAZY, work_resched
  1209. /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
  1210. mov x0, sp // 'regs'
  1211. enable_irq // enable interrupts for do_notify_resume()
  1212. diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
  1213. index 2018c2b0e078..3f3820fdb80e 100644
  1214. --- a/arch/mips/Kconfig
  1215. +++ b/arch/mips/Kconfig
  1216. @@ -2416,7 +2416,7 @@ config CPU_R4400_WORKAROUNDS
  1217. #
  1218. config HIGHMEM
  1219. bool "High Memory Support"
  1220. - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA
  1221. + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL
  1222. config CPU_SUPPORTS_HIGHMEM
  1223. bool
  1224. diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
  1225. index 7cd32c038286..69c3f4db485b 100644
  1226. --- a/arch/powerpc/Kconfig
  1227. +++ b/arch/powerpc/Kconfig
  1228. @@ -57,10 +57,11 @@ config LOCKDEP_SUPPORT
  1229. config RWSEM_GENERIC_SPINLOCK
  1230. bool
  1231. + default y if PREEMPT_RT_FULL
  1232. config RWSEM_XCHGADD_ALGORITHM
  1233. bool
  1234. - default y
  1235. + default y if !PREEMPT_RT_FULL
  1236. config GENERIC_LOCKBREAK
  1237. bool
  1238. @@ -138,6 +139,7 @@ config PPC
  1239. select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
  1240. select GENERIC_STRNCPY_FROM_USER
  1241. select GENERIC_STRNLEN_USER
  1242. + select HAVE_PREEMPT_LAZY
  1243. select HAVE_MOD_ARCH_SPECIFIC
  1244. select MODULES_USE_ELF_RELA
  1245. select CLONE_BACKWARDS
  1246. @@ -319,7 +321,7 @@ menu "Kernel options"
  1247. config HIGHMEM
  1248. bool "High memory support"
  1249. - depends on PPC32
  1250. + depends on PPC32 && !PREEMPT_RT_FULL
  1251. source kernel/Kconfig.hz
  1252. source kernel/Kconfig.preempt
  1253. diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
  1254. index 7efee4a3240b..40e6fa1b85b2 100644
  1255. --- a/arch/powerpc/include/asm/thread_info.h
  1256. +++ b/arch/powerpc/include/asm/thread_info.h
  1257. @@ -42,6 +42,8 @@ struct thread_info {
  1258. int cpu; /* cpu we're on */
  1259. int preempt_count; /* 0 => preemptable,
  1260. <0 => BUG */
  1261. + int preempt_lazy_count; /* 0 => preemptable,
  1262. + <0 => BUG */
  1263. unsigned long local_flags; /* private flags for thread */
  1264. /* low level flags - has atomic operations done on it */
  1265. @@ -82,8 +84,7 @@ static inline struct thread_info *current_thread_info(void)
  1266. #define TIF_SYSCALL_TRACE 0 /* syscall trace active */
  1267. #define TIF_SIGPENDING 1 /* signal pending */
  1268. #define TIF_NEED_RESCHED 2 /* rescheduling necessary */
  1269. -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
  1270. - TIF_NEED_RESCHED */
  1271. +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */
  1272. #define TIF_32BIT 4 /* 32 bit binary */
  1273. #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */
  1274. #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
  1275. @@ -101,6 +102,8 @@ static inline struct thread_info *current_thread_info(void)
  1276. #if defined(CONFIG_PPC64)
  1277. #define TIF_ELF2ABI 18 /* function descriptors must die! */
  1278. #endif
  1279. +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling
  1280. + TIF_NEED_RESCHED */
  1281. /* as above, but as bit values */
  1282. #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
  1283. @@ -119,14 +122,16 @@ static inline struct thread_info *current_thread_info(void)
  1284. #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
  1285. #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
  1286. #define _TIF_NOHZ (1<<TIF_NOHZ)
  1287. +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
  1288. #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
  1289. _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
  1290. _TIF_NOHZ)
  1291. #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
  1292. _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
  1293. - _TIF_RESTORE_TM)
  1294. + _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY)
  1295. #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
  1296. +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  1297. /* Bits in local_flags */
  1298. /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
  1299. diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
  1300. index 0d0183d3180a..476cf176816b 100644
  1301. --- a/arch/powerpc/kernel/asm-offsets.c
  1302. +++ b/arch/powerpc/kernel/asm-offsets.c
  1303. @@ -162,6 +162,7 @@ int main(void)
  1304. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  1305. DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
  1306. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  1307. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  1308. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  1309. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  1310. diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
  1311. index 2405631e91a2..c21b4b42eaa0 100644
  1312. --- a/arch/powerpc/kernel/entry_32.S
  1313. +++ b/arch/powerpc/kernel/entry_32.S
  1314. @@ -818,7 +818,14 @@ user_exc_return: /* r10 contains MSR_KERNEL here */
  1315. cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
  1316. bne restore
  1317. andi. r8,r8,_TIF_NEED_RESCHED
  1318. + bne+ 1f
  1319. + lwz r0,TI_PREEMPT_LAZY(r9)
  1320. + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
  1321. + bne restore
  1322. + lwz r0,TI_FLAGS(r9)
  1323. + andi. r0,r0,_TIF_NEED_RESCHED_LAZY
  1324. beq+ restore
  1325. +1:
  1326. lwz r3,_MSR(r1)
  1327. andi. r0,r3,MSR_EE /* interrupts off? */
  1328. beq restore /* don't schedule if so */
  1329. @@ -829,11 +836,11 @@ user_exc_return: /* r10 contains MSR_KERNEL here */
  1330. */
  1331. bl trace_hardirqs_off
  1332. #endif
  1333. -1: bl preempt_schedule_irq
  1334. +2: bl preempt_schedule_irq
  1335. CURRENT_THREAD_INFO(r9, r1)
  1336. lwz r3,TI_FLAGS(r9)
  1337. - andi. r0,r3,_TIF_NEED_RESCHED
  1338. - bne- 1b
  1339. + andi. r0,r3,_TIF_NEED_RESCHED_MASK
  1340. + bne- 2b
  1341. #ifdef CONFIG_TRACE_IRQFLAGS
  1342. /* And now, to properly rebalance the above, we tell lockdep they
  1343. * are being turned back on, which will happen when we return
  1344. @@ -1154,7 +1161,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
  1345. #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
  1346. do_work: /* r10 contains MSR_KERNEL here */
  1347. - andi. r0,r9,_TIF_NEED_RESCHED
  1348. + andi. r0,r9,_TIF_NEED_RESCHED_MASK
  1349. beq do_user_signal
  1350. do_resched: /* r10 contains MSR_KERNEL here */
  1351. @@ -1175,7 +1182,7 @@ do_resched: /* r10 contains MSR_KERNEL here */
  1352. MTMSRD(r10) /* disable interrupts */
  1353. CURRENT_THREAD_INFO(r9, r1)
  1354. lwz r9,TI_FLAGS(r9)
  1355. - andi. r0,r9,_TIF_NEED_RESCHED
  1356. + andi. r0,r9,_TIF_NEED_RESCHED_MASK
  1357. bne- do_resched
  1358. andi. r0,r9,_TIF_USER_WORK_MASK
  1359. beq restore_user
  1360. diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
  1361. index 9916d150b28c..5dce9354223c 100644
  1362. --- a/arch/powerpc/kernel/entry_64.S
  1363. +++ b/arch/powerpc/kernel/entry_64.S
  1364. @@ -644,7 +644,7 @@ _GLOBAL(ret_from_except_lite)
  1365. bl restore_math
  1366. b restore
  1367. #endif
  1368. -1: andi. r0,r4,_TIF_NEED_RESCHED
  1369. +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK
  1370. beq 2f
  1371. bl restore_interrupts
  1372. SCHEDULE_USER
  1373. @@ -706,10 +706,18 @@ _GLOBAL(ret_from_except_lite)
  1374. #ifdef CONFIG_PREEMPT
  1375. /* Check if we need to preempt */
  1376. - andi. r0,r4,_TIF_NEED_RESCHED
  1377. - beq+ restore
  1378. - /* Check that preempt_count() == 0 and interrupts are enabled */
  1379. lwz r8,TI_PREEMPT(r9)
  1380. + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */
  1381. + bne restore
  1382. + andi. r0,r4,_TIF_NEED_RESCHED
  1383. + bne+ check_count
  1384. +
  1385. + andi. r0,r4,_TIF_NEED_RESCHED_LAZY
  1386. + beq+ restore
  1387. + lwz r8,TI_PREEMPT_LAZY(r9)
  1388. +
  1389. + /* Check that preempt_count() == 0 and interrupts are enabled */
  1390. +check_count:
  1391. cmpwi cr1,r8,0
  1392. ld r0,SOFTE(r1)
  1393. cmpdi r0,0
  1394. @@ -726,7 +734,7 @@ _GLOBAL(ret_from_except_lite)
  1395. /* Re-test flags and eventually loop */
  1396. CURRENT_THREAD_INFO(r9, r1)
  1397. ld r4,TI_FLAGS(r9)
  1398. - andi. r0,r4,_TIF_NEED_RESCHED
  1399. + andi. r0,r4,_TIF_NEED_RESCHED_MASK
  1400. bne 1b
  1401. /*
  1402. diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
  1403. index 290559df1e8b..070afa6da35d 100644
  1404. --- a/arch/powerpc/kernel/irq.c
  1405. +++ b/arch/powerpc/kernel/irq.c
  1406. @@ -614,6 +614,7 @@ void irq_ctx_init(void)
  1407. }
  1408. }
  1409. +#ifndef CONFIG_PREEMPT_RT_FULL
  1410. void do_softirq_own_stack(void)
  1411. {
  1412. struct thread_info *curtp, *irqtp;
  1413. @@ -631,6 +632,7 @@ void do_softirq_own_stack(void)
  1414. if (irqtp->flags)
  1415. set_bits(irqtp->flags, &curtp->flags);
  1416. }
  1417. +#endif
  1418. irq_hw_number_t virq_to_hw(unsigned int virq)
  1419. {
  1420. diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
  1421. index bf5160fbf9d8..e551d78c8038 100644
  1422. --- a/arch/powerpc/kernel/misc_32.S
  1423. +++ b/arch/powerpc/kernel/misc_32.S
  1424. @@ -40,6 +40,7 @@
  1425. * We store the saved ksp_limit in the unused part
  1426. * of the STACK_FRAME_OVERHEAD
  1427. */
  1428. +#ifndef CONFIG_PREEMPT_RT_FULL
  1429. _GLOBAL(call_do_softirq)
  1430. mflr r0
  1431. stw r0,4(r1)
  1432. @@ -56,6 +57,7 @@ _GLOBAL(call_do_softirq)
  1433. stw r10,THREAD+KSP_LIMIT(r2)
  1434. mtlr r0
  1435. blr
  1436. +#endif
  1437. /*
  1438. * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
  1439. diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
  1440. index f28754c497e5..31ae513acac9 100644
  1441. --- a/arch/powerpc/kernel/misc_64.S
  1442. +++ b/arch/powerpc/kernel/misc_64.S
  1443. @@ -30,6 +30,7 @@
  1444. .text
  1445. +#ifndef CONFIG_PREEMPT_RT_FULL
  1446. _GLOBAL(call_do_softirq)
  1447. mflr r0
  1448. std r0,16(r1)
  1449. @@ -40,6 +41,7 @@ _GLOBAL(call_do_softirq)
  1450. ld r0,16(r1)
  1451. mtlr r0
  1452. blr
  1453. +#endif
  1454. _GLOBAL(call_do_irq)
  1455. mflr r0
  1456. diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
  1457. index c2024ac9d4e8..2303788da7e1 100644
  1458. --- a/arch/powerpc/kvm/Kconfig
  1459. +++ b/arch/powerpc/kvm/Kconfig
  1460. @@ -172,6 +172,7 @@ config KVM_E500MC
  1461. config KVM_MPIC
  1462. bool "KVM in-kernel MPIC emulation"
  1463. depends on KVM && E500
  1464. + depends on !PREEMPT_RT_FULL
  1465. select HAVE_KVM_IRQCHIP
  1466. select HAVE_KVM_IRQFD
  1467. select HAVE_KVM_IRQ_ROUTING
  1468. diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
  1469. index 3f175e8aedb4..c4c02f91904c 100644
  1470. --- a/arch/powerpc/platforms/ps3/device-init.c
  1471. +++ b/arch/powerpc/platforms/ps3/device-init.c
  1472. @@ -752,7 +752,7 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev,
  1473. }
  1474. pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op);
  1475. - res = wait_event_interruptible(dev->done.wait,
  1476. + res = swait_event_interruptible(dev->done.wait,
  1477. dev->done.done || kthread_should_stop());
  1478. if (kthread_should_stop())
  1479. res = -EINTR;
  1480. diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
  1481. index 6c0378c0b8b5..abd58b4dff97 100644
  1482. --- a/arch/sh/kernel/irq.c
  1483. +++ b/arch/sh/kernel/irq.c
  1484. @@ -147,6 +147,7 @@ void irq_ctx_exit(int cpu)
  1485. hardirq_ctx[cpu] = NULL;
  1486. }
  1487. +#ifndef CONFIG_PREEMPT_RT_FULL
  1488. void do_softirq_own_stack(void)
  1489. {
  1490. struct thread_info *curctx;
  1491. @@ -174,6 +175,7 @@ void do_softirq_own_stack(void)
  1492. "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
  1493. );
  1494. }
  1495. +#endif
  1496. #else
  1497. static inline void handle_one_irq(unsigned int irq)
  1498. {
  1499. diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
  1500. index 57ffaf285c2f..733c72826f28 100644
  1501. --- a/arch/sparc/Kconfig
  1502. +++ b/arch/sparc/Kconfig
  1503. @@ -184,12 +184,10 @@ config NR_CPUS
  1504. source kernel/Kconfig.hz
  1505. config RWSEM_GENERIC_SPINLOCK
  1506. - bool
  1507. - default y if SPARC32
  1508. + def_bool PREEMPT_RT_FULL
  1509. config RWSEM_XCHGADD_ALGORITHM
  1510. - bool
  1511. - default y if SPARC64
  1512. + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
  1513. config GENERIC_HWEIGHT
  1514. bool
  1515. diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
  1516. index e22416ce56ea..d359de71153a 100644
  1517. --- a/arch/sparc/kernel/irq_64.c
  1518. +++ b/arch/sparc/kernel/irq_64.c
  1519. @@ -854,6 +854,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs)
  1520. set_irq_regs(old_regs);
  1521. }
  1522. +#ifndef CONFIG_PREEMPT_RT_FULL
  1523. void do_softirq_own_stack(void)
  1524. {
  1525. void *orig_sp, *sp = softirq_stack[smp_processor_id()];
  1526. @@ -868,6 +869,7 @@ void do_softirq_own_stack(void)
  1527. __asm__ __volatile__("mov %0, %%sp"
  1528. : : "r" (orig_sp));
  1529. }
  1530. +#endif
  1531. #ifdef CONFIG_HOTPLUG_CPU
  1532. void fixup_irqs(void)
  1533. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
  1534. index 2dc18605831f..ea65dc777a77 100644
  1535. --- a/arch/x86/Kconfig
  1536. +++ b/arch/x86/Kconfig
  1537. @@ -17,6 +17,7 @@ config X86_64
  1538. ### Arch settings
  1539. config X86
  1540. def_bool y
  1541. + select HAVE_PREEMPT_LAZY
  1542. select ACPI_LEGACY_TABLES_LOOKUP if ACPI
  1543. select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
  1544. select ANON_INODES
  1545. @@ -230,8 +231,11 @@ config ARCH_MAY_HAVE_PC_FDC
  1546. def_bool y
  1547. depends on ISA_DMA_API
  1548. +config RWSEM_GENERIC_SPINLOCK
  1549. + def_bool PREEMPT_RT_FULL
  1550. +
  1551. config RWSEM_XCHGADD_ALGORITHM
  1552. - def_bool y
  1553. + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
  1554. config GENERIC_CALIBRATE_DELAY
  1555. def_bool y
  1556. @@ -889,7 +893,7 @@ config IOMMU_HELPER
  1557. config MAXSMP
  1558. bool "Enable Maximum number of SMP Processors and NUMA Nodes"
  1559. depends on X86_64 && SMP && DEBUG_KERNEL
  1560. - select CPUMASK_OFFSTACK
  1561. + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
  1562. ---help---
  1563. Enable maximum number of CPUS and NUMA Nodes for this architecture.
  1564. If unsure, say N.
  1565. diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
  1566. index 064c7e2bd7c8..e400dd3947db 100644
  1567. --- a/arch/x86/crypto/aesni-intel_glue.c
  1568. +++ b/arch/x86/crypto/aesni-intel_glue.c
  1569. @@ -383,14 +383,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc,
  1570. err = blkcipher_walk_virt(desc, &walk);
  1571. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1572. - kernel_fpu_begin();
  1573. while ((nbytes = walk.nbytes)) {
  1574. + kernel_fpu_begin();
  1575. aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1576. - nbytes & AES_BLOCK_MASK);
  1577. + nbytes & AES_BLOCK_MASK);
  1578. + kernel_fpu_end();
  1579. nbytes &= AES_BLOCK_SIZE - 1;
  1580. err = blkcipher_walk_done(desc, &walk, nbytes);
  1581. }
  1582. - kernel_fpu_end();
  1583. return err;
  1584. }
  1585. @@ -407,14 +407,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc,
  1586. err = blkcipher_walk_virt(desc, &walk);
  1587. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1588. - kernel_fpu_begin();
  1589. while ((nbytes = walk.nbytes)) {
  1590. + kernel_fpu_begin();
  1591. aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1592. nbytes & AES_BLOCK_MASK);
  1593. + kernel_fpu_end();
  1594. nbytes &= AES_BLOCK_SIZE - 1;
  1595. err = blkcipher_walk_done(desc, &walk, nbytes);
  1596. }
  1597. - kernel_fpu_end();
  1598. return err;
  1599. }
  1600. @@ -431,14 +431,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc,
  1601. err = blkcipher_walk_virt(desc, &walk);
  1602. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1603. - kernel_fpu_begin();
  1604. while ((nbytes = walk.nbytes)) {
  1605. + kernel_fpu_begin();
  1606. aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1607. nbytes & AES_BLOCK_MASK, walk.iv);
  1608. + kernel_fpu_end();
  1609. nbytes &= AES_BLOCK_SIZE - 1;
  1610. err = blkcipher_walk_done(desc, &walk, nbytes);
  1611. }
  1612. - kernel_fpu_end();
  1613. return err;
  1614. }
  1615. @@ -455,14 +455,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc,
  1616. err = blkcipher_walk_virt(desc, &walk);
  1617. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1618. - kernel_fpu_begin();
  1619. while ((nbytes = walk.nbytes)) {
  1620. + kernel_fpu_begin();
  1621. aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1622. nbytes & AES_BLOCK_MASK, walk.iv);
  1623. + kernel_fpu_end();
  1624. nbytes &= AES_BLOCK_SIZE - 1;
  1625. err = blkcipher_walk_done(desc, &walk, nbytes);
  1626. }
  1627. - kernel_fpu_end();
  1628. return err;
  1629. }
  1630. @@ -514,18 +514,20 @@ static int ctr_crypt(struct blkcipher_desc *desc,
  1631. err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
  1632. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1633. - kernel_fpu_begin();
  1634. while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
  1635. + kernel_fpu_begin();
  1636. aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1637. nbytes & AES_BLOCK_MASK, walk.iv);
  1638. + kernel_fpu_end();
  1639. nbytes &= AES_BLOCK_SIZE - 1;
  1640. err = blkcipher_walk_done(desc, &walk, nbytes);
  1641. }
  1642. if (walk.nbytes) {
  1643. + kernel_fpu_begin();
  1644. ctr_crypt_final(ctx, &walk);
  1645. + kernel_fpu_end();
  1646. err = blkcipher_walk_done(desc, &walk, 0);
  1647. }
  1648. - kernel_fpu_end();
  1649. return err;
  1650. }
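
[Editorial sketch, not part of the patch: the aesni hunks above move kernel_fpu_begin()/kernel_fpu_end() inside the walk loop so preemption is only disabled per batch of blocks rather than across the whole request, which bounds preemption-off time on RT. The helper below is hypothetical and only shows the general shape of that pattern.]

/* Claim the FPU only around each batch so the preempt-disabled section
 * stays short; batch size and callback are illustrative assumptions. */
#include <linux/kernel.h>
#include <linux/types.h>
#include <asm/fpu/api.h>

static void crypt_in_batches(const u8 *src, u8 *dst, unsigned int nbytes,
			     void (*one_batch)(const u8 *, u8 *, unsigned int))
{
	while (nbytes) {
		unsigned int n = min_t(unsigned int, nbytes, 4096);

		kernel_fpu_begin();	/* preemption disabled from here ... */
		one_batch(src, dst, n);
		kernel_fpu_end();	/* ... to here only */

		src += n;
		dst += n;
		nbytes -= n;
	}
}
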
  1651. diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
  1652. index 8648158f3916..d7699130ee36 100644
  1653. --- a/arch/x86/crypto/cast5_avx_glue.c
  1654. +++ b/arch/x86/crypto/cast5_avx_glue.c
  1655. @@ -59,7 +59,7 @@ static inline void cast5_fpu_end(bool fpu_enabled)
  1656. static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
  1657. bool enc)
  1658. {
  1659. - bool fpu_enabled = false;
  1660. + bool fpu_enabled;
  1661. struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  1662. const unsigned int bsize = CAST5_BLOCK_SIZE;
  1663. unsigned int nbytes;
  1664. @@ -75,7 +75,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
  1665. u8 *wsrc = walk->src.virt.addr;
  1666. u8 *wdst = walk->dst.virt.addr;
  1667. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  1668. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  1669. /* Process multi-block batch */
  1670. if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
  1671. @@ -103,10 +103,9 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
  1672. } while (nbytes >= bsize);
  1673. done:
  1674. + cast5_fpu_end(fpu_enabled);
  1675. err = blkcipher_walk_done(desc, walk, nbytes);
  1676. }
  1677. -
  1678. - cast5_fpu_end(fpu_enabled);
  1679. return err;
  1680. }
  1681. @@ -227,7 +226,7 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
  1682. static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  1683. struct scatterlist *src, unsigned int nbytes)
  1684. {
  1685. - bool fpu_enabled = false;
  1686. + bool fpu_enabled;
  1687. struct blkcipher_walk walk;
  1688. int err;
  1689. @@ -236,12 +235,11 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  1690. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1691. while ((nbytes = walk.nbytes)) {
  1692. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  1693. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  1694. nbytes = __cbc_decrypt(desc, &walk);
  1695. + cast5_fpu_end(fpu_enabled);
  1696. err = blkcipher_walk_done(desc, &walk, nbytes);
  1697. }
  1698. -
  1699. - cast5_fpu_end(fpu_enabled);
  1700. return err;
  1701. }
  1702. @@ -311,7 +309,7 @@ static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
  1703. static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  1704. struct scatterlist *src, unsigned int nbytes)
  1705. {
  1706. - bool fpu_enabled = false;
  1707. + bool fpu_enabled;
  1708. struct blkcipher_walk walk;
  1709. int err;
  1710. @@ -320,13 +318,12 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  1711. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1712. while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
  1713. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  1714. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  1715. nbytes = __ctr_crypt(desc, &walk);
  1716. + cast5_fpu_end(fpu_enabled);
  1717. err = blkcipher_walk_done(desc, &walk, nbytes);
  1718. }
  1719. - cast5_fpu_end(fpu_enabled);
  1720. -
  1721. if (walk.nbytes) {
  1722. ctr_crypt_final(desc, &walk);
  1723. err = blkcipher_walk_done(desc, &walk, 0);
  1724. diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
  1725. index 6a85598931b5..3a506ce7ed93 100644
  1726. --- a/arch/x86/crypto/glue_helper.c
  1727. +++ b/arch/x86/crypto/glue_helper.c
  1728. @@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
  1729. void *ctx = crypto_blkcipher_ctx(desc->tfm);
  1730. const unsigned int bsize = 128 / 8;
  1731. unsigned int nbytes, i, func_bytes;
  1732. - bool fpu_enabled = false;
  1733. + bool fpu_enabled;
  1734. int err;
  1735. err = blkcipher_walk_virt(desc, walk);
  1736. @@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
  1737. u8 *wdst = walk->dst.virt.addr;
  1738. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1739. - desc, fpu_enabled, nbytes);
  1740. + desc, false, nbytes);
  1741. for (i = 0; i < gctx->num_funcs; i++) {
  1742. func_bytes = bsize * gctx->funcs[i].num_blocks;
  1743. @@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
  1744. }
  1745. done:
  1746. + glue_fpu_end(fpu_enabled);
  1747. err = blkcipher_walk_done(desc, walk, nbytes);
  1748. }
  1749. - glue_fpu_end(fpu_enabled);
  1750. return err;
  1751. }
  1752. @@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
  1753. struct scatterlist *src, unsigned int nbytes)
  1754. {
  1755. const unsigned int bsize = 128 / 8;
  1756. - bool fpu_enabled = false;
  1757. + bool fpu_enabled;
  1758. struct blkcipher_walk walk;
  1759. int err;
  1760. @@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
  1761. while ((nbytes = walk.nbytes)) {
  1762. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1763. - desc, fpu_enabled, nbytes);
  1764. + desc, false, nbytes);
  1765. nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
  1766. + glue_fpu_end(fpu_enabled);
  1767. err = blkcipher_walk_done(desc, &walk, nbytes);
  1768. }
  1769. - glue_fpu_end(fpu_enabled);
  1770. return err;
  1771. }
  1772. EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
  1773. @@ -277,7 +277,7 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
  1774. struct scatterlist *src, unsigned int nbytes)
  1775. {
  1776. const unsigned int bsize = 128 / 8;
  1777. - bool fpu_enabled = false;
  1778. + bool fpu_enabled;
  1779. struct blkcipher_walk walk;
  1780. int err;
  1781. @@ -286,13 +286,12 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
  1782. while ((nbytes = walk.nbytes) >= bsize) {
  1783. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1784. - desc, fpu_enabled, nbytes);
  1785. + desc, false, nbytes);
  1786. nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
  1787. + glue_fpu_end(fpu_enabled);
  1788. err = blkcipher_walk_done(desc, &walk, nbytes);
  1789. }
  1790. - glue_fpu_end(fpu_enabled);
  1791. -
  1792. if (walk.nbytes) {
  1793. glue_ctr_crypt_final_128bit(
  1794. gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
  1795. @@ -347,7 +346,7 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
  1796. void *tweak_ctx, void *crypt_ctx)
  1797. {
  1798. const unsigned int bsize = 128 / 8;
  1799. - bool fpu_enabled = false;
  1800. + bool fpu_enabled;
  1801. struct blkcipher_walk walk;
  1802. int err;
  1803. @@ -360,21 +359,21 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
  1804. /* set minimum length to bsize, for tweak_fn */
  1805. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1806. - desc, fpu_enabled,
  1807. + desc, false,
  1808. nbytes < bsize ? bsize : nbytes);
  1809. -
  1810. /* calculate first value of T */
  1811. tweak_fn(tweak_ctx, walk.iv, walk.iv);
  1812. + glue_fpu_end(fpu_enabled);
  1813. while (nbytes) {
  1814. + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1815. + desc, false, nbytes);
  1816. nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
  1817. + glue_fpu_end(fpu_enabled);
  1818. err = blkcipher_walk_done(desc, &walk, nbytes);
  1819. nbytes = walk.nbytes;
  1820. }
  1821. -
  1822. - glue_fpu_end(fpu_enabled);
  1823. -
  1824. return err;
  1825. }
  1826. EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
  1827. diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
  1828. index e79d93d44ecd..ef2564dc2656 100644
  1829. --- a/arch/x86/entry/common.c
  1830. +++ b/arch/x86/entry/common.c
  1831. @@ -202,7 +202,7 @@ long syscall_trace_enter(struct pt_regs *regs)
  1832. #define EXIT_TO_USERMODE_LOOP_FLAGS \
  1833. (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
  1834. - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)
  1835. + _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY)
  1836. static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
  1837. {
  1838. @@ -218,9 +218,16 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
  1839. /* We have work to do. */
  1840. local_irq_enable();
  1841. - if (cached_flags & _TIF_NEED_RESCHED)
  1842. + if (cached_flags & _TIF_NEED_RESCHED_MASK)
  1843. schedule();
  1844. +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
  1845. + if (unlikely(current->forced_info.si_signo)) {
  1846. + struct task_struct *t = current;
  1847. + force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
  1848. + t->forced_info.si_signo = 0;
  1849. + }
  1850. +#endif
  1851. if (cached_flags & _TIF_UPROBE)
  1852. uprobe_notify_resume(regs);

  1853. diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
  1854. index 10868aa734dc..7c7d3085f046 100644
  1855. --- a/arch/x86/entry/entry_32.S
  1856. +++ b/arch/x86/entry/entry_32.S
  1857. @@ -278,8 +278,24 @@ END(ret_from_exception)
  1858. ENTRY(resume_kernel)
  1859. DISABLE_INTERRUPTS(CLBR_ANY)
  1860. need_resched:
  1861. + # preempt count == 0 + NEED_RS set?
  1862. cmpl $0, PER_CPU_VAR(__preempt_count)
  1863. +#ifndef CONFIG_PREEMPT_LAZY
  1864. jnz restore_all
  1865. +#else
  1866. + jz test_int_off
  1867. +
  1868. + # at least preempt count == 0 ?
  1869. + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
  1870. + jne restore_all
  1871. +
  1872. + cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ?
  1873. + jnz restore_all
  1874. +
  1875. + testl $_TIF_NEED_RESCHED_LAZY, TI_flags(%ebp)
  1876. + jz restore_all
  1877. +test_int_off:
  1878. +#endif
  1879. testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
  1880. jz restore_all
  1881. call preempt_schedule_irq
  1882. diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
  1883. index 858b555e274b..a93639ddfbf6 100644
  1884. --- a/arch/x86/entry/entry_64.S
  1885. +++ b/arch/x86/entry/entry_64.S
  1886. @@ -511,7 +511,23 @@ GLOBAL(retint_user)
  1887. bt $9, EFLAGS(%rsp) /* were interrupts off? */
  1888. jnc 1f
  1889. 0: cmpl $0, PER_CPU_VAR(__preempt_count)
  1890. +#ifndef CONFIG_PREEMPT_LAZY
  1891. jnz 1f
  1892. +#else
  1893. + jz do_preempt_schedule_irq
  1894. +
  1895. + # at least preempt count == 0 ?
  1896. + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
  1897. + jnz 1f
  1898. +
  1899. + GET_THREAD_INFO(%rcx)
  1900. + cmpl $0, TI_preempt_lazy_count(%rcx)
  1901. + jnz 1f
  1902. +
  1903. + bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx)
  1904. + jnc 1f
  1905. +do_preempt_schedule_irq:
  1906. +#endif
  1907. call preempt_schedule_irq
  1908. jmp 0b
  1909. 1:
  1910. @@ -799,6 +815,7 @@ END(native_load_gs_index)
  1911. jmp 2b
  1912. .previous
  1913. +#ifndef CONFIG_PREEMPT_RT_FULL
  1914. /* Call softirq on interrupt stack. Interrupts are off. */
  1915. ENTRY(do_softirq_own_stack)
  1916. pushq %rbp
  1917. @@ -811,6 +828,7 @@ ENTRY(do_softirq_own_stack)
  1918. decl PER_CPU_VAR(irq_count)
  1919. ret
  1920. END(do_softirq_own_stack)
  1921. +#endif
  1922. #ifdef CONFIG_XEN
  1923. idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
  1924. diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
  1925. index d397deb58146..190af4271b5c 100644
  1926. --- a/arch/x86/include/asm/preempt.h
  1927. +++ b/arch/x86/include/asm/preempt.h
  1928. @@ -79,17 +79,33 @@ static __always_inline void __preempt_count_sub(int val)
  1929. * a decrement which hits zero means we have no preempt_count and should
  1930. * reschedule.
  1931. */
  1932. -static __always_inline bool __preempt_count_dec_and_test(void)
  1933. +static __always_inline bool ____preempt_count_dec_and_test(void)
  1934. {
  1935. GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
  1936. }
  1937. +static __always_inline bool __preempt_count_dec_and_test(void)
  1938. +{
  1939. + if (____preempt_count_dec_and_test())
  1940. + return true;
  1941. +#ifdef CONFIG_PREEMPT_LAZY
  1942. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  1943. +#else
  1944. + return false;
  1945. +#endif
  1946. +}
  1947. +
  1948. /*
  1949. * Returns true when we need to resched and can (barring IRQ state).
  1950. */
  1951. static __always_inline bool should_resched(int preempt_offset)
  1952. {
  1953. +#ifdef CONFIG_PREEMPT_LAZY
  1954. + return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset ||
  1955. + test_thread_flag(TIF_NEED_RESCHED_LAZY));
  1956. +#else
  1957. return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
  1958. +#endif
  1959. }
  1960. #ifdef CONFIG_PREEMPT
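
[Editorial sketch, not part of the patch: the preempt.h changes above, together with the entry-code hunks, implement one combined test. Preempt immediately when preempt_count() is zero and TIF_NEED_RESCHED is set; otherwise preempt only when both preempt_count() and the new preempt_lazy_count are zero and TIF_NEED_RESCHED_LAZY is set. The helper name below is hypothetical; preempt_lazy_count and TIF_NEED_RESCHED_LAZY are the fields this patch introduces.]

/* C rendering of the decision the asm paths above perform. */
#include <linux/preempt.h>
#include <linux/thread_info.h>
#include <linux/types.h>

static inline bool rt_should_preempt_now(void)
{
	if (preempt_count())			/* hard-disabled region */
		return false;
	if (test_thread_flag(TIF_NEED_RESCHED))	/* urgent resched request */
		return true;
#ifdef CONFIG_PREEMPT_LAZY
	if (current_thread_info()->preempt_lazy_count)
		return false;			/* lazy region still open */
	return test_thread_flag(TIF_NEED_RESCHED_LAZY);
#else
	return false;
#endif
}
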
  1961. diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
  1962. index 2138c9ae19ee..3f5b4ee2e2c1 100644
  1963. --- a/arch/x86/include/asm/signal.h
  1964. +++ b/arch/x86/include/asm/signal.h
  1965. @@ -23,6 +23,19 @@ typedef struct {
  1966. unsigned long sig[_NSIG_WORDS];
  1967. } sigset_t;
  1968. +/*
  1969. + * Because some traps use the IST stack, we must keep preemption
  1970. + * disabled while calling do_trap(), but do_trap() may call
  1971. + * force_sig_info() which will grab the signal spin_locks for the
  1972. + * task, which in PREEMPT_RT_FULL are mutexes. By defining
  1973. + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
  1974. + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
  1975. + * trap.
  1976. + */
  1977. +#if defined(CONFIG_PREEMPT_RT_FULL)
  1978. +#define ARCH_RT_DELAYS_SIGNAL_SEND
  1979. +#endif
  1980. +
  1981. #ifndef CONFIG_COMPAT
  1982. typedef sigset_t compat_sigset_t;
  1983. #endif
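
[Editorial sketch, not part of the patch: the comment above explains why force_sig_info() cannot run from an IST trap on RT. A simplified rendering of the sending side is shown below, assuming the task_struct::forced_info field added elsewhere in this patch; the exit_to_usermode_loop() hunk earlier in this file is the matching delivery side.]

/* Stash the signal on the task and let the return-to-user path deliver it. */
#include <linux/sched.h>
#include <linux/signal.h>

static void rt_delay_force_sig(int sig, struct siginfo *info)
{
	struct task_struct *t = current;

	t->forced_info = *info;
	t->forced_info.si_signo = sig;		   /* marks it pending */
	set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); /* run the exit-path hook */
}
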
  1984. diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
  1985. index 58505f01962f..02fa39652cd6 100644
  1986. --- a/arch/x86/include/asm/stackprotector.h
  1987. +++ b/arch/x86/include/asm/stackprotector.h
  1988. @@ -59,7 +59,7 @@
  1989. */
  1990. static __always_inline void boot_init_stack_canary(void)
  1991. {
  1992. - u64 canary;
  1993. + u64 uninitialized_var(canary);
  1994. u64 tsc;
  1995. #ifdef CONFIG_X86_64
  1996. @@ -70,8 +70,15 @@ static __always_inline void boot_init_stack_canary(void)
  1997. * of randomness. The TSC only matters for very early init,
  1998. * there it already has some randomness on most systems. Later
  1999. * on during the bootup the random pool has true entropy too.
  2000. + *
  2001. + * For preempt-rt we need to weaken the randomness a bit, as
  2002. + * we can't call into the random generator from atomic context
  2003. + * due to locking constraints. We just leave canary
  2004. + * uninitialized and use the TSC based randomness on top of it.
  2005. */
  2006. +#ifndef CONFIG_PREEMPT_RT_FULL
  2007. get_random_bytes(&canary, sizeof(canary));
  2008. +#endif
  2009. tsc = rdtsc();
  2010. canary += tsc + (tsc << 32UL);
  2011. diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
  2012. index ffae84df8a93..b4575ba19a59 100644
  2013. --- a/arch/x86/include/asm/thread_info.h
  2014. +++ b/arch/x86/include/asm/thread_info.h
  2015. @@ -58,6 +58,8 @@ struct thread_info {
  2016. __u32 status; /* thread synchronous flags */
  2017. __u32 cpu; /* current CPU */
  2018. mm_segment_t addr_limit;
  2019. + int preempt_lazy_count; /* 0 => lazy preemptable
  2020. + <0 => BUG */
  2021. unsigned int sig_on_uaccess_error:1;
  2022. unsigned int uaccess_err:1; /* uaccess failed */
  2023. };
  2024. @@ -95,6 +97,7 @@ struct thread_info {
  2025. #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
  2026. #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
  2027. #define TIF_SECCOMP 8 /* secure computing */
  2028. +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
  2029. #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
  2030. #define TIF_UPROBE 12 /* breakpointed or singlestepping */
  2031. #define TIF_NOTSC 16 /* TSC is not accessible in userland */
  2032. @@ -119,6 +122,7 @@ struct thread_info {
  2033. #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
  2034. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  2035. #define _TIF_SECCOMP (1 << TIF_SECCOMP)
  2036. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  2037. #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
  2038. #define _TIF_UPROBE (1 << TIF_UPROBE)
  2039. #define _TIF_NOTSC (1 << TIF_NOTSC)
  2040. @@ -155,6 +159,8 @@ struct thread_info {
  2041. #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
  2042. #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
  2043. +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  2044. +
  2045. #define STACK_WARN (THREAD_SIZE/8)
  2046. /*
  2047. diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
  2048. index fc808b83fccb..ebb40118abf5 100644
  2049. --- a/arch/x86/include/asm/uv/uv_bau.h
  2050. +++ b/arch/x86/include/asm/uv/uv_bau.h
  2051. @@ -615,9 +615,9 @@ struct bau_control {
  2052. cycles_t send_message;
  2053. cycles_t period_end;
  2054. cycles_t period_time;
  2055. - spinlock_t uvhub_lock;
  2056. - spinlock_t queue_lock;
  2057. - spinlock_t disable_lock;
  2058. + raw_spinlock_t uvhub_lock;
  2059. + raw_spinlock_t queue_lock;
  2060. + raw_spinlock_t disable_lock;
  2061. /* tunables */
  2062. int max_concurr;
  2063. int max_concurr_const;
  2064. @@ -776,15 +776,15 @@ static inline int atom_asr(short i, struct atomic_short *v)
  2065. * to be lowered below the current 'v'. atomic_add_unless can only stop
  2066. * on equal.
  2067. */
  2068. -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
  2069. +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u)
  2070. {
  2071. - spin_lock(lock);
  2072. + raw_spin_lock(lock);
  2073. if (atomic_read(v) >= u) {
  2074. - spin_unlock(lock);
  2075. + raw_spin_unlock(lock);
  2076. return 0;
  2077. }
  2078. atomic_inc(v);
  2079. - spin_unlock(lock);
  2080. + raw_spin_unlock(lock);
  2081. return 1;
  2082. }
  2083. diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
  2084. index ea7074784cc4..01ec643ce66e 100644
  2085. --- a/arch/x86/include/asm/uv/uv_hub.h
  2086. +++ b/arch/x86/include/asm/uv/uv_hub.h
  2087. @@ -492,7 +492,7 @@ struct uv_blade_info {
  2088. unsigned short nr_online_cpus;
  2089. unsigned short pnode;
  2090. short memory_nid;
  2091. - spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
  2092. + raw_spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
  2093. unsigned long nmi_count; /* obsolete, see uv_hub_nmi */
  2094. };
  2095. extern struct uv_blade_info *uv_blade_info;
  2096. diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
  2097. index fdb0fbfb1197..678c711e2a16 100644
  2098. --- a/arch/x86/kernel/apic/io_apic.c
  2099. +++ b/arch/x86/kernel/apic/io_apic.c
  2100. @@ -1711,7 +1711,8 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data)
  2101. static inline bool ioapic_irqd_mask(struct irq_data *data)
  2102. {
  2103. /* If we are moving the irq we need to mask it */
  2104. - if (unlikely(irqd_is_setaffinity_pending(data))) {
  2105. + if (unlikely(irqd_is_setaffinity_pending(data) &&
  2106. + !irqd_irq_inprogress(data))) {
  2107. mask_ioapic_irq(data);
  2108. return true;
  2109. }
  2110. diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
  2111. index d7ce96a7daca..cd97771f61d6 100644
  2112. --- a/arch/x86/kernel/apic/x2apic_uv_x.c
  2113. +++ b/arch/x86/kernel/apic/x2apic_uv_x.c
  2114. @@ -755,7 +755,7 @@ static void uv_heartbeat(unsigned long ignored)
  2115. uv_set_scir_bits(bits);
  2116. /* enable next timer period */
  2117. - mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL);
  2118. + mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
  2119. }
  2120. static void uv_heartbeat_enable(int cpu)
  2121. @@ -764,7 +764,7 @@ static void uv_heartbeat_enable(int cpu)
  2122. struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
  2123. uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
  2124. - setup_timer(timer, uv_heartbeat, cpu);
  2125. + setup_pinned_timer(timer, uv_heartbeat, cpu);
  2126. timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
  2127. add_timer_on(timer, cpu);
  2128. uv_cpu_hub_info(cpu)->scir.enabled = 1;
  2129. @@ -950,7 +950,7 @@ void __init uv_system_init(void)
  2130. uv_blade_info[blade].pnode = pnode;
  2131. uv_blade_info[blade].nr_possible_cpus = 0;
  2132. uv_blade_info[blade].nr_online_cpus = 0;
  2133. - spin_lock_init(&uv_blade_info[blade].nmi_lock);
  2134. + raw_spin_lock_init(&uv_blade_info[blade].nmi_lock);
  2135. min_pnode = min(pnode, min_pnode);
  2136. max_pnode = max(pnode, max_pnode);
  2137. blade++;
  2138. diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
  2139. index 5c042466f274..ea558b0576d0 100644
  2140. --- a/arch/x86/kernel/asm-offsets.c
  2141. +++ b/arch/x86/kernel/asm-offsets.c
  2142. @@ -32,6 +32,7 @@ void common(void) {
  2143. OFFSET(TI_flags, thread_info, flags);
  2144. OFFSET(TI_status, thread_info, status);
  2145. OFFSET(TI_addr_limit, thread_info, addr_limit);
  2146. + OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count);
  2147. BLANK();
  2148. OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
  2149. @@ -85,4 +86,5 @@ void common(void) {
  2150. BLANK();
  2151. DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
  2152. + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED);
  2153. }
  2154. diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
  2155. index f0c921b03e42..f3bd7b8302ef 100644
  2156. --- a/arch/x86/kernel/cpu/mcheck/mce.c
  2157. +++ b/arch/x86/kernel/cpu/mcheck/mce.c
  2158. @@ -41,6 +41,8 @@
  2159. #include <linux/debugfs.h>
  2160. #include <linux/irq_work.h>
  2161. #include <linux/export.h>
  2162. +#include <linux/jiffies.h>
  2163. +#include <linux/swork.h>
  2164. #include <asm/processor.h>
  2165. #include <asm/traps.h>
  2166. @@ -1240,7 +1242,7 @@ void mce_log_therm_throt_event(__u64 status)
  2167. static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
  2168. static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
  2169. -static DEFINE_PER_CPU(struct timer_list, mce_timer);
  2170. +static DEFINE_PER_CPU(struct hrtimer, mce_timer);
  2171. static unsigned long mce_adjust_timer_default(unsigned long interval)
  2172. {
  2173. @@ -1249,32 +1251,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
  2174. static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
  2175. -static void __restart_timer(struct timer_list *t, unsigned long interval)
  2176. +static enum hrtimer_restart __restart_timer(struct hrtimer *timer, unsigned long interval)
  2177. {
  2178. - unsigned long when = jiffies + interval;
  2179. - unsigned long flags;
  2180. -
  2181. - local_irq_save(flags);
  2182. -
  2183. - if (timer_pending(t)) {
  2184. - if (time_before(when, t->expires))
  2185. - mod_timer_pinned(t, when);
  2186. - } else {
  2187. - t->expires = round_jiffies(when);
  2188. - add_timer_on(t, smp_processor_id());
  2189. - }
  2190. -
  2191. - local_irq_restore(flags);
  2192. + if (!interval)
  2193. + return HRTIMER_NORESTART;
  2194. + hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(interval)));
  2195. + return HRTIMER_RESTART;
  2196. }
  2197. -static void mce_timer_fn(unsigned long data)
  2198. +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)
  2199. {
  2200. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  2201. - int cpu = smp_processor_id();
  2202. unsigned long iv;
  2203. - WARN_ON(cpu != data);
  2204. -
  2205. iv = __this_cpu_read(mce_next_interval);
  2206. if (mce_available(this_cpu_ptr(&cpu_info))) {
  2207. @@ -1297,7 +1285,7 @@ static void mce_timer_fn(unsigned long data)
  2208. done:
  2209. __this_cpu_write(mce_next_interval, iv);
  2210. - __restart_timer(t, iv);
  2211. + return __restart_timer(timer, iv);
  2212. }
  2213. /*
  2214. @@ -1305,7 +1293,7 @@ static void mce_timer_fn(unsigned long data)
  2215. */
  2216. void mce_timer_kick(unsigned long interval)
  2217. {
  2218. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  2219. + struct hrtimer *t = this_cpu_ptr(&mce_timer);
  2220. unsigned long iv = __this_cpu_read(mce_next_interval);
  2221. __restart_timer(t, interval);
  2222. @@ -1320,7 +1308,7 @@ static void mce_timer_delete_all(void)
  2223. int cpu;
  2224. for_each_online_cpu(cpu)
  2225. - del_timer_sync(&per_cpu(mce_timer, cpu));
  2226. + hrtimer_cancel(&per_cpu(mce_timer, cpu));
  2227. }
  2228. static void mce_do_trigger(struct work_struct *work)
  2229. @@ -1330,6 +1318,56 @@ static void mce_do_trigger(struct work_struct *work)
  2230. static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
  2231. +static void __mce_notify_work(struct swork_event *event)
  2232. +{
  2233. + /* Not more than two messages every minute */
  2234. + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
  2235. +
  2236. + /* wake processes polling /dev/mcelog */
  2237. + wake_up_interruptible(&mce_chrdev_wait);
  2238. +
  2239. + /*
  2240. + * There is no risk of missing notifications because
  2241. + * work_pending is always cleared before the function is
  2242. + * executed.
  2243. + */
  2244. + if (mce_helper[0] && !work_pending(&mce_trigger_work))
  2245. + schedule_work(&mce_trigger_work);
  2246. +
  2247. + if (__ratelimit(&ratelimit))
  2248. + pr_info(HW_ERR "Machine check events logged\n");
  2249. +}
  2250. +
  2251. +#ifdef CONFIG_PREEMPT_RT_FULL
  2252. +static bool notify_work_ready __read_mostly;
  2253. +static struct swork_event notify_work;
  2254. +
  2255. +static int mce_notify_work_init(void)
  2256. +{
  2257. + int err;
  2258. +
  2259. + err = swork_get();
  2260. + if (err)
  2261. + return err;
  2262. +
  2263. + INIT_SWORK(&notify_work, __mce_notify_work);
  2264. + notify_work_ready = true;
  2265. + return 0;
  2266. +}
  2267. +
  2268. +static void mce_notify_work(void)
  2269. +{
  2270. + if (notify_work_ready)
  2271. + swork_queue(&notify_work);
  2272. +}
  2273. +#else
  2274. +static void mce_notify_work(void)
  2275. +{
  2276. + __mce_notify_work(NULL);
  2277. +}
  2278. +static inline int mce_notify_work_init(void) { return 0; }
  2279. +#endif
  2280. +
  2281. /*
  2282. * Notify the user(s) about new machine check events.
  2283. * Can be called from interrupt context, but not from machine check/NMI
  2284. @@ -1337,19 +1375,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
  2285. */
  2286. int mce_notify_irq(void)
  2287. {
  2288. - /* Not more than two messages every minute */
  2289. - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
  2290. -
  2291. if (test_and_clear_bit(0, &mce_need_notify)) {
  2292. - /* wake processes polling /dev/mcelog */
  2293. - wake_up_interruptible(&mce_chrdev_wait);
  2294. -
  2295. - if (mce_helper[0])
  2296. - schedule_work(&mce_trigger_work);
  2297. -
  2298. - if (__ratelimit(&ratelimit))
  2299. - pr_info(HW_ERR "Machine check events logged\n");
  2300. -
  2301. + mce_notify_work();
  2302. return 1;
  2303. }
  2304. return 0;
  2305. @@ -1654,7 +1681,7 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
  2306. }
  2307. }
  2308. -static void mce_start_timer(unsigned int cpu, struct timer_list *t)
  2309. +static void mce_start_timer(unsigned int cpu, struct hrtimer *t)
  2310. {
  2311. unsigned long iv = check_interval * HZ;
  2312. @@ -1663,16 +1690,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
  2313. per_cpu(mce_next_interval, cpu) = iv;
  2314. - t->expires = round_jiffies(jiffies + iv);
  2315. - add_timer_on(t, cpu);
  2316. + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL),
  2317. + 0, HRTIMER_MODE_REL_PINNED);
  2318. }
  2319. static void __mcheck_cpu_init_timer(void)
  2320. {
  2321. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  2322. + struct hrtimer *t = this_cpu_ptr(&mce_timer);
  2323. unsigned int cpu = smp_processor_id();
  2324. - setup_timer(t, mce_timer_fn, cpu);
  2325. + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  2326. + t->function = mce_timer_fn;
  2327. mce_start_timer(cpu, t);
  2328. }
  2329. @@ -2393,6 +2421,8 @@ static void mce_disable_cpu(void *h)
  2330. if (!mce_available(raw_cpu_ptr(&cpu_info)))
  2331. return;
  2332. + hrtimer_cancel(this_cpu_ptr(&mce_timer));
  2333. +
  2334. if (!(action & CPU_TASKS_FROZEN))
  2335. cmci_clear();
  2336. @@ -2415,6 +2445,7 @@ static void mce_reenable_cpu(void *h)
  2337. if (b->init)
  2338. wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
  2339. }
  2340. + __mcheck_cpu_init_timer();
  2341. }
  2342. /* Get notified when a cpu comes on/off. Be hotplug friendly. */
  2343. @@ -2422,7 +2453,6 @@ static int
  2344. mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
  2345. {
  2346. unsigned int cpu = (unsigned long)hcpu;
  2347. - struct timer_list *t = &per_cpu(mce_timer, cpu);
  2348. switch (action & ~CPU_TASKS_FROZEN) {
  2349. case CPU_ONLINE:
  2350. @@ -2442,11 +2472,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
  2351. break;
  2352. case CPU_DOWN_PREPARE:
  2353. smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
  2354. - del_timer_sync(t);
  2355. break;
  2356. case CPU_DOWN_FAILED:
  2357. smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
  2358. - mce_start_timer(cpu, t);
  2359. break;
  2360. }
  2361. @@ -2485,6 +2513,10 @@ static __init int mcheck_init_device(void)
  2362. goto err_out;
  2363. }
  2364. + err = mce_notify_work_init();
  2365. + if (err)
  2366. + goto err_out;
  2367. +
  2368. if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) {
  2369. err = -ENOMEM;
  2370. goto err_out;
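
[Editorial sketch, not part of the patch: the mce.c hunk replaces the per-CPU timer_list poller with a pinned, self-rearming hrtimer, keeping the periodic check out of the softirq-driven timer wheel on RT. The names and period below are hypothetical; only the general pattern is shown.]

/* Self-rearming pinned hrtimer: handler forwards itself and restarts. */
#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer poll_timer;

static enum hrtimer_restart poll_fn(struct hrtimer *t)
{
	/* ... do the periodic check ... */
	hrtimer_forward_now(t, ms_to_ktime(1000));	/* next period */
	return HRTIMER_RESTART;
}

static void poll_start(void)
{
	hrtimer_init(&poll_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	poll_timer.function = poll_fn;
	hrtimer_start(&poll_timer, ms_to_ktime(1000), HRTIMER_MODE_REL_PINNED);
}
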
  2371. diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
  2372. index 464ffd69b92e..00db1aad1548 100644
  2373. --- a/arch/x86/kernel/dumpstack_32.c
  2374. +++ b/arch/x86/kernel/dumpstack_32.c
  2375. @@ -42,7 +42,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
  2376. unsigned long *stack, unsigned long bp,
  2377. const struct stacktrace_ops *ops, void *data)
  2378. {
  2379. - const unsigned cpu = get_cpu();
  2380. + const unsigned cpu = get_cpu_light();
  2381. int graph = 0;
  2382. u32 *prev_esp;
  2383. @@ -86,7 +86,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
  2384. break;
  2385. touch_nmi_watchdog();
  2386. }
  2387. - put_cpu();
  2388. + put_cpu_light();
  2389. }
  2390. EXPORT_SYMBOL(dump_trace);
  2391. diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
  2392. index 5f1c6266eb30..c331e3fef465 100644
  2393. --- a/arch/x86/kernel/dumpstack_64.c
  2394. +++ b/arch/x86/kernel/dumpstack_64.c
  2395. @@ -152,7 +152,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
  2396. unsigned long *stack, unsigned long bp,
  2397. const struct stacktrace_ops *ops, void *data)
  2398. {
  2399. - const unsigned cpu = get_cpu();
  2400. + const unsigned cpu = get_cpu_light();
  2401. struct thread_info *tinfo;
  2402. unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
  2403. unsigned long dummy;
  2404. @@ -241,7 +241,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
  2405. * This handles the process stack:
  2406. */
  2407. bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
  2408. - put_cpu();
  2409. + put_cpu_light();
  2410. }
  2411. EXPORT_SYMBOL(dump_trace);
  2412. @@ -255,7 +255,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
  2413. int cpu;
  2414. int i;
  2415. - preempt_disable();
  2416. + migrate_disable();
  2417. cpu = smp_processor_id();
  2418. irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
  2419. @@ -291,7 +291,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
  2420. pr_cont(" %016lx", *stack++);
  2421. touch_nmi_watchdog();
  2422. }
  2423. - preempt_enable();
  2424. + migrate_enable();
  2425. pr_cont("\n");
  2426. show_trace_log_lvl(task, regs, sp, bp, log_lvl);
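
[Editorial sketch, not part of the patch: the dumpstack hunks swap get_cpu()/preempt_disable() for get_cpu_light()/migrate_disable(), primitives introduced elsewhere in this patch that pin the task to its CPU while leaving it preemptible, so a long stack walk cannot inflate preemption latency. The function below is hypothetical.]

/* CPU stability without disabling preemption; migrate_disable() is
 * provided by this patch, not by a vanilla kernel of this era. */
#include <linux/preempt.h>
#include <linux/smp.h>

static void walk_this_cpus_irq_stack(void)
{
	int cpu;

	migrate_disable();		/* no migration, still preemptible */
	cpu = smp_processor_id();	/* stable for the whole walk */
	/* ... walk per_cpu(irq_stack_ptr, cpu); may take a while ... */
	migrate_enable();
}
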
  2427. diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
  2428. index 38da8f29a9c8..ce71f7098f15 100644
  2429. --- a/arch/x86/kernel/irq_32.c
  2430. +++ b/arch/x86/kernel/irq_32.c
  2431. @@ -128,6 +128,7 @@ void irq_ctx_init(int cpu)
  2432. cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
  2433. }
  2434. +#ifndef CONFIG_PREEMPT_RT_FULL
  2435. void do_softirq_own_stack(void)
  2436. {
  2437. struct thread_info *curstk;
  2438. @@ -146,6 +147,7 @@ void do_softirq_own_stack(void)
  2439. call_on_stack(__do_softirq, isp);
  2440. }
  2441. +#endif
  2442. bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
  2443. {
  2444. diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
  2445. index 9f950917528b..4dd4beae917a 100644
  2446. --- a/arch/x86/kernel/process_32.c
  2447. +++ b/arch/x86/kernel/process_32.c
  2448. @@ -35,6 +35,7 @@
  2449. #include <linux/uaccess.h>
  2450. #include <linux/io.h>
  2451. #include <linux/kdebug.h>
  2452. +#include <linux/highmem.h>
  2453. #include <asm/pgtable.h>
  2454. #include <asm/ldt.h>
  2455. @@ -210,6 +211,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
  2456. }
  2457. EXPORT_SYMBOL_GPL(start_thread);
  2458. +#ifdef CONFIG_PREEMPT_RT_FULL
  2459. +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
  2460. +{
  2461. + int i;
  2462. +
  2463. + /*
  2464. + * Clear @prev's kmap_atomic mappings
  2465. + */
  2466. + for (i = 0; i < prev_p->kmap_idx; i++) {
  2467. + int idx = i + KM_TYPE_NR * smp_processor_id();
  2468. + pte_t *ptep = kmap_pte - idx;
  2469. +
  2470. + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
  2471. + }
  2472. + /*
  2473. + * Restore @next_p's kmap_atomic mappings
  2474. + */
  2475. + for (i = 0; i < next_p->kmap_idx; i++) {
  2476. + int idx = i + KM_TYPE_NR * smp_processor_id();
  2477. +
  2478. + if (!pte_none(next_p->kmap_pte[i]))
  2479. + set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
  2480. + }
  2481. +}
  2482. +#else
  2483. +static inline void
  2484. +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
  2485. +#endif
  2486. +
  2487. /*
  2488. * switch_to(x,y) should switch tasks from x to y.
  2489. @@ -286,6 +316,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
  2490. task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
  2491. __switch_to_xtra(prev_p, next_p, tss);
  2492. + switch_kmaps(prev_p, next_p);
  2493. +
  2494. /*
  2495. * Leave lazy mode, flushing any hypercalls made here.
  2496. * This must be done before restoring TLS segments so
  2497. diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
  2498. index 1a2da0e5a373..a0c78a291f02 100644
  2499. --- a/arch/x86/kvm/lapic.c
  2500. +++ b/arch/x86/kvm/lapic.c
  2501. @@ -1870,6 +1870,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
  2502. hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
  2503. HRTIMER_MODE_ABS_PINNED);
  2504. apic->lapic_timer.timer.function = apic_timer_fn;
  2505. + apic->lapic_timer.timer.irqsafe = 1;
  2506. /*
  2507. * APIC is created enabled. This will prevent kvm_lapic_set_base from
  2508. diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
  2509. index 6b9701babaa1..9da12e3ebcc5 100644
  2510. --- a/arch/x86/kvm/x86.c
  2511. +++ b/arch/x86/kvm/x86.c
  2512. @@ -5855,6 +5855,13 @@ int kvm_arch_init(void *opaque)
  2513. goto out;
  2514. }
  2515. +#ifdef CONFIG_PREEMPT_RT_FULL
  2516. + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
  2517. + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
  2518. + return -EOPNOTSUPP;
  2519. + }
  2520. +#endif
  2521. +
  2522. r = kvm_mmu_module_init();
  2523. if (r)
  2524. goto out_free_percpu;
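
Note on the two KVM hunks above: the lapic hrtimer is marked irqsafe and kvm_arch_init() refuses to load on RT without an invariant TSC. The irqsafe member is added to struct hrtimer elsewhere in this series (it is not a mainline field); when set, the callback keeps running from hard interrupt context instead of being deferred on RT. A minimal sketch of that pattern, assuming the RT-only field and using illustrative names:

/* Sketch only: hrtimer whose callback must stay in hard irq context on RT. */
#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer wd_timer;

static enum hrtimer_restart wd_fire(struct hrtimer *t)
{
	/* runs in hard irq context even with PREEMPT_RT_FULL because of ->irqsafe */
	return HRTIMER_NORESTART;
}

static void wd_start(void)
{
	hrtimer_init(&wd_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	wd_timer.function = wd_fire;
	wd_timer.irqsafe = 1;	/* field added by this series, not mainline */
	hrtimer_start(&wd_timer, ms_to_ktime(1), HRTIMER_MODE_REL);
}
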
  2525. diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
  2526. index a6d739258137..bd24ba1c4a86 100644
  2527. --- a/arch/x86/mm/highmem_32.c
  2528. +++ b/arch/x86/mm/highmem_32.c
  2529. @@ -32,10 +32,11 @@ EXPORT_SYMBOL(kunmap);
  2530. */
  2531. void *kmap_atomic_prot(struct page *page, pgprot_t prot)
  2532. {
  2533. + pte_t pte = mk_pte(page, prot);
  2534. unsigned long vaddr;
  2535. int idx, type;
  2536. - preempt_disable();
  2537. + preempt_disable_nort();
  2538. pagefault_disable();
  2539. if (!PageHighMem(page))
  2540. @@ -45,7 +46,10 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
  2541. idx = type + KM_TYPE_NR*smp_processor_id();
  2542. vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
  2543. BUG_ON(!pte_none(*(kmap_pte-idx)));
  2544. - set_pte(kmap_pte-idx, mk_pte(page, prot));
  2545. +#ifdef CONFIG_PREEMPT_RT_FULL
  2546. + current->kmap_pte[type] = pte;
  2547. +#endif
  2548. + set_pte(kmap_pte-idx, pte);
  2549. arch_flush_lazy_mmu_mode();
  2550. return (void *)vaddr;
  2551. @@ -88,6 +92,9 @@ void __kunmap_atomic(void *kvaddr)
  2552. * is a bad idea also, in case the page changes cacheability
  2553. * attributes or becomes a protected page in a hypervisor.
  2554. */
  2555. +#ifdef CONFIG_PREEMPT_RT_FULL
  2556. + current->kmap_pte[type] = __pte(0);
  2557. +#endif
  2558. kpte_clear_flush(kmap_pte-idx, vaddr);
  2559. kmap_atomic_idx_pop();
  2560. arch_flush_lazy_mmu_mode();
  2561. @@ -100,7 +107,7 @@ void __kunmap_atomic(void *kvaddr)
  2562. #endif
  2563. pagefault_enable();
  2564. - preempt_enable();
  2565. + preempt_enable_nort();
  2566. }
  2567. EXPORT_SYMBOL(__kunmap_atomic);
  2568. diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
  2569. index 9c0ff045fdd4..dd25dd1671b6 100644
  2570. --- a/arch/x86/mm/iomap_32.c
  2571. +++ b/arch/x86/mm/iomap_32.c
  2572. @@ -56,6 +56,7 @@ EXPORT_SYMBOL_GPL(iomap_free);
  2573. void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
  2574. {
  2575. + pte_t pte = pfn_pte(pfn, prot);
  2576. unsigned long vaddr;
  2577. int idx, type;
  2578. @@ -65,7 +66,12 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
  2579. type = kmap_atomic_idx_push();
  2580. idx = type + KM_TYPE_NR * smp_processor_id();
  2581. vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
  2582. - set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
  2583. + WARN_ON(!pte_none(*(kmap_pte - idx)));
  2584. +
  2585. +#ifdef CONFIG_PREEMPT_RT_FULL
  2586. + current->kmap_pte[type] = pte;
  2587. +#endif
  2588. + set_pte(kmap_pte - idx, pte);
  2589. arch_flush_lazy_mmu_mode();
  2590. return (void *)vaddr;
  2591. @@ -113,6 +119,9 @@ iounmap_atomic(void __iomem *kvaddr)
  2592. * is a bad idea also, in case the page changes cacheability
  2593. * attributes or becomes a protected page in a hypervisor.
  2594. */
  2595. +#ifdef CONFIG_PREEMPT_RT_FULL
  2596. + current->kmap_pte[type] = __pte(0);
  2597. +#endif
  2598. kpte_clear_flush(kmap_pte-idx, vaddr);
  2599. kmap_atomic_idx_pop();
  2600. }
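
Note on the kmap changes above (highmem_32.c, iomap_32.c, together with switch_kmaps() in process_32.c): on RT a kmap_atomic section no longer disables preemption (preempt_disable_nort() is a no-op there); each mapping's PTE is recorded in the task via current->kmap_pte[type], a field added to task_struct elsewhere in this series, and replayed by switch_kmaps() in __switch_to(), so a task preempted mid-mapping finds its fixmap slots intact when it runs again. Callers keep the usual idiom; a minimal sketch with illustrative names:

/* Sketch only: ordinary kmap_atomic usage, unchanged for callers. */
#include <linux/highmem.h>
#include <linux/string.h>

static void copy_into_page(struct page *page, const void *src, size_t len)
{
	void *dst = kmap_atomic(page);	/* on RT: pinned to the CPU, but preemptible */

	memcpy(dst, src, len);
	kunmap_atomic(dst);		/* on RT: also clears the saved per-task PTE slot */
}
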
  2601. diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
  2602. index 3b6ec42718e4..7871083de089 100644
  2603. --- a/arch/x86/platform/uv/tlb_uv.c
  2604. +++ b/arch/x86/platform/uv/tlb_uv.c
  2605. @@ -714,9 +714,9 @@ static void destination_plugged(struct bau_desc *bau_desc,
  2606. quiesce_local_uvhub(hmaster);
  2607. - spin_lock(&hmaster->queue_lock);
  2608. + raw_spin_lock(&hmaster->queue_lock);
  2609. reset_with_ipi(&bau_desc->distribution, bcp);
  2610. - spin_unlock(&hmaster->queue_lock);
  2611. + raw_spin_unlock(&hmaster->queue_lock);
  2612. end_uvhub_quiesce(hmaster);
  2613. @@ -736,9 +736,9 @@ static void destination_timeout(struct bau_desc *bau_desc,
  2614. quiesce_local_uvhub(hmaster);
  2615. - spin_lock(&hmaster->queue_lock);
  2616. + raw_spin_lock(&hmaster->queue_lock);
  2617. reset_with_ipi(&bau_desc->distribution, bcp);
  2618. - spin_unlock(&hmaster->queue_lock);
  2619. + raw_spin_unlock(&hmaster->queue_lock);
  2620. end_uvhub_quiesce(hmaster);
  2621. @@ -759,7 +759,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
  2622. cycles_t tm1;
  2623. hmaster = bcp->uvhub_master;
  2624. - spin_lock(&hmaster->disable_lock);
  2625. + raw_spin_lock(&hmaster->disable_lock);
  2626. if (!bcp->baudisabled) {
  2627. stat->s_bau_disabled++;
  2628. tm1 = get_cycles();
  2629. @@ -772,7 +772,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
  2630. }
  2631. }
  2632. }
  2633. - spin_unlock(&hmaster->disable_lock);
  2634. + raw_spin_unlock(&hmaster->disable_lock);
  2635. }
  2636. static void count_max_concurr(int stat, struct bau_control *bcp,
  2637. @@ -835,7 +835,7 @@ static void record_send_stats(cycles_t time1, cycles_t time2,
  2638. */
  2639. static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
  2640. {
  2641. - spinlock_t *lock = &hmaster->uvhub_lock;
  2642. + raw_spinlock_t *lock = &hmaster->uvhub_lock;
  2643. atomic_t *v;
  2644. v = &hmaster->active_descriptor_count;
  2645. @@ -968,7 +968,7 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
  2646. struct bau_control *hmaster;
  2647. hmaster = bcp->uvhub_master;
  2648. - spin_lock(&hmaster->disable_lock);
  2649. + raw_spin_lock(&hmaster->disable_lock);
  2650. if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
  2651. stat->s_bau_reenabled++;
  2652. for_each_present_cpu(tcpu) {
  2653. @@ -980,10 +980,10 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
  2654. tbcp->period_giveups = 0;
  2655. }
  2656. }
  2657. - spin_unlock(&hmaster->disable_lock);
  2658. + raw_spin_unlock(&hmaster->disable_lock);
  2659. return 0;
  2660. }
  2661. - spin_unlock(&hmaster->disable_lock);
  2662. + raw_spin_unlock(&hmaster->disable_lock);
  2663. return -1;
  2664. }
  2665. @@ -1901,9 +1901,9 @@ static void __init init_per_cpu_tunables(void)
  2666. bcp->cong_reps = congested_reps;
  2667. bcp->disabled_period = sec_2_cycles(disabled_period);
  2668. bcp->giveup_limit = giveup_limit;
  2669. - spin_lock_init(&bcp->queue_lock);
  2670. - spin_lock_init(&bcp->uvhub_lock);
  2671. - spin_lock_init(&bcp->disable_lock);
  2672. + raw_spin_lock_init(&bcp->queue_lock);
  2673. + raw_spin_lock_init(&bcp->uvhub_lock);
  2674. + raw_spin_lock_init(&bcp->disable_lock);
  2675. }
  2676. }
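
Note on the tlb_uv.c conversion above (uv_time.c below gets the same treatment): spinlock_t becomes a sleeping lock on RT, which these BAU paths cannot tolerate because they run with interrupts disabled and drive cross-CPU resets, so the locks are switched to raw_spinlock_t, which always spins. The API is a one-for-one rename; a minimal sketch with illustrative names:

/* Sketch only: a lock that must remain a spinning lock on PREEMPT_RT. */
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(hw_lock);
static unsigned long hw_state;

static void hw_update(unsigned long val)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&hw_lock, flags);	/* never sleeps, even on RT */
	hw_state = val;
	raw_spin_unlock_irqrestore(&hw_lock, flags);
}
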
  2677. diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
  2678. index 2b158a9fa1d7..5e0b122620cb 100644
  2679. --- a/arch/x86/platform/uv/uv_time.c
  2680. +++ b/arch/x86/platform/uv/uv_time.c
  2681. @@ -57,7 +57,7 @@ static DEFINE_PER_CPU(struct clock_event_device, cpu_ced);
  2682. /* There is one of these allocated per node */
  2683. struct uv_rtc_timer_head {
  2684. - spinlock_t lock;
  2685. + raw_spinlock_t lock;
  2686. /* next cpu waiting for timer, local node relative: */
  2687. int next_cpu;
  2688. /* number of cpus on this node: */
  2689. @@ -177,7 +177,7 @@ static __init int uv_rtc_allocate_timers(void)
  2690. uv_rtc_deallocate_timers();
  2691. return -ENOMEM;
  2692. }
  2693. - spin_lock_init(&head->lock);
  2694. + raw_spin_lock_init(&head->lock);
  2695. head->ncpus = uv_blade_nr_possible_cpus(bid);
  2696. head->next_cpu = -1;
  2697. blade_info[bid] = head;
  2698. @@ -231,7 +231,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires)
  2699. unsigned long flags;
  2700. int next_cpu;
  2701. - spin_lock_irqsave(&head->lock, flags);
  2702. + raw_spin_lock_irqsave(&head->lock, flags);
  2703. next_cpu = head->next_cpu;
  2704. *t = expires;
  2705. @@ -243,12 +243,12 @@ static int uv_rtc_set_timer(int cpu, u64 expires)
  2706. if (uv_setup_intr(cpu, expires)) {
  2707. *t = ULLONG_MAX;
  2708. uv_rtc_find_next_timer(head, pnode);
  2709. - spin_unlock_irqrestore(&head->lock, flags);
  2710. + raw_spin_unlock_irqrestore(&head->lock, flags);
  2711. return -ETIME;
  2712. }
  2713. }
  2714. - spin_unlock_irqrestore(&head->lock, flags);
  2715. + raw_spin_unlock_irqrestore(&head->lock, flags);
  2716. return 0;
  2717. }
  2718. @@ -267,7 +267,7 @@ static int uv_rtc_unset_timer(int cpu, int force)
  2719. unsigned long flags;
  2720. int rc = 0;
  2721. - spin_lock_irqsave(&head->lock, flags);
  2722. + raw_spin_lock_irqsave(&head->lock, flags);
  2723. if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
  2724. rc = 1;
  2725. @@ -279,7 +279,7 @@ static int uv_rtc_unset_timer(int cpu, int force)
  2726. uv_rtc_find_next_timer(head, pnode);
  2727. }
  2728. - spin_unlock_irqrestore(&head->lock, flags);
  2729. + raw_spin_unlock_irqrestore(&head->lock, flags);
  2730. return rc;
  2731. }
  2732. @@ -299,13 +299,18 @@ static int uv_rtc_unset_timer(int cpu, int force)
  2733. static cycle_t uv_read_rtc(struct clocksource *cs)
  2734. {
  2735. unsigned long offset;
  2736. + cycle_t cycles;
  2737. + preempt_disable();
  2738. if (uv_get_min_hub_revision_id() == 1)
  2739. offset = 0;
  2740. else
  2741. offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
  2742. - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
  2743. + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
  2744. + preempt_enable();
  2745. +
  2746. + return cycles;
  2747. }
  2748. /*
  2749. diff --git a/block/blk-core.c b/block/blk-core.c
  2750. index b60537b2c35b..b9bdc9d0262d 100644
  2751. --- a/block/blk-core.c
  2752. +++ b/block/blk-core.c
  2753. @@ -125,6 +125,9 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
  2754. INIT_LIST_HEAD(&rq->queuelist);
  2755. INIT_LIST_HEAD(&rq->timeout_list);
  2756. +#ifdef CONFIG_PREEMPT_RT_FULL
  2757. + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
  2758. +#endif
  2759. rq->cpu = -1;
  2760. rq->q = q;
  2761. rq->__sector = (sector_t) -1;
  2762. @@ -233,7 +236,7 @@ EXPORT_SYMBOL(blk_start_queue_async);
  2763. **/
  2764. void blk_start_queue(struct request_queue *q)
  2765. {
  2766. - WARN_ON(!irqs_disabled());
  2767. + WARN_ON_NONRT(!irqs_disabled());
  2768. queue_flag_clear(QUEUE_FLAG_STOPPED, q);
  2769. __blk_run_queue(q);
  2770. @@ -657,7 +660,7 @@ int blk_queue_enter(struct request_queue *q, bool nowait)
  2771. if (nowait)
  2772. return -EBUSY;
  2773. - ret = wait_event_interruptible(q->mq_freeze_wq,
  2774. + ret = swait_event_interruptible(q->mq_freeze_wq,
  2775. !atomic_read(&q->mq_freeze_depth) ||
  2776. blk_queue_dying(q));
  2777. if (blk_queue_dying(q))
  2778. @@ -677,7 +680,7 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref)
  2779. struct request_queue *q =
  2780. container_of(ref, struct request_queue, q_usage_counter);
  2781. - wake_up_all(&q->mq_freeze_wq);
  2782. + swake_up_all(&q->mq_freeze_wq);
  2783. }
  2784. static void blk_rq_timed_out_timer(unsigned long data)
  2785. @@ -746,7 +749,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
  2786. q->bypass_depth = 1;
  2787. __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
  2788. - init_waitqueue_head(&q->mq_freeze_wq);
  2789. + init_swait_queue_head(&q->mq_freeze_wq);
  2790. /*
  2791. * Init percpu_ref in atomic mode so that it's faster to shutdown.
  2792. @@ -3209,7 +3212,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
  2793. blk_run_queue_async(q);
  2794. else
  2795. __blk_run_queue(q);
  2796. - spin_unlock(q->queue_lock);
  2797. + spin_unlock_irq(q->queue_lock);
  2798. }
  2799. static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
  2800. @@ -3257,7 +3260,6 @@ EXPORT_SYMBOL(blk_check_plugged);
  2801. void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  2802. {
  2803. struct request_queue *q;
  2804. - unsigned long flags;
  2805. struct request *rq;
  2806. LIST_HEAD(list);
  2807. unsigned int depth;
  2808. @@ -3277,11 +3279,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  2809. q = NULL;
  2810. depth = 0;
  2811. - /*
  2812. - * Save and disable interrupts here, to avoid doing it for every
  2813. - * queue lock we have to take.
  2814. - */
  2815. - local_irq_save(flags);
  2816. while (!list_empty(&list)) {
  2817. rq = list_entry_rq(list.next);
  2818. list_del_init(&rq->queuelist);
  2819. @@ -3294,7 +3291,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  2820. queue_unplugged(q, depth, from_schedule);
  2821. q = rq->q;
  2822. depth = 0;
  2823. - spin_lock(q->queue_lock);
  2824. + spin_lock_irq(q->queue_lock);
  2825. }
  2826. /*
  2827. @@ -3321,8 +3318,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  2828. */
  2829. if (q)
  2830. queue_unplugged(q, depth, from_schedule);
  2831. -
  2832. - local_irq_restore(flags);
  2833. }
  2834. void blk_finish_plug(struct blk_plug *plug)
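
Note on the mq_freeze_wq conversion above: the wait/wake pair moves from a regular waitqueue to a simple waitqueue (linux/swait.h). A simple waitqueue is protected by a raw lock internally, so swake_up_all() can be issued from contexts where a normal waitqueue's spinlock_t, which sleeps on RT, would be illegal. A minimal usage sketch mirroring that conversion, with illustrative names:

/* Sketch only: simple waitqueue wait/wake pair. */
#include <linux/swait.h>
#include <linux/atomic.h>

static DECLARE_SWAIT_QUEUE_HEAD(freeze_wq);
static atomic_t freeze_depth = ATOMIC_INIT(0);

static void wait_until_unfrozen(void)
{
	swait_event(freeze_wq, atomic_read(&freeze_depth) == 0);
}

static void unfreeze(void)
{
	if (atomic_dec_and_test(&freeze_depth))
		swake_up_all(&freeze_wq);	/* raw lock inside: safe on RT */
}
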
  2835. diff --git a/block/blk-ioc.c b/block/blk-ioc.c
  2836. index 381cb50a673c..dc8785233d94 100644
  2837. --- a/block/blk-ioc.c
  2838. +++ b/block/blk-ioc.c
  2839. @@ -7,6 +7,7 @@
  2840. #include <linux/bio.h>
  2841. #include <linux/blkdev.h>
  2842. #include <linux/slab.h>
  2843. +#include <linux/delay.h>
  2844. #include "blk.h"
  2845. @@ -109,7 +110,7 @@ static void ioc_release_fn(struct work_struct *work)
  2846. spin_unlock(q->queue_lock);
  2847. } else {
  2848. spin_unlock_irqrestore(&ioc->lock, flags);
  2849. - cpu_relax();
  2850. + cpu_chill();
  2851. spin_lock_irqsave_nested(&ioc->lock, flags, 1);
  2852. }
  2853. }
  2854. @@ -187,7 +188,7 @@ void put_io_context_active(struct io_context *ioc)
  2855. spin_unlock(icq->q->queue_lock);
  2856. } else {
  2857. spin_unlock_irqrestore(&ioc->lock, flags);
  2858. - cpu_relax();
  2859. + cpu_chill();
  2860. goto retry;
  2861. }
  2862. }
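
Note on the two blk-ioc.c hunks above: cpu_relax() is replaced by cpu_chill() in retry loops that wait for another context to release a lock. On RT the lock holder may itself be a preemptible task, so busy-polling from a higher-priority task can live-lock; cpu_chill(), an RT helper made available through linux/delay.h by this series and essentially a short sleep, lets the holder make progress. A sketch of the trylock-retry shape, assuming that helper and using illustrative names:

/* Sketch only: retry loop that must not busy-spin on RT. */
#include <linux/spinlock.h>
#include <linux/delay.h>	/* cpu_chill() comes from here in this series */

static void drain_object(spinlock_t *outer, spinlock_t *inner)
{
retry:
	spin_lock(outer);
	if (!spin_trylock(inner)) {
		spin_unlock(outer);
		cpu_chill();	/* RT: brief sleep so the inner-lock holder can run */
		goto retry;
	}
	/* ... both locks held, do the work ... */
	spin_unlock(inner);
	spin_unlock(outer);
}
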
  2863. diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c
  2864. index bb3ed488f7b5..628c6c13c482 100644
  2865. --- a/block/blk-mq-cpu.c
  2866. +++ b/block/blk-mq-cpu.c
  2867. @@ -16,7 +16,7 @@
  2868. #include "blk-mq.h"
  2869. static LIST_HEAD(blk_mq_cpu_notify_list);
  2870. -static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
  2871. +static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);
  2872. static int blk_mq_main_cpu_notify(struct notifier_block *self,
  2873. unsigned long action, void *hcpu)
  2874. @@ -25,7 +25,10 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self,
  2875. struct blk_mq_cpu_notifier *notify;
  2876. int ret = NOTIFY_OK;
  2877. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  2878. + if (action != CPU_POST_DEAD)
  2879. + return NOTIFY_OK;
  2880. +
  2881. + spin_lock(&blk_mq_cpu_notify_lock);
  2882. list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) {
  2883. ret = notify->notify(notify->data, action, cpu);
  2884. @@ -33,7 +36,7 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self,
  2885. break;
  2886. }
  2887. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  2888. + spin_unlock(&blk_mq_cpu_notify_lock);
  2889. return ret;
  2890. }
  2891. @@ -41,16 +44,16 @@ void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
  2892. {
  2893. BUG_ON(!notifier->notify);
  2894. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  2895. + spin_lock(&blk_mq_cpu_notify_lock);
  2896. list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
  2897. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  2898. + spin_unlock(&blk_mq_cpu_notify_lock);
  2899. }
  2900. void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
  2901. {
  2902. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  2903. + spin_lock(&blk_mq_cpu_notify_lock);
  2904. list_del(&notifier->list);
  2905. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  2906. + spin_unlock(&blk_mq_cpu_notify_lock);
  2907. }
  2908. void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
  2909. diff --git a/block/blk-mq.c b/block/blk-mq.c
  2910. index 1699baf39b78..2a2006009548 100644
  2911. --- a/block/blk-mq.c
  2912. +++ b/block/blk-mq.c
  2913. @@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
  2914. static void blk_mq_freeze_queue_wait(struct request_queue *q)
  2915. {
  2916. - wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
  2917. + swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
  2918. }
  2919. /*
  2920. @@ -130,7 +130,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
  2921. WARN_ON_ONCE(freeze_depth < 0);
  2922. if (!freeze_depth) {
  2923. percpu_ref_reinit(&q->q_usage_counter);
  2924. - wake_up_all(&q->mq_freeze_wq);
  2925. + swake_up_all(&q->mq_freeze_wq);
  2926. }
  2927. }
  2928. EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
  2929. @@ -149,7 +149,7 @@ void blk_mq_wake_waiters(struct request_queue *q)
  2930. * dying, we need to ensure that processes currently waiting on
  2931. * the queue are notified as well.
  2932. */
  2933. - wake_up_all(&q->mq_freeze_wq);
  2934. + swake_up_all(&q->mq_freeze_wq);
  2935. }
  2936. bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
  2937. @@ -196,6 +196,9 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
  2938. rq->resid_len = 0;
  2939. rq->sense = NULL;
  2940. +#ifdef CONFIG_PREEMPT_RT_FULL
  2941. + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
  2942. +#endif
  2943. INIT_LIST_HEAD(&rq->timeout_list);
  2944. rq->timeout = 0;
  2945. @@ -323,6 +326,17 @@ void blk_mq_end_request(struct request *rq, int error)
  2946. }
  2947. EXPORT_SYMBOL(blk_mq_end_request);
  2948. +#ifdef CONFIG_PREEMPT_RT_FULL
  2949. +
  2950. +void __blk_mq_complete_request_remote_work(struct work_struct *work)
  2951. +{
  2952. + struct request *rq = container_of(work, struct request, work);
  2953. +
  2954. + rq->q->softirq_done_fn(rq);
  2955. +}
  2956. +
  2957. +#else
  2958. +
  2959. static void __blk_mq_complete_request_remote(void *data)
  2960. {
  2961. struct request *rq = data;
  2962. @@ -330,6 +344,8 @@ static void __blk_mq_complete_request_remote(void *data)
  2963. rq->q->softirq_done_fn(rq);
  2964. }
  2965. +#endif
  2966. +
  2967. static void blk_mq_ipi_complete_request(struct request *rq)
  2968. {
  2969. struct blk_mq_ctx *ctx = rq->mq_ctx;
  2970. @@ -341,19 +357,23 @@ static void blk_mq_ipi_complete_request(struct request *rq)
  2971. return;
  2972. }
  2973. - cpu = get_cpu();
  2974. + cpu = get_cpu_light();
  2975. if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
  2976. shared = cpus_share_cache(cpu, ctx->cpu);
  2977. if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
  2978. +#ifdef CONFIG_PREEMPT_RT_FULL
  2979. + schedule_work_on(ctx->cpu, &rq->work);
  2980. +#else
  2981. rq->csd.func = __blk_mq_complete_request_remote;
  2982. rq->csd.info = rq;
  2983. rq->csd.flags = 0;
  2984. smp_call_function_single_async(ctx->cpu, &rq->csd);
  2985. +#endif
  2986. } else {
  2987. rq->q->softirq_done_fn(rq);
  2988. }
  2989. - put_cpu();
  2990. + put_cpu_light();
  2991. }
  2992. static void __blk_mq_complete_request(struct request *rq)
  2993. @@ -868,14 +888,14 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
  2994. return;
  2995. if (!async) {
  2996. - int cpu = get_cpu();
  2997. + int cpu = get_cpu_light();
  2998. if (cpumask_test_cpu(cpu, hctx->cpumask)) {
  2999. __blk_mq_run_hw_queue(hctx);
  3000. - put_cpu();
  3001. + put_cpu_light();
  3002. return;
  3003. }
  3004. - put_cpu();
  3005. + put_cpu_light();
  3006. }
  3007. kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
  3008. @@ -1621,7 +1641,7 @@ static int blk_mq_hctx_notify(void *data, unsigned long action,
  3009. {
  3010. struct blk_mq_hw_ctx *hctx = data;
  3011. - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
  3012. + if (action == CPU_POST_DEAD)
  3013. return blk_mq_hctx_cpu_offline(hctx, cpu);
  3014. /*
  3015. diff --git a/block/blk-mq.h b/block/blk-mq.h
  3016. index 9087b11037b7..0401d76e827c 100644
  3017. --- a/block/blk-mq.h
  3018. +++ b/block/blk-mq.h
  3019. @@ -86,12 +86,12 @@ static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
  3020. */
  3021. static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
  3022. {
  3023. - return __blk_mq_get_ctx(q, get_cpu());
  3024. + return __blk_mq_get_ctx(q, get_cpu_light());
  3025. }
  3026. static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
  3027. {
  3028. - put_cpu();
  3029. + put_cpu_light();
  3030. }
  3031. struct blk_mq_alloc_data {
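
Note on the blk-mq.c and blk-mq.h hunks above: get_cpu()/put_cpu() become get_cpu_light()/put_cpu_light(), helpers introduced elsewhere in this series. On RT they only pin the task to its current CPU instead of disabling preemption, so the section may still sleep; on !RT they fall back to plain get_cpu()/put_cpu(). The remote completion path likewise trades the IPI for schedule_work_on() so softirq_done_fn() runs from task context on RT. A sketch of the get_cpu_light pattern, assuming those helpers:

/* Sketch only: stay on one CPU without disabling preemption on RT. */
#include <linux/smp.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, pending);

static void note_pending(void)
{
	int cpu = get_cpu_light();	/* RT: migration pinned; !RT: preemption off */

	per_cpu(pending, cpu)++;	/* may be preempted on RT, but never migrated */
	put_cpu_light();
}
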
  3032. diff --git a/block/blk-softirq.c b/block/blk-softirq.c
  3033. index 53b1737e978d..81c3c0a62edf 100644
  3034. --- a/block/blk-softirq.c
  3035. +++ b/block/blk-softirq.c
  3036. @@ -51,6 +51,7 @@ static void trigger_softirq(void *data)
  3037. raise_softirq_irqoff(BLOCK_SOFTIRQ);
  3038. local_irq_restore(flags);
  3039. + preempt_check_resched_rt();
  3040. }
  3041. /*
  3042. @@ -93,6 +94,7 @@ static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
  3043. this_cpu_ptr(&blk_cpu_done));
  3044. raise_softirq_irqoff(BLOCK_SOFTIRQ);
  3045. local_irq_enable();
  3046. + preempt_check_resched_rt();
  3047. }
  3048. return NOTIFY_OK;
  3049. @@ -150,6 +152,7 @@ void __blk_complete_request(struct request *req)
  3050. goto do_local;
  3051. local_irq_restore(flags);
  3052. + preempt_check_resched_rt();
  3053. }
  3054. /**
  3055. diff --git a/block/bounce.c b/block/bounce.c
  3056. index 1cb5dd3a5da1..2f1ec8a67cbe 100644
  3057. --- a/block/bounce.c
  3058. +++ b/block/bounce.c
  3059. @@ -55,11 +55,11 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
  3060. unsigned long flags;
  3061. unsigned char *vto;
  3062. - local_irq_save(flags);
  3063. + local_irq_save_nort(flags);
  3064. vto = kmap_atomic(to->bv_page);
  3065. memcpy(vto + to->bv_offset, vfrom, to->bv_len);
  3066. kunmap_atomic(vto);
  3067. - local_irq_restore(flags);
  3068. + local_irq_restore_nort(flags);
  3069. }
  3070. #else /* CONFIG_HIGHMEM */
  3071. diff --git a/block/genhd.c b/block/genhd.c
  3072. index 9f42526b4d62..f06d7f3b075b 100644
  3073. --- a/block/genhd.c
  3074. +++ b/block/genhd.c
  3075. @@ -1523,12 +1523,7 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now)
  3076. if (--ev->block)
  3077. goto out_unlock;
  3078. - /*
  3079. - * Not exactly a latency critical operation, set poll timer
  3080. - * slack to 25% and kick event check.
  3081. - */
  3082. intv = disk_events_poll_jiffies(disk);
  3083. - set_timer_slack(&ev->dwork.timer, intv / 4);
  3084. if (check_now)
  3085. queue_delayed_work(system_freezable_power_efficient_wq,
  3086. &ev->dwork, 0);
  3087. diff --git a/crypto/algapi.c b/crypto/algapi.c
  3088. index 731255a6104f..d3380591dfdd 100644
  3089. --- a/crypto/algapi.c
  3090. +++ b/crypto/algapi.c
  3091. @@ -718,13 +718,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2);
  3092. int crypto_register_notifier(struct notifier_block *nb)
  3093. {
  3094. - return blocking_notifier_chain_register(&crypto_chain, nb);
  3095. + return srcu_notifier_chain_register(&crypto_chain, nb);
  3096. }
  3097. EXPORT_SYMBOL_GPL(crypto_register_notifier);
  3098. int crypto_unregister_notifier(struct notifier_block *nb)
  3099. {
  3100. - return blocking_notifier_chain_unregister(&crypto_chain, nb);
  3101. + return srcu_notifier_chain_unregister(&crypto_chain, nb);
  3102. }
  3103. EXPORT_SYMBOL_GPL(crypto_unregister_notifier);
  3104. diff --git a/crypto/api.c b/crypto/api.c
  3105. index bbc147cb5dec..bc1a848f02ec 100644
  3106. --- a/crypto/api.c
  3107. +++ b/crypto/api.c
  3108. @@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(crypto_alg_list);
  3109. DECLARE_RWSEM(crypto_alg_sem);
  3110. EXPORT_SYMBOL_GPL(crypto_alg_sem);
  3111. -BLOCKING_NOTIFIER_HEAD(crypto_chain);
  3112. +SRCU_NOTIFIER_HEAD(crypto_chain);
  3113. EXPORT_SYMBOL_GPL(crypto_chain);
  3114. static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg);
  3115. @@ -236,10 +236,10 @@ int crypto_probing_notify(unsigned long val, void *v)
  3116. {
  3117. int ok;
  3118. - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
  3119. + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
  3120. if (ok == NOTIFY_DONE) {
  3121. request_module("cryptomgr");
  3122. - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
  3123. + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
  3124. }
  3125. return ok;
  3126. diff --git a/crypto/internal.h b/crypto/internal.h
  3127. index 7eefcdb00227..0ecc7f5a2f40 100644
  3128. --- a/crypto/internal.h
  3129. +++ b/crypto/internal.h
  3130. @@ -47,7 +47,7 @@ struct crypto_larval {
  3131. extern struct list_head crypto_alg_list;
  3132. extern struct rw_semaphore crypto_alg_sem;
  3133. -extern struct blocking_notifier_head crypto_chain;
  3134. +extern struct srcu_notifier_head crypto_chain;
  3135. #ifdef CONFIG_PROC_FS
  3136. void __init crypto_init_proc(void);
  3137. @@ -146,7 +146,7 @@ static inline int crypto_is_moribund(struct crypto_alg *alg)
  3138. static inline void crypto_notify(unsigned long val, void *v)
  3139. {
  3140. - blocking_notifier_call_chain(&crypto_chain, val, v);
  3141. + srcu_notifier_call_chain(&crypto_chain, val, v);
  3142. }
  3143. #endif /* _CRYPTO_INTERNAL_H */
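
Note on the crypto hunks above: crypto_chain moves from a blocking notifier to an SRCU notifier, so the call side walks the chain under SRCU rather than holding the chain's rw_semaphore across every callback. The SRCU notifier API is a drop-in replacement; a minimal sketch with illustrative names:

/* Sketch only: SRCU notifier chain usage, as applied to crypto_chain above. */
#include <linux/notifier.h>

SRCU_NOTIFIER_HEAD(example_chain);

static int example_event(struct notifier_block *nb, unsigned long val, void *data)
{
	return NOTIFY_OK;
}

static struct notifier_block example_nb = {
	.notifier_call = example_event,
};

static void example_use(void)
{
	srcu_notifier_chain_register(&example_chain, &example_nb);
	srcu_notifier_call_chain(&example_chain, 0, NULL);	/* readers use SRCU, not an rwsem */
	srcu_notifier_chain_unregister(&example_chain, &example_nb);
}
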
  3144. diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
  3145. index 51b073b68f16..2d38ca5c6635 100644
  3146. --- a/drivers/acpi/acpica/acglobal.h
  3147. +++ b/drivers/acpi/acpica/acglobal.h
  3148. @@ -116,7 +116,7 @@ ACPI_GLOBAL(u8, acpi_gbl_global_lock_pending);
  3149. * interrupt level
  3150. */
  3151. ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */
  3152. -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
  3153. +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
  3154. ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock);
  3155. /* Mutex for _OSI support */
  3156. diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c
  3157. index 5ba0498412fd..ada43191fb31 100644
  3158. --- a/drivers/acpi/acpica/hwregs.c
  3159. +++ b/drivers/acpi/acpica/hwregs.c
  3160. @@ -269,14 +269,14 @@ acpi_status acpi_hw_clear_acpi_status(void)
  3161. ACPI_BITMASK_ALL_FIXED_STATUS,
  3162. ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address)));
  3163. - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
  3164. + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
  3165. /* Clear the fixed events in PM1 A/B */
  3166. status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
  3167. ACPI_BITMASK_ALL_FIXED_STATUS);
  3168. - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
  3169. + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
  3170. if (ACPI_FAILURE(status)) {
  3171. goto exit;
  3172. diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c
  3173. index a01ddb393a55..fb64afd9470b 100644
  3174. --- a/drivers/acpi/acpica/hwxface.c
  3175. +++ b/drivers/acpi/acpica/hwxface.c
  3176. @@ -374,7 +374,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value)
  3177. return_ACPI_STATUS(AE_BAD_PARAMETER);
  3178. }
  3179. - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
  3180. + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
  3181. /*
  3182. * At this point, we know that the parent register is one of the
  3183. @@ -435,7 +435,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value)
  3184. unlock_and_exit:
  3185. - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
  3186. + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
  3187. return_ACPI_STATUS(status);
  3188. }
  3189. diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c
  3190. index 15073375bd00..357e7ca5a587 100644
  3191. --- a/drivers/acpi/acpica/utmutex.c
  3192. +++ b/drivers/acpi/acpica/utmutex.c
  3193. @@ -88,7 +88,7 @@ acpi_status acpi_ut_mutex_initialize(void)
  3194. return_ACPI_STATUS (status);
  3195. }
  3196. - status = acpi_os_create_lock (&acpi_gbl_hardware_lock);
  3197. + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock);
  3198. if (ACPI_FAILURE (status)) {
  3199. return_ACPI_STATUS (status);
  3200. }
  3201. @@ -145,7 +145,7 @@ void acpi_ut_mutex_terminate(void)
  3202. /* Delete the spinlocks */
  3203. acpi_os_delete_lock(acpi_gbl_gpe_lock);
  3204. - acpi_os_delete_lock(acpi_gbl_hardware_lock);
  3205. + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock);
  3206. acpi_os_delete_lock(acpi_gbl_reference_count_lock);
  3207. /* Delete the reader/writer lock */
  3208. diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
  3209. index 051b6158d1b7..7ad293bef6ed 100644
  3210. --- a/drivers/ata/libata-sff.c
  3211. +++ b/drivers/ata/libata-sff.c
  3212. @@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf,
  3213. unsigned long flags;
  3214. unsigned int consumed;
  3215. - local_irq_save(flags);
  3216. + local_irq_save_nort(flags);
  3217. consumed = ata_sff_data_xfer32(dev, buf, buflen, rw);
  3218. - local_irq_restore(flags);
  3219. + local_irq_restore_nort(flags);
  3220. return consumed;
  3221. }
  3222. @@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
  3223. unsigned long flags;
  3224. /* FIXME: use a bounce buffer */
  3225. - local_irq_save(flags);
  3226. + local_irq_save_nort(flags);
  3227. buf = kmap_atomic(page);
  3228. /* do the actual data transfer */
  3229. @@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
  3230. do_write);
  3231. kunmap_atomic(buf);
  3232. - local_irq_restore(flags);
  3233. + local_irq_restore_nort(flags);
  3234. } else {
  3235. buf = page_address(page);
  3236. ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size,
  3237. @@ -864,7 +864,7 @@ static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)
  3238. unsigned long flags;
  3239. /* FIXME: use bounce buffer */
  3240. - local_irq_save(flags);
  3241. + local_irq_save_nort(flags);
  3242. buf = kmap_atomic(page);
  3243. /* do the actual data transfer */
  3244. @@ -872,7 +872,7 @@ static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)
  3245. count, rw);
  3246. kunmap_atomic(buf);
  3247. - local_irq_restore(flags);
  3248. + local_irq_restore_nort(flags);
  3249. } else {
  3250. buf = page_address(page);
  3251. consumed = ap->ops->sff_data_xfer(dev, buf + offset,
  3252. diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
  3253. index 370c2f76016d..65e0b375a291 100644
  3254. --- a/drivers/block/zram/zram_drv.c
  3255. +++ b/drivers/block/zram/zram_drv.c
  3256. @@ -520,6 +520,8 @@ static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
  3257. goto out_error;
  3258. }
  3259. + zram_meta_init_table_locks(meta, disksize);
  3260. +
  3261. return meta;
  3262. out_error:
  3263. @@ -568,12 +570,12 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
  3264. unsigned long handle;
  3265. size_t size;
  3266. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3267. + zram_lock_table(&meta->table[index]);
  3268. handle = meta->table[index].handle;
  3269. size = zram_get_obj_size(meta, index);
  3270. if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
  3271. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3272. + zram_unlock_table(&meta->table[index]);
  3273. clear_page(mem);
  3274. return 0;
  3275. }
  3276. @@ -584,7 +586,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
  3277. else
  3278. ret = zcomp_decompress(zram->comp, cmem, size, mem);
  3279. zs_unmap_object(meta->mem_pool, handle);
  3280. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3281. + zram_unlock_table(&meta->table[index]);
  3282. /* Should NEVER happen. Return bio error if it does. */
  3283. if (unlikely(ret)) {
  3284. @@ -604,14 +606,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
  3285. struct zram_meta *meta = zram->meta;
  3286. page = bvec->bv_page;
  3287. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3288. + zram_lock_table(&meta->table[index]);
  3289. if (unlikely(!meta->table[index].handle) ||
  3290. zram_test_flag(meta, index, ZRAM_ZERO)) {
  3291. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3292. + zram_unlock_table(&meta->table[index]);
  3293. handle_zero_page(bvec);
  3294. return 0;
  3295. }
  3296. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3297. + zram_unlock_table(&meta->table[index]);
  3298. if (is_partial_io(bvec))
  3299. /* Use a temporary buffer to decompress the page */
  3300. @@ -689,10 +691,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
  3301. if (user_mem)
  3302. kunmap_atomic(user_mem);
  3303. /* Free memory associated with this sector now. */
  3304. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3305. + zram_lock_table(&meta->table[index]);
  3306. zram_free_page(zram, index);
  3307. zram_set_flag(meta, index, ZRAM_ZERO);
  3308. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3309. + zram_unlock_table(&meta->table[index]);
  3310. atomic64_inc(&zram->stats.zero_pages);
  3311. ret = 0;
  3312. @@ -752,12 +754,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
  3313. * Free memory associated with this sector
  3314. * before overwriting unused sectors.
  3315. */
  3316. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3317. + zram_lock_table(&meta->table[index]);
  3318. zram_free_page(zram, index);
  3319. meta->table[index].handle = handle;
  3320. zram_set_obj_size(meta, index, clen);
  3321. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3322. + zram_unlock_table(&meta->table[index]);
  3323. /* Update stats */
  3324. atomic64_add(clen, &zram->stats.compr_data_size);
  3325. @@ -800,9 +802,9 @@ static void zram_bio_discard(struct zram *zram, u32 index,
  3326. }
  3327. while (n >= PAGE_SIZE) {
  3328. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3329. + zram_lock_table(&meta->table[index]);
  3330. zram_free_page(zram, index);
  3331. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3332. + zram_unlock_table(&meta->table[index]);
  3333. atomic64_inc(&zram->stats.notify_free);
  3334. index++;
  3335. n -= PAGE_SIZE;
  3336. @@ -928,9 +930,9 @@ static void zram_slot_free_notify(struct block_device *bdev,
  3337. zram = bdev->bd_disk->private_data;
  3338. meta = zram->meta;
  3339. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3340. + zram_lock_table(&meta->table[index]);
  3341. zram_free_page(zram, index);
  3342. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3343. + zram_unlock_table(&meta->table[index]);
  3344. atomic64_inc(&zram->stats.notify_free);
  3345. }
  3346. diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
  3347. index 8e92339686d7..9e3e953d680e 100644
  3348. --- a/drivers/block/zram/zram_drv.h
  3349. +++ b/drivers/block/zram/zram_drv.h
  3350. @@ -72,6 +72,9 @@ enum zram_pageflags {
  3351. struct zram_table_entry {
  3352. unsigned long handle;
  3353. unsigned long value;
  3354. +#ifdef CONFIG_PREEMPT_RT_BASE
  3355. + spinlock_t lock;
  3356. +#endif
  3357. };
  3358. struct zram_stats {
  3359. @@ -119,4 +122,42 @@ struct zram {
  3360. */
  3361. bool claim; /* Protected by bdev->bd_mutex */
  3362. };
  3363. +
  3364. +#ifndef CONFIG_PREEMPT_RT_BASE
  3365. +static inline void zram_lock_table(struct zram_table_entry *table)
  3366. +{
  3367. + bit_spin_lock(ZRAM_ACCESS, &table->value);
  3368. +}
  3369. +
  3370. +static inline void zram_unlock_table(struct zram_table_entry *table)
  3371. +{
  3372. + bit_spin_unlock(ZRAM_ACCESS, &table->value);
  3373. +}
  3374. +
  3375. +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) { }
  3376. +#else /* CONFIG_PREEMPT_RT_BASE */
  3377. +static inline void zram_lock_table(struct zram_table_entry *table)
  3378. +{
  3379. + spin_lock(&table->lock);
  3380. + __set_bit(ZRAM_ACCESS, &table->value);
  3381. +}
  3382. +
  3383. +static inline void zram_unlock_table(struct zram_table_entry *table)
  3384. +{
  3385. + __clear_bit(ZRAM_ACCESS, &table->value);
  3386. + spin_unlock(&table->lock);
  3387. +}
  3388. +
  3389. +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize)
  3390. +{
  3391. + size_t num_pages = disksize >> PAGE_SHIFT;
  3392. + size_t index;
  3393. +
  3394. + for (index = 0; index < num_pages; index++) {
  3395. + spinlock_t *lock = &meta->table[index].lock;
  3396. + spin_lock_init(lock);
  3397. + }
  3398. +}
  3399. +#endif /* CONFIG_PREEMPT_RT_BASE */
  3400. +
  3401. #endif
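
Note on the zram hunks above: the per-entry bit_spin_lock on ZRAM_ACCESS implicitly disables preemption and cannot sleep, so on RT each table entry grows a real spinlock_t and zram_lock_table()/zram_unlock_table() hide the difference; the !RT build keeps the bit lock, the RT build takes the per-entry spinlock and still maintains the ZRAM_ACCESS bit. Callers only use the helpers. A sketch of the calling pattern, assuming the zram driver's internal types and zram_free_page() as shown in the hunks:

/* Sketch only: callers use the helpers instead of bit_spin_lock(ZRAM_ACCESS, ...). */
static void example_free_slot(struct zram *zram, u32 index)
{
	struct zram_meta *meta = zram->meta;

	zram_lock_table(&meta->table[index]);	/* bit lock on !RT, spinlock_t on RT */
	zram_free_page(zram, index);
	zram_unlock_table(&meta->table[index]);
}
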
  3402. diff --git a/drivers/char/random.c b/drivers/char/random.c
  3403. index b583e5336630..ac2bd8c62de9 100644
  3404. --- a/drivers/char/random.c
  3405. +++ b/drivers/char/random.c
  3406. @@ -796,8 +796,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
  3407. } sample;
  3408. long delta, delta2, delta3;
  3409. - preempt_disable();
  3410. -
  3411. sample.jiffies = jiffies;
  3412. sample.cycles = random_get_entropy();
  3413. sample.num = num;
  3414. @@ -838,7 +836,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
  3415. */
  3416. credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
  3417. }
  3418. - preempt_enable();
  3419. }
  3420. void add_input_randomness(unsigned int type, unsigned int code,
  3421. @@ -891,28 +888,27 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
  3422. return *(ptr + f->reg_idx++);
  3423. }
  3424. -void add_interrupt_randomness(int irq, int irq_flags)
  3425. +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip)
  3426. {
  3427. struct entropy_store *r;
  3428. struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
  3429. - struct pt_regs *regs = get_irq_regs();
  3430. unsigned long now = jiffies;
  3431. cycles_t cycles = random_get_entropy();
  3432. __u32 c_high, j_high;
  3433. - __u64 ip;
  3434. unsigned long seed;
  3435. int credit = 0;
  3436. if (cycles == 0)
  3437. - cycles = get_reg(fast_pool, regs);
  3438. + cycles = get_reg(fast_pool, NULL);
  3439. c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
  3440. j_high = (sizeof(now) > 4) ? now >> 32 : 0;
  3441. fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
  3442. fast_pool->pool[1] ^= now ^ c_high;
  3443. - ip = regs ? instruction_pointer(regs) : _RET_IP_;
  3444. + if (!ip)
  3445. + ip = _RET_IP_;
  3446. fast_pool->pool[2] ^= ip;
  3447. fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 :
  3448. - get_reg(fast_pool, regs);
  3449. + get_reg(fast_pool, NULL);
  3450. fast_mix(fast_pool);
  3451. add_interrupt_bench(cycles);
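
Note on the random.c hunks above: add_interrupt_randomness() no longer reads get_irq_regs() itself; the instruction pointer is passed in by the caller, because with forced-threaded interrupts on RT the function can run from the irq thread where that register snapshot is no longer meaningful (the preempt_disable() pair in add_timer_randomness() is dropped for the same reason). The callers that pass the new argument live elsewhere in this series; a hedged sketch of what such a caller could look like, with illustrative names:

/* Sketch only: caller captures the ip while still in hard irq context. */
#include <linux/random.h>
#include <asm/irq_regs.h>
#include <asm/ptrace.h>

static void example_handle_irq(int irq)
{
	struct pt_regs *regs = get_irq_regs();
	u64 ip = regs ? instruction_pointer(regs) : 0;

	/* ... run or wake the actual handler ... */
	add_interrupt_randomness(irq, 0, ip);	/* third argument added by this series */
}
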
  3452. diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
  3453. index 4da2af9694a2..5b6f57f500b8 100644
  3454. --- a/drivers/clocksource/tcb_clksrc.c
  3455. +++ b/drivers/clocksource/tcb_clksrc.c
  3456. @@ -23,8 +23,7 @@
  3457. * this 32 bit free-running counter. the second channel is not used.
  3458. *
  3459. * - The third channel may be used to provide a 16-bit clockevent
  3460. - * source, used in either periodic or oneshot mode. This runs
  3461. - * at 32 KiHZ, and can handle delays of up to two seconds.
  3462. + * source, used in either periodic or oneshot mode.
  3463. *
  3464. * A boot clocksource and clockevent source are also currently needed,
  3465. * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
  3466. @@ -74,6 +73,8 @@ static struct clocksource clksrc = {
  3467. struct tc_clkevt_device {
  3468. struct clock_event_device clkevt;
  3469. struct clk *clk;
  3470. + bool clk_enabled;
  3471. + u32 freq;
  3472. void __iomem *regs;
  3473. };
  3474. @@ -82,15 +83,26 @@ static struct tc_clkevt_device *to_tc_clkevt(struct clock_event_device *clkevt)
  3475. return container_of(clkevt, struct tc_clkevt_device, clkevt);
  3476. }
  3477. -/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
  3478. - * because using one of the divided clocks would usually mean the
  3479. - * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
  3480. - *
  3481. - * A divided clock could be good for high resolution timers, since
  3482. - * 30.5 usec resolution can seem "low".
  3483. - */
  3484. static u32 timer_clock;
  3485. +static void tc_clk_disable(struct clock_event_device *d)
  3486. +{
  3487. + struct tc_clkevt_device *tcd = to_tc_clkevt(d);
  3488. +
  3489. + clk_disable(tcd->clk);
  3490. + tcd->clk_enabled = false;
  3491. +}
  3492. +
  3493. +static void tc_clk_enable(struct clock_event_device *d)
  3494. +{
  3495. + struct tc_clkevt_device *tcd = to_tc_clkevt(d);
  3496. +
  3497. + if (tcd->clk_enabled)
  3498. + return;
  3499. + clk_enable(tcd->clk);
  3500. + tcd->clk_enabled = true;
  3501. +}
  3502. +
  3503. static int tc_shutdown(struct clock_event_device *d)
  3504. {
  3505. struct tc_clkevt_device *tcd = to_tc_clkevt(d);
  3506. @@ -98,8 +110,14 @@ static int tc_shutdown(struct clock_event_device *d)
  3507. __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR));
  3508. __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
  3509. + return 0;
  3510. +}
  3511. +
  3512. +static int tc_shutdown_clk_off(struct clock_event_device *d)
  3513. +{
  3514. + tc_shutdown(d);
  3515. if (!clockevent_state_detached(d))
  3516. - clk_disable(tcd->clk);
  3517. + tc_clk_disable(d);
  3518. return 0;
  3519. }
  3520. @@ -112,9 +130,9 @@ static int tc_set_oneshot(struct clock_event_device *d)
  3521. if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
  3522. tc_shutdown(d);
  3523. - clk_enable(tcd->clk);
  3524. + tc_clk_enable(d);
  3525. - /* slow clock, count up to RC, then irq and stop */
  3526. + /* count up to RC, then irq and stop */
  3527. __raw_writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE |
  3528. ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR));
  3529. __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
  3530. @@ -134,12 +152,12 @@ static int tc_set_periodic(struct clock_event_device *d)
  3531. /* By not making the gentime core emulate periodic mode on top
  3532. * of oneshot, we get lower overhead and improved accuracy.
  3533. */
  3534. - clk_enable(tcd->clk);
  3535. + tc_clk_enable(d);
  3536. - /* slow clock, count up to RC, then irq and restart */
  3537. + /* count up to RC, then irq and restart */
  3538. __raw_writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
  3539. regs + ATMEL_TC_REG(2, CMR));
  3540. - __raw_writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
  3541. + __raw_writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
  3542. /* Enable clock and interrupts on RC compare */
  3543. __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
  3544. @@ -166,9 +184,13 @@ static struct tc_clkevt_device clkevt = {
  3545. .features = CLOCK_EVT_FEAT_PERIODIC |
  3546. CLOCK_EVT_FEAT_ONESHOT,
  3547. /* Should be lower than at91rm9200's system timer */
  3548. +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  3549. .rating = 125,
  3550. +#else
  3551. + .rating = 200,
  3552. +#endif
  3553. .set_next_event = tc_next_event,
  3554. - .set_state_shutdown = tc_shutdown,
  3555. + .set_state_shutdown = tc_shutdown_clk_off,
  3556. .set_state_periodic = tc_set_periodic,
  3557. .set_state_oneshot = tc_set_oneshot,
  3558. },
  3559. @@ -188,8 +210,9 @@ static irqreturn_t ch2_irq(int irq, void *handle)
  3560. return IRQ_NONE;
  3561. }
  3562. -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
  3563. +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
  3564. {
  3565. + unsigned divisor = atmel_tc_divisors[divisor_idx];
  3566. int ret;
  3567. struct clk *t2_clk = tc->clk[2];
  3568. int irq = tc->irq[2];
  3569. @@ -210,7 +233,11 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
  3570. clkevt.regs = tc->regs;
  3571. clkevt.clk = t2_clk;
  3572. - timer_clock = clk32k_divisor_idx;
  3573. + timer_clock = divisor_idx;
  3574. + if (!divisor)
  3575. + clkevt.freq = 32768;
  3576. + else
  3577. + clkevt.freq = clk_get_rate(t2_clk) / divisor;
  3578. clkevt.clkevt.cpumask = cpumask_of(0);
  3579. @@ -221,7 +248,7 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
  3580. return ret;
  3581. }
  3582. - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff);
  3583. + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff);
  3584. return ret;
  3585. }
  3586. @@ -358,7 +385,11 @@ static int __init tcb_clksrc_init(void)
  3587. goto err_disable_t1;
  3588. /* channel 2: periodic and oneshot timer support */
  3589. +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  3590. ret = setup_clkevents(tc, clk32k_divisor_idx);
  3591. +#else
  3592. + ret = setup_clkevents(tc, best_divisor_idx);
  3593. +#endif
  3594. if (ret)
  3595. goto err_unregister_clksrc;
  3596. diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c
  3597. index d911c5dca8f1..7a40f7e88468 100644
  3598. --- a/drivers/clocksource/timer-atmel-pit.c
  3599. +++ b/drivers/clocksource/timer-atmel-pit.c
  3600. @@ -46,6 +46,7 @@ struct pit_data {
  3601. u32 cycle;
  3602. u32 cnt;
  3603. unsigned int irq;
  3604. + bool irq_requested;
  3605. struct clk *mck;
  3606. };
  3607. @@ -96,15 +97,29 @@ static int pit_clkevt_shutdown(struct clock_event_device *dev)
  3608. /* disable irq, leaving the clocksource active */
  3609. pit_write(data->base, AT91_PIT_MR, (data->cycle - 1) | AT91_PIT_PITEN);
  3610. + if (data->irq_requested) {
  3611. + free_irq(data->irq, data);
  3612. + data->irq_requested = false;
  3613. + }
  3614. return 0;
  3615. }
  3616. +static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id);
  3617. /*
  3618. * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16)
  3619. */
  3620. static int pit_clkevt_set_periodic(struct clock_event_device *dev)
  3621. {
  3622. struct pit_data *data = clkevt_to_pit_data(dev);
  3623. + int ret;
  3624. +
  3625. + ret = request_irq(data->irq, at91sam926x_pit_interrupt,
  3626. + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  3627. + "at91_tick", data);
  3628. + if (ret)
  3629. + panic(pr_fmt("Unable to setup IRQ\n"));
  3630. +
  3631. + data->irq_requested = true;
  3632. /* update clocksource counter */
  3633. data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR));
  3634. @@ -181,7 +196,6 @@ static void __init at91sam926x_pit_common_init(struct pit_data *data)
  3635. {
  3636. unsigned long pit_rate;
  3637. unsigned bits;
  3638. - int ret;
  3639. /*
  3640. * Use our actual MCK to figure out how many MCK/16 ticks per
  3641. @@ -206,13 +220,6 @@ static void __init at91sam926x_pit_common_init(struct pit_data *data)
  3642. data->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
  3643. clocksource_register_hz(&data->clksrc, pit_rate);
  3644. - /* Set up irq handler */
  3645. - ret = request_irq(data->irq, at91sam926x_pit_interrupt,
  3646. - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  3647. - "at91_tick", data);
  3648. - if (ret)
  3649. - panic(pr_fmt("Unable to setup IRQ\n"));
  3650. -
  3651. /* Set up and register clockevents */
  3652. data->clkevt.name = "pit";
  3653. data->clkevt.features = CLOCK_EVT_FEAT_PERIODIC;
  3654. diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c
  3655. index 29d21d68df5a..103d0fd70cc4 100644
  3656. --- a/drivers/clocksource/timer-atmel-st.c
  3657. +++ b/drivers/clocksource/timer-atmel-st.c
  3658. @@ -115,18 +115,29 @@ static void clkdev32k_disable_and_flush_irq(void)
  3659. last_crtr = read_CRTR();
  3660. }
  3661. +static int atmel_st_irq;
  3662. +
  3663. static int clkevt32k_shutdown(struct clock_event_device *evt)
  3664. {
  3665. clkdev32k_disable_and_flush_irq();
  3666. irqmask = 0;
  3667. regmap_write(regmap_st, AT91_ST_IER, irqmask);
  3668. + free_irq(atmel_st_irq, regmap_st);
  3669. return 0;
  3670. }
  3671. static int clkevt32k_set_oneshot(struct clock_event_device *dev)
  3672. {
  3673. + int ret;
  3674. +
  3675. clkdev32k_disable_and_flush_irq();
  3676. + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt,
  3677. + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  3678. + "at91_tick", regmap_st);
  3679. + if (ret)
  3680. + panic(pr_fmt("Unable to setup IRQ\n"));
  3681. +
  3682. /*
  3683. * ALM for oneshot irqs, set by next_event()
  3684. * before 32 seconds have passed.
  3685. @@ -139,8 +150,16 @@ static int clkevt32k_set_oneshot(struct clock_event_device *dev)
  3686. static int clkevt32k_set_periodic(struct clock_event_device *dev)
  3687. {
  3688. + int ret;
  3689. +
  3690. clkdev32k_disable_and_flush_irq();
  3691. + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt,
  3692. + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  3693. + "at91_tick", regmap_st);
  3694. + if (ret)
  3695. + panic(pr_fmt("Unable to setup IRQ\n"));
  3696. +
  3697. /* PIT for periodic irqs; fixed rate of 1/HZ */
  3698. irqmask = AT91_ST_PITS;
  3699. regmap_write(regmap_st, AT91_ST_PIMR, timer_latch);
  3700. @@ -198,7 +217,7 @@ static void __init atmel_st_timer_init(struct device_node *node)
  3701. {
  3702. struct clk *sclk;
  3703. unsigned int sclk_rate, val;
  3704. - int irq, ret;
  3705. + int ret;
  3706. regmap_st = syscon_node_to_regmap(node);
  3707. if (IS_ERR(regmap_st))
  3708. @@ -210,17 +229,10 @@ static void __init atmel_st_timer_init(struct device_node *node)
  3709. regmap_read(regmap_st, AT91_ST_SR, &val);
  3710. /* Get the interrupts property */
  3711. - irq = irq_of_parse_and_map(node, 0);
  3712. - if (!irq)
  3713. + atmel_st_irq = irq_of_parse_and_map(node, 0);
  3714. + if (!atmel_st_irq)
  3715. panic(pr_fmt("Unable to get IRQ from DT\n"));
  3716. - /* Make IRQs happen for the system timer */
  3717. - ret = request_irq(irq, at91rm9200_timer_interrupt,
  3718. - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  3719. - "at91_tick", regmap_st);
  3720. - if (ret)
  3721. - panic(pr_fmt("Unable to setup IRQ\n"));
  3722. -
  3723. sclk = of_clk_get(node, 0);
  3724. if (IS_ERR(sclk))
  3725. panic(pr_fmt("Unable to get slow clock\n"));
  3726. diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
  3727. index c59bdcb83217..8f23161d80be 100644
  3728. --- a/drivers/cpufreq/Kconfig.x86
  3729. +++ b/drivers/cpufreq/Kconfig.x86
  3730. @@ -123,7 +123,7 @@ config X86_POWERNOW_K7_ACPI
  3731. config X86_POWERNOW_K8
  3732. tristate "AMD Opteron/Athlon64 PowerNow!"
  3733. - depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ
  3734. + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE
  3735. help
  3736. This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors.
  3737. Support for K10 and newer processors is now in acpi-cpufreq.
  3738. diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
  3739. index 4dbc18727235..2a8ad712a5f2 100644
  3740. --- a/drivers/crypto/ccp/ccp-dev.c
  3741. +++ b/drivers/crypto/ccp/ccp-dev.c
  3742. @@ -16,7 +16,6 @@
  3743. #include <linux/sched.h>
  3744. #include <linux/interrupt.h>
  3745. #include <linux/spinlock.h>
  3746. -#include <linux/rwlock_types.h>
  3747. #include <linux/types.h>
  3748. #include <linux/mutex.h>
  3749. #include <linux/delay.h>
  3750. diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
  3751. index daba7ebb9699..18d58f136628 100644
  3752. --- a/drivers/gpu/drm/i915/i915_drv.h
  3753. +++ b/drivers/gpu/drm/i915/i915_drv.h
  3754. @@ -714,7 +714,7 @@ struct intel_uncore {
  3755. struct drm_i915_private *i915;
  3756. enum forcewake_domain_id id;
  3757. unsigned wake_count;
  3758. - struct timer_list timer;
  3759. + struct hrtimer timer;
  3760. i915_reg_t reg_set;
  3761. u32 val_set;
  3762. u32 val_clear;
  3763. diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  3764. index 1328bc5021b4..62103660b4c7 100644
  3765. --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  3766. +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  3767. @@ -1314,7 +1314,9 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
  3768. if (ret)
  3769. return ret;
  3770. +#ifndef CONFIG_PREEMPT_RT_BASE
  3771. trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
  3772. +#endif
  3773. i915_gem_execbuffer_move_to_active(vmas, params->request);
  3774. i915_gem_execbuffer_retire_commands(params);
  3775. diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
  3776. index 3af40616bf8b..46882680468e 100644
  3777. --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
  3778. +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
  3779. @@ -39,7 +39,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
  3780. if (!mutex_is_locked(mutex))
  3781. return false;
  3782. -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
  3783. +#if (defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)) && !defined(CONFIG_PREEMPT_RT_BASE)
  3784. return mutex->owner == task;
  3785. #else
  3786. /* Since UP may be pre-empted, we cannot assume that we own the lock */
  3787. diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
  3788. index d1a46ef5ab3f..7d43f913de24 100644
  3789. --- a/drivers/gpu/drm/i915/i915_irq.c
  3790. +++ b/drivers/gpu/drm/i915/i915_irq.c
  3791. @@ -830,6 +830,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
  3792. spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
  3793. /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
  3794. + preempt_disable_rt();
  3795. /* Get optional system timestamp before query. */
  3796. if (stime)
  3797. @@ -881,6 +882,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
  3798. *etime = ktime_get();
  3799. /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
  3800. + preempt_enable_rt();
  3801. spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
  3802. diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
  3803. index e5db9e1f623f..5ca085d99181 100644
  3804. --- a/drivers/gpu/drm/i915/intel_display.c
  3805. +++ b/drivers/gpu/drm/i915/intel_display.c
  3806. @@ -11496,7 +11496,7 @@ void intel_check_page_flip(struct drm_device *dev, int pipe)
  3807. struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
  3808. struct intel_unpin_work *work;
  3809. - WARN_ON(!in_interrupt());
  3810. + WARN_ON_NONRT(!in_interrupt());
  3811. if (crtc == NULL)
  3812. return;
  3813. diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
  3814. index a2582c455b36..c459b91586b0 100644
  3815. --- a/drivers/gpu/drm/i915/intel_sprite.c
  3816. +++ b/drivers/gpu/drm/i915/intel_sprite.c
  3817. @@ -38,6 +38,7 @@
  3818. #include "intel_drv.h"
  3819. #include <drm/i915_drm.h>
  3820. #include "i915_drv.h"
  3821. +#include <linux/locallock.h>
  3822. static bool
  3823. format_is_yuv(uint32_t format)
  3824. @@ -64,6 +65,8 @@ static int usecs_to_scanlines(const struct drm_display_mode *adjusted_mode,
  3825. 1000 * adjusted_mode->crtc_htotal);
  3826. }
  3827. +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock);
  3828. +
  3829. /**
  3830. * intel_pipe_update_start() - start update of a set of display registers
  3831. * @crtc: the crtc of which the registers are going to be updated
  3832. @@ -96,7 +99,7 @@ void intel_pipe_update_start(struct intel_crtc *crtc)
  3833. min = vblank_start - usecs_to_scanlines(adjusted_mode, 100);
  3834. max = vblank_start - 1;
  3835. - local_irq_disable();
  3836. + local_lock_irq(pipe_update_lock);
  3837. if (min <= 0 || max <= 0)
  3838. return;
  3839. @@ -126,11 +129,11 @@ void intel_pipe_update_start(struct intel_crtc *crtc)
  3840. break;
  3841. }
  3842. - local_irq_enable();
  3843. + local_unlock_irq(pipe_update_lock);
  3844. timeout = schedule_timeout(timeout);
  3845. - local_irq_disable();
  3846. + local_lock_irq(pipe_update_lock);
  3847. }
  3848. finish_wait(wq, &wait);
  3849. @@ -164,7 +167,7 @@ void intel_pipe_update_end(struct intel_crtc *crtc)
  3850. trace_i915_pipe_update_end(crtc, end_vbl_count, scanline_end);
  3851. - local_irq_enable();
  3852. + local_unlock_irq(pipe_update_lock);
  3853. if (crtc->debug.start_vbl_count &&
  3854. crtc->debug.start_vbl_count != end_vbl_count) {
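The intel_sprite.c changes above swap raw local_irq_disable()/local_irq_enable() for a local lock so that the vblank-evasion window stays preemptible on RT. A rough sketch of the assumed <linux/locallock.h> behaviour, not its verbatim definition: on non-RT kernels the operations fall back to plain IRQ disabling, while on PREEMPT_RT the lock variable becomes a per-CPU sleeping spinlock.

/* Assumed semantics of the local-lock API used above (sketch only). */
#ifndef CONFIG_PREEMPT_RT_BASE
# define DEFINE_LOCAL_IRQ_LOCK(lvar)	__typeof__(const int) lvar
# define local_lock_irq(lvar)		local_irq_disable()
# define local_unlock_irq(lvar)		local_irq_enable()
#else
/* On RT: DEFINE_LOCAL_IRQ_LOCK() creates a per-CPU lock; local_lock_irq()
 * takes it, serializing the section per CPU without hard-disabling IRQs. */
#endif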
  3855. diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
  3856. index 68b6f69aa682..49aefb097c15 100644
  3857. --- a/drivers/gpu/drm/i915/intel_uncore.c
  3858. +++ b/drivers/gpu/drm/i915/intel_uncore.c
  3859. @@ -60,7 +60,11 @@ fw_domain_reset(const struct intel_uncore_forcewake_domain *d)
  3860. static inline void
  3861. fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
  3862. {
  3863. - mod_timer_pinned(&d->timer, jiffies + 1);
  3864. + d->wake_count++;
  3865. + hrtimer_start_range_ns(&d->timer,
  3866. + ktime_set(0, NSEC_PER_MSEC),
  3867. + NSEC_PER_MSEC,
  3868. + HRTIMER_MODE_REL);
  3869. }
  3870. static inline void
  3871. @@ -224,9 +228,11 @@ static int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
  3872. return ret;
  3873. }
  3874. -static void intel_uncore_fw_release_timer(unsigned long arg)
  3875. +static enum hrtimer_restart
  3876. +intel_uncore_fw_release_timer(struct hrtimer *timer)
  3877. {
  3878. - struct intel_uncore_forcewake_domain *domain = (void *)arg;
  3879. + struct intel_uncore_forcewake_domain *domain =
  3880. + container_of(timer, struct intel_uncore_forcewake_domain, timer);
  3881. unsigned long irqflags;
  3882. assert_rpm_device_not_suspended(domain->i915);
  3883. @@ -240,6 +246,8 @@ static void intel_uncore_fw_release_timer(unsigned long arg)
  3884. 1 << domain->id);
  3885. spin_unlock_irqrestore(&domain->i915->uncore.lock, irqflags);
  3886. +
  3887. + return HRTIMER_NORESTART;
  3888. }
  3889. void intel_uncore_forcewake_reset(struct drm_device *dev, bool restore)
  3890. @@ -259,16 +267,16 @@ void intel_uncore_forcewake_reset(struct drm_device *dev, bool restore)
  3891. active_domains = 0;
  3892. for_each_fw_domain(domain, dev_priv, id) {
  3893. - if (del_timer_sync(&domain->timer) == 0)
  3894. + if (hrtimer_cancel(&domain->timer) == 0)
  3895. continue;
  3896. - intel_uncore_fw_release_timer((unsigned long)domain);
  3897. + intel_uncore_fw_release_timer(&domain->timer);
  3898. }
  3899. spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
  3900. for_each_fw_domain(domain, dev_priv, id) {
  3901. - if (timer_pending(&domain->timer))
  3902. + if (hrtimer_active(&domain->timer))
  3903. active_domains |= (1 << id);
  3904. }
  3905. @@ -491,7 +499,6 @@ static void __intel_uncore_forcewake_put(struct drm_i915_private *dev_priv,
  3906. if (--domain->wake_count)
  3907. continue;
  3908. - domain->wake_count++;
  3909. fw_domain_arm_timer(domain);
  3910. }
  3911. }
  3912. @@ -732,7 +739,6 @@ static inline void __force_wake_get(struct drm_i915_private *dev_priv,
  3913. continue;
  3914. }
  3915. - domain->wake_count++;
  3916. fw_domain_arm_timer(domain);
  3917. }
  3918. @@ -1150,7 +1156,8 @@ static void fw_domain_init(struct drm_i915_private *dev_priv,
  3919. d->i915 = dev_priv;
  3920. d->id = domain_id;
  3921. - setup_timer(&d->timer, intel_uncore_fw_release_timer, (unsigned long)d);
  3922. + hrtimer_init(&d->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  3923. + d->timer.function = intel_uncore_fw_release_timer;
  3924. dev_priv->uncore.fw_domains |= (1 << domain_id);
  3925. diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
  3926. index fcc7483d3f7b..dc44af5675d9 100644
  3927. --- a/drivers/gpu/drm/radeon/radeon_display.c
  3928. +++ b/drivers/gpu/drm/radeon/radeon_display.c
  3929. @@ -1863,6 +1863,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
  3930. struct radeon_device *rdev = dev->dev_private;
  3931. /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
  3932. + preempt_disable_rt();
  3933. /* Get optional system timestamp before query. */
  3934. if (stime)
  3935. @@ -1955,6 +1956,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
  3936. *etime = ktime_get();
  3937. /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
  3938. + preempt_enable_rt();
  3939. /* Decode into vertical and horizontal scanout position. */
  3940. *vpos = position & 0x1fff;
  3941. diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
  3942. index 13c45296ce5b..50c035c73357 100644
  3943. --- a/drivers/i2c/busses/i2c-omap.c
  3944. +++ b/drivers/i2c/busses/i2c-omap.c
  3945. @@ -995,15 +995,12 @@ omap_i2c_isr(int irq, void *dev_id)
  3946. u16 mask;
  3947. u16 stat;
  3948. - spin_lock(&omap->lock);
  3949. - mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG);
  3950. stat = omap_i2c_read_reg(omap, OMAP_I2C_STAT_REG);
  3951. + mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG);
  3952. if (stat & mask)
  3953. ret = IRQ_WAKE_THREAD;
  3954. - spin_unlock(&omap->lock);
  3955. -
  3956. return ret;
  3957. }
  3958. diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
  3959. index 36f76e28a0bf..394f142f90c7 100644
  3960. --- a/drivers/ide/alim15x3.c
  3961. +++ b/drivers/ide/alim15x3.c
  3962. @@ -234,7 +234,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev)
  3963. isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
  3964. - local_irq_save(flags);
  3965. + local_irq_save_nort(flags);
  3966. if (m5229_revision < 0xC2) {
  3967. /*
  3968. @@ -325,7 +325,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev)
  3969. }
  3970. pci_dev_put(north);
  3971. pci_dev_put(isa_dev);
  3972. - local_irq_restore(flags);
  3973. + local_irq_restore_nort(flags);
  3974. return 0;
  3975. }
  3976. diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
  3977. index f94baadbf424..71c41cfe85e9 100644
  3978. --- a/drivers/ide/hpt366.c
  3979. +++ b/drivers/ide/hpt366.c
  3980. @@ -1236,7 +1236,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif,
  3981. dma_old = inb(base + 2);
  3982. - local_irq_save(flags);
  3983. + local_irq_save_nort(flags);
  3984. dma_new = dma_old;
  3985. pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma);
  3986. @@ -1247,7 +1247,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif,
  3987. if (dma_new != dma_old)
  3988. outb(dma_new, base + 2);
  3989. - local_irq_restore(flags);
  3990. + local_irq_restore_nort(flags);
  3991. printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n",
  3992. hwif->name, base, base + 7);
  3993. diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c
  3994. index 19763977568c..4169433faab5 100644
  3995. --- a/drivers/ide/ide-io-std.c
  3996. +++ b/drivers/ide/ide-io-std.c
  3997. @@ -175,7 +175,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
  3998. unsigned long uninitialized_var(flags);
  3999. if ((io_32bit & 2) && !mmio) {
  4000. - local_irq_save(flags);
  4001. + local_irq_save_nort(flags);
  4002. ata_vlb_sync(io_ports->nsect_addr);
  4003. }
  4004. @@ -186,7 +186,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
  4005. insl(data_addr, buf, words);
  4006. if ((io_32bit & 2) && !mmio)
  4007. - local_irq_restore(flags);
  4008. + local_irq_restore_nort(flags);
  4009. if (((len + 1) & 3) < 2)
  4010. return;
  4011. @@ -219,7 +219,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
  4012. unsigned long uninitialized_var(flags);
  4013. if ((io_32bit & 2) && !mmio) {
  4014. - local_irq_save(flags);
  4015. + local_irq_save_nort(flags);
  4016. ata_vlb_sync(io_ports->nsect_addr);
  4017. }
  4018. @@ -230,7 +230,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
  4019. outsl(data_addr, buf, words);
  4020. if ((io_32bit & 2) && !mmio)
  4021. - local_irq_restore(flags);
  4022. + local_irq_restore_nort(flags);
  4023. if (((len + 1) & 3) < 2)
  4024. return;
  4025. diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
  4026. index 669ea1e45795..e12e43e62245 100644
  4027. --- a/drivers/ide/ide-io.c
  4028. +++ b/drivers/ide/ide-io.c
  4029. @@ -659,7 +659,7 @@ void ide_timer_expiry (unsigned long data)
  4030. /* disable_irq_nosync ?? */
  4031. disable_irq(hwif->irq);
  4032. /* local CPU only, as if we were handling an interrupt */
  4033. - local_irq_disable();
  4034. + local_irq_disable_nort();
  4035. if (hwif->polling) {
  4036. startstop = handler(drive);
  4037. } else if (drive_is_ready(drive)) {
  4038. diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
  4039. index 376f2dc410c5..f014dd1b73dc 100644
  4040. --- a/drivers/ide/ide-iops.c
  4041. +++ b/drivers/ide/ide-iops.c
  4042. @@ -129,12 +129,12 @@ int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad,
  4043. if ((stat & ATA_BUSY) == 0)
  4044. break;
  4045. - local_irq_restore(flags);
  4046. + local_irq_restore_nort(flags);
  4047. *rstat = stat;
  4048. return -EBUSY;
  4049. }
  4050. }
  4051. - local_irq_restore(flags);
  4052. + local_irq_restore_nort(flags);
  4053. }
  4054. /*
  4055. * Allow status to settle, then read it again.
  4056. diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
  4057. index 0b63facd1d87..4ceba37afc0c 100644
  4058. --- a/drivers/ide/ide-probe.c
  4059. +++ b/drivers/ide/ide-probe.c
  4060. @@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id)
  4061. int bswap = 1;
  4062. /* local CPU only; some systems need this */
  4063. - local_irq_save(flags);
  4064. + local_irq_save_nort(flags);
  4065. /* read 512 bytes of id info */
  4066. hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE);
  4067. - local_irq_restore(flags);
  4068. + local_irq_restore_nort(flags);
  4069. drive->dev_flags |= IDE_DFLAG_ID_READ;
  4070. #ifdef DEBUG
  4071. diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
  4072. index a716693417a3..be0568c722d6 100644
  4073. --- a/drivers/ide/ide-taskfile.c
  4074. +++ b/drivers/ide/ide-taskfile.c
  4075. @@ -250,7 +250,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd,
  4076. page_is_high = PageHighMem(page);
  4077. if (page_is_high)
  4078. - local_irq_save(flags);
  4079. + local_irq_save_nort(flags);
  4080. buf = kmap_atomic(page) + offset;
  4081. @@ -271,7 +271,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd,
  4082. kunmap_atomic(buf);
  4083. if (page_is_high)
  4084. - local_irq_restore(flags);
  4085. + local_irq_restore_nort(flags);
  4086. len -= nr_bytes;
  4087. }
  4088. @@ -414,7 +414,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive,
  4089. }
  4090. if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
  4091. - local_irq_disable();
  4092. + local_irq_disable_nort();
  4093. ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
  4094. diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
  4095. index f0e55e47eb54..da5f28c892ca 100644
  4096. --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
  4097. +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
  4098. @@ -51,8 +51,6 @@ MODULE_PARM_DESC(data_debug_level,
  4099. "Enable data path debug tracing if > 0");
  4100. #endif
  4101. -static DEFINE_MUTEX(pkey_mutex);
  4102. -
  4103. struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
  4104. struct ib_pd *pd, struct ib_ah_attr *attr)
  4105. {
  4106. diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  4107. index 25889311b1e9..7fe9dd26a4da 100644
  4108. --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  4109. +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  4110. @@ -883,7 +883,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
  4111. ipoib_dbg_mcast(priv, "restarting multicast task\n");
  4112. - local_irq_save(flags);
  4113. + local_irq_save_nort(flags);
  4114. netif_addr_lock(dev);
  4115. spin_lock(&priv->lock);
  4116. @@ -965,7 +965,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
  4117. spin_unlock(&priv->lock);
  4118. netif_addr_unlock(dev);
  4119. - local_irq_restore(flags);
  4120. + local_irq_restore_nort(flags);
  4121. /*
  4122. * make sure the in-flight joins have finished before we attempt
  4123. diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c
  4124. index 4a2a9e370be7..e970d9afd179 100644
  4125. --- a/drivers/input/gameport/gameport.c
  4126. +++ b/drivers/input/gameport/gameport.c
  4127. @@ -91,13 +91,13 @@ static int gameport_measure_speed(struct gameport *gameport)
  4128. tx = ~0;
  4129. for (i = 0; i < 50; i++) {
  4130. - local_irq_save(flags);
  4131. + local_irq_save_nort(flags);
  4132. t1 = ktime_get_ns();
  4133. for (t = 0; t < 50; t++)
  4134. gameport_read(gameport);
  4135. t2 = ktime_get_ns();
  4136. t3 = ktime_get_ns();
  4137. - local_irq_restore(flags);
  4138. + local_irq_restore_nort(flags);
  4139. udelay(i * 10);
  4140. t = (t2 - t1) - (t3 - t2);
  4141. if (t < tx)
  4142. @@ -124,12 +124,12 @@ static int old_gameport_measure_speed(struct gameport *gameport)
  4143. tx = 1 << 30;
  4144. for(i = 0; i < 50; i++) {
  4145. - local_irq_save(flags);
  4146. + local_irq_save_nort(flags);
  4147. GET_TIME(t1);
  4148. for (t = 0; t < 50; t++) gameport_read(gameport);
  4149. GET_TIME(t2);
  4150. GET_TIME(t3);
  4151. - local_irq_restore(flags);
  4152. + local_irq_restore_nort(flags);
  4153. udelay(i * 10);
  4154. if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t;
  4155. }
  4156. @@ -148,11 +148,11 @@ static int old_gameport_measure_speed(struct gameport *gameport)
  4157. tx = 1 << 30;
  4158. for(i = 0; i < 50; i++) {
  4159. - local_irq_save(flags);
  4160. + local_irq_save_nort(flags);
  4161. t1 = rdtsc();
  4162. for (t = 0; t < 50; t++) gameport_read(gameport);
  4163. t2 = rdtsc();
  4164. - local_irq_restore(flags);
  4165. + local_irq_restore_nort(flags);
  4166. udelay(i * 10);
  4167. if (t2 - t1 < tx) tx = t2 - t1;
  4168. }
  4169. diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
  4170. index 5efadad4615b..42fc6280729b 100644
  4171. --- a/drivers/iommu/amd_iommu.c
  4172. +++ b/drivers/iommu/amd_iommu.c
  4173. @@ -2165,10 +2165,10 @@ static int __attach_device(struct iommu_dev_data *dev_data,
  4174. int ret;
  4175. /*
  4176. - * Must be called with IRQs disabled. Warn here to detect early
  4177. - * when its not.
  4178. + * Must be called with IRQs disabled on a non-RT kernel. Warn here to
  4179. + * detect early when it's not.
  4180. */
  4181. - WARN_ON(!irqs_disabled());
  4182. + WARN_ON_NONRT(!irqs_disabled());
  4183. /* lock domain */
  4184. spin_lock(&domain->lock);
  4185. @@ -2331,10 +2331,10 @@ static void __detach_device(struct iommu_dev_data *dev_data)
  4186. struct protection_domain *domain;
  4187. /*
  4188. - * Must be called with IRQs disabled. Warn here to detect early
  4189. - * when its not.
  4190. + * Must be called with IRQs disabled on a non-RT kernel. Warn here to
  4191. + * detect early when it's not.
  4192. */
  4193. - WARN_ON(!irqs_disabled());
  4194. + WARN_ON_NONRT(!irqs_disabled());
  4195. if (WARN_ON(!dev_data->domain))
  4196. return;
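The comments above spell out the intent: on an RT kernel these paths run with interrupts enabled, so asserting irqs_disabled() only makes sense on non-RT configurations. WARN_ON_NONRT (and the BUG_ON_NONRT used in the dm.c hunk below) are assumed to be provided earlier in this patch series, roughly along these lines (a sketch, not the verbatim hunk):

/* Assumption: *_NONRT assertions check only when PREEMPT_RT_BASE is not
 * configured and compile away on RT kernels. */
#ifdef CONFIG_PREEMPT_RT_BASE
# define WARN_ON_NONRT(condition)	do { } while (0)
# define BUG_ON_NONRT(c)		do { } while (0)
#else
# define WARN_ON_NONRT(condition)	WARN_ON(condition)
# define BUG_ON_NONRT(c)		BUG_ON(c)
#endif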
  4197. diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig
  4198. index 5bda6a9b56bb..d6286584c807 100644
  4199. --- a/drivers/leds/trigger/Kconfig
  4200. +++ b/drivers/leds/trigger/Kconfig
  4201. @@ -61,7 +61,7 @@ config LEDS_TRIGGER_BACKLIGHT
  4202. config LEDS_TRIGGER_CPU
  4203. bool "LED CPU Trigger"
  4204. - depends on LEDS_TRIGGERS
  4205. + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE
  4206. help
  4207. This allows LEDs to be controlled by active CPUs. This shows
  4208. the active CPUs across an array of LEDs so you can see which
  4209. diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
  4210. index 4d200883c505..98b64ed5cb81 100644
  4211. --- a/drivers/md/bcache/Kconfig
  4212. +++ b/drivers/md/bcache/Kconfig
  4213. @@ -1,6 +1,7 @@
  4214. config BCACHE
  4215. tristate "Block device as cache"
  4216. + depends on !PREEMPT_RT_FULL
  4217. ---help---
  4218. Allows a block device to be used as cache for other devices; uses
  4219. a btree for indexing and the layout is optimized for SSDs.
  4220. diff --git a/drivers/md/dm.c b/drivers/md/dm.c
  4221. index 3d3ac13287a4..b931c7b85a0b 100644
  4222. --- a/drivers/md/dm.c
  4223. +++ b/drivers/md/dm.c
  4224. @@ -2187,7 +2187,7 @@ static void dm_request_fn(struct request_queue *q)
  4225. /* Establish tio->ti before queuing work (map_tio_request) */
  4226. tio->ti = ti;
  4227. queue_kthread_work(&md->kworker, &tio->work);
  4228. - BUG_ON(!irqs_disabled());
  4229. + BUG_ON_NONRT(!irqs_disabled());
  4230. }
  4231. }
  4232. diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
  4233. index e48c262ce032..25b82212e67f 100644
  4234. --- a/drivers/md/raid5.c
  4235. +++ b/drivers/md/raid5.c
  4236. @@ -1918,8 +1918,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
  4237. struct raid5_percpu *percpu;
  4238. unsigned long cpu;
  4239. - cpu = get_cpu();
  4240. + cpu = get_cpu_light();
  4241. percpu = per_cpu_ptr(conf->percpu, cpu);
  4242. + spin_lock(&percpu->lock);
  4243. if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
  4244. ops_run_biofill(sh);
  4245. overlap_clear++;
  4246. @@ -1975,7 +1976,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
  4247. if (test_and_clear_bit(R5_Overlap, &dev->flags))
  4248. wake_up(&sh->raid_conf->wait_for_overlap);
  4249. }
  4250. - put_cpu();
  4251. + spin_unlock(&percpu->lock);
  4252. + put_cpu_light();
  4253. }
  4254. static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
  4255. @@ -6415,6 +6417,7 @@ static int raid5_alloc_percpu(struct r5conf *conf)
  4256. __func__, cpu);
  4257. break;
  4258. }
  4259. + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
  4260. }
  4261. put_online_cpus();
  4262. diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
  4263. index 517d4b68a1be..efe91887ecd7 100644
  4264. --- a/drivers/md/raid5.h
  4265. +++ b/drivers/md/raid5.h
  4266. @@ -504,6 +504,7 @@ struct r5conf {
  4267. int recovery_disabled;
  4268. /* per cpu variables */
  4269. struct raid5_percpu {
  4270. + spinlock_t lock; /* Protection for -RT */
  4271. struct page *spare_page; /* Used when checking P/Q in raid6 */
  4272. struct flex_array *scribble; /* space for constructing buffer
  4273. * lists and performing address
  4274. diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
  4275. index a216b4667742..cea73201112e 100644
  4276. --- a/drivers/misc/Kconfig
  4277. +++ b/drivers/misc/Kconfig
  4278. @@ -54,6 +54,7 @@ config AD525X_DPOT_SPI
  4279. config ATMEL_TCLIB
  4280. bool "Atmel AT32/AT91 Timer/Counter Library"
  4281. depends on (AVR32 || ARCH_AT91)
  4282. + default y if PREEMPT_RT_FULL
  4283. help
  4284. Select this if you want a library to allocate the Timer/Counter
  4285. blocks found on many Atmel processors. This facilitates using
  4286. @@ -69,8 +70,7 @@ config ATMEL_TCB_CLKSRC
  4287. are combined to make a single 32-bit timer.
  4288. When GENERIC_CLOCKEVENTS is defined, the third timer channel
  4289. - may be used as a clock event device supporting oneshot mode
  4290. - (delays of up to two seconds) based on the 32 KiHz clock.
  4291. + may be used as a clock event device supporting oneshot mode.
  4292. config ATMEL_TCB_CLKSRC_BLOCK
  4293. int
  4294. @@ -84,6 +84,15 @@ config ATMEL_TCB_CLKSRC_BLOCK
  4295. TC can be used for other purposes, such as PWM generation and
  4296. interval timing.
  4297. +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  4298. + bool "TC Block use 32 KiHz clock"
  4299. + depends on ATMEL_TCB_CLKSRC
  4300. + default y if !PREEMPT_RT_FULL
  4301. + help
  4302. + Select this to use 32 KiHz base clock rate as TC block clock
  4303. + source for clock events.
  4304. +
  4305. +
  4306. config DUMMY_IRQ
  4307. tristate "Dummy IRQ handler"
  4308. default n
  4309. @@ -114,6 +123,35 @@ config IBM_ASM
  4310. for information on the specific driver level and support statement
  4311. for your IBM server.
  4312. +config HWLAT_DETECTOR
  4313. + tristate "Testing module to detect hardware-induced latencies"
  4314. + depends on DEBUG_FS
  4315. + depends on RING_BUFFER
  4316. + default m
  4317. + ---help---
  4318. + A simple hardware latency detector. Use this module to detect
  4319. + large latencies introduced by the behavior of the underlying
  4320. + system firmware external to Linux. We do this using periodic
  4321. + use of stop_machine to grab all available CPUs and measure
  4322. + for unexplainable gaps in the CPU timestamp counter(s). By
  4323. + default, the module is not enabled until the "enable" file
  4324. + within the "hwlat_detector" debugfs directory is toggled.
  4325. +
  4326. + This module is often used to detect SMI (System Management
  4327. + Interrupts) on x86 systems, though it is not x86 specific. To
  4328. + this end, we default to using a sample window of 1 second,
  4329. + during which we will sample for 0.5 seconds. If an SMI or
  4330. + similar event occurs during that time, it is recorded
  4331. + into an 8K-sample global ring buffer until retrieved.
  4332. +
  4333. + WARNING: This software should never be enabled (it can be built
  4334. + but should not be turned on after it is loaded) in a production
  4335. + environment where high latencies are a concern since the
  4336. + sampling mechanism actually introduces latencies for
  4337. + regular tasks while the CPU(s) are being held.
  4338. +
  4339. + If unsure, say N
  4340. +
  4341. config PHANTOM
  4342. tristate "Sensable PHANToM (PCI)"
  4343. depends on PCI
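The HWLAT_DETECTOR entry above and the module added below describe a debugfs interface: an "enable" toggle plus a blocking "sample" file whose records are emitted as "<sec>.<nsec>\t<inner-us>\t<outer-us>" (see debug_sample_fread further down). A hedged userspace sketch for driving it might look as follows; the /sys/kernel/debug mount point and the exact file names are assumptions based on the help text and the dentry names in the module:

/* Hypothetical reader for the hwlat_detector debugfs files (sketch only). */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *dir = "/sys/kernel/debug/hwlat_detector"; /* assumed path */
	char path[128], line[128];
	ssize_t n;
	int fd;

	/* The detector defaults to off until "enable" is toggled. */
	snprintf(path, sizeof(path), "%s/enable", dir);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return 1;
	if (write(fd, "1", 1) != 1) {
		close(fd);
		return 1;
	}
	close(fd);

	/* Each read of "sample" blocks until one latency record is available. */
	snprintf(path, sizeof(path), "%s/sample", dir);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return 1;
	while ((n = read(fd, line, sizeof(line) - 1)) > 0) {
		line[n] = '\0';
		/* Record format: seconds.nanoseconds, inner us, outer us. */
		fputs(line, stdout);
	}
	close(fd);
	return 0;
}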
  4344. diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
  4345. index b2fb6dbffcef..22827d5a3ba6 100644
  4346. --- a/drivers/misc/Makefile
  4347. +++ b/drivers/misc/Makefile
  4348. @@ -39,6 +39,7 @@ obj-$(CONFIG_C2PORT) += c2port/
  4349. obj-$(CONFIG_HMC6352) += hmc6352.o
  4350. obj-y += eeprom/
  4351. obj-y += cb710/
  4352. +obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o
  4353. obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o
  4354. obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o
  4355. obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o
  4356. diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c
  4357. new file mode 100644
  4358. index 000000000000..52f5ad5fd9c0
  4359. --- /dev/null
  4360. +++ b/drivers/misc/hwlat_detector.c
  4361. @@ -0,0 +1,1240 @@
  4362. +/*
  4363. + * hwlat_detector.c - A simple Hardware Latency detector.
  4364. + *
  4365. + * Use this module to detect large system latencies induced by the behavior of
  4366. + * certain underlying system hardware or firmware, independent of Linux itself.
  4367. + * The code was developed originally to detect the presence of SMIs on Intel
  4368. + * and AMD systems, although there is no dependency upon x86 herein.
  4369. + *
  4370. + * The classical example usage of this module is in detecting the presence of
  4371. + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
  4372. + * somewhat special form of hardware interrupt spawned from earlier CPU debug
  4373. + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
  4374. + * LPC (or other device) to generate a special interrupt under certain
  4375. + * circumstances, for example, upon expiration of a special SMI timer device,
  4376. + * due to certain external thermal readings, on certain I/O address accesses,
  4377. + * and other situations. An SMI hits a special CPU pin, triggers a special
  4378. + * SMI mode (complete with special memory map), and the OS is unaware.
  4379. + *
  4380. + * Although certain hardware-inducing latencies are necessary (for example,
  4381. + * a modern system often requires an SMI handler for correct thermal control
  4382. + * and remote management) they can wreak havoc upon any OS-level performance
  4383. + * guarantees toward low-latency, especially when the OS is not even made
  4384. + * aware of the presence of these interrupts. For this reason, we need a
  4385. + * somewhat brute force mechanism to detect these interrupts. In this case,
  4386. + * we do it by hogging all of the CPU(s) for configurable timer intervals,
  4387. + * sampling the built-in CPU timer, looking for discontiguous readings.
  4388. + *
  4389. + * WARNING: This implementation necessarily introduces latencies. Therefore,
  4390. + * you should NEVER use this module in a production environment
  4391. + * requiring any kind of low-latency performance guarantee(s).
  4392. + *
  4393. + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
  4394. + *
  4395. + * Includes useful feedback from Clark Williams <clark@redhat.com>
  4396. + *
  4397. + * This file is licensed under the terms of the GNU General Public
  4398. + * License version 2. This program is licensed "as is" without any
  4399. + * warranty of any kind, whether express or implied.
  4400. + */
  4401. +
  4402. +#include <linux/module.h>
  4403. +#include <linux/init.h>
  4404. +#include <linux/ring_buffer.h>
  4405. +#include <linux/time.h>
  4406. +#include <linux/hrtimer.h>
  4407. +#include <linux/kthread.h>
  4408. +#include <linux/debugfs.h>
  4409. +#include <linux/seq_file.h>
  4410. +#include <linux/uaccess.h>
  4411. +#include <linux/version.h>
  4412. +#include <linux/delay.h>
  4413. +#include <linux/slab.h>
  4414. +#include <linux/trace_clock.h>
  4415. +
  4416. +#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */
  4417. +#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */
  4418. +#define U64STR_SIZE 22 /* 20 digits max */
  4419. +
  4420. +#define VERSION "1.0.0"
  4421. +#define BANNER "hwlat_detector: "
  4422. +#define DRVNAME "hwlat_detector"
  4423. +#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */
  4424. +#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */
  4425. +#define DEFAULT_LAT_THRESHOLD 10 /* 10us */
  4426. +
  4427. +/* Module metadata */
  4428. +
  4429. +MODULE_LICENSE("GPL");
  4430. +MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
  4431. +MODULE_DESCRIPTION("A simple hardware latency detector");
  4432. +MODULE_VERSION(VERSION);
  4433. +
  4434. +/* Module parameters */
  4435. +
  4436. +static int debug;
  4437. +static int enabled;
  4438. +static int threshold;
  4439. +
  4440. +module_param(debug, int, 0); /* enable debug */
  4441. +module_param(enabled, int, 0); /* enable detector */
  4442. +module_param(threshold, int, 0); /* latency threshold */
  4443. +
  4444. +/* Buffering and sampling */
  4445. +
  4446. +static struct ring_buffer *ring_buffer; /* sample buffer */
  4447. +static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */
  4448. +static unsigned long buf_size = BUF_SIZE_DEFAULT;
  4449. +static struct task_struct *kthread; /* sampling thread */
  4450. +
  4451. +/* DebugFS filesystem entries */
  4452. +
  4453. +static struct dentry *debug_dir; /* debugfs directory */
  4454. +static struct dentry *debug_max; /* maximum TSC delta */
  4455. +static struct dentry *debug_count; /* total detect count */
  4456. +static struct dentry *debug_sample_width; /* sample width us */
  4457. +static struct dentry *debug_sample_window; /* sample window us */
  4458. +static struct dentry *debug_sample; /* raw samples us */
  4459. +static struct dentry *debug_threshold; /* threshold us */
  4460. +static struct dentry *debug_enable; /* enable/disable */
  4461. +
  4462. +/* Individual samples and global state */
  4463. +
  4464. +struct sample; /* latency sample */
  4465. +struct data; /* Global state */
  4466. +
  4467. +/* Sampling functions */
  4468. +static int __buffer_add_sample(struct sample *sample);
  4469. +static struct sample *buffer_get_sample(struct sample *sample);
  4470. +
  4471. +/* Threading and state */
  4472. +static int kthread_fn(void *unused);
  4473. +static int start_kthread(void);
  4474. +static int stop_kthread(void);
  4475. +static void __reset_stats(void);
  4476. +static int init_stats(void);
  4477. +
  4478. +/* Debugfs interface */
  4479. +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
  4480. + size_t cnt, loff_t *ppos, const u64 *entry);
  4481. +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
  4482. + size_t cnt, loff_t *ppos, u64 *entry);
  4483. +static int debug_sample_fopen(struct inode *inode, struct file *filp);
  4484. +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
  4485. + size_t cnt, loff_t *ppos);
  4486. +static int debug_sample_release(struct inode *inode, struct file *filp);
  4487. +static int debug_enable_fopen(struct inode *inode, struct file *filp);
  4488. +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
  4489. + size_t cnt, loff_t *ppos);
  4490. +static ssize_t debug_enable_fwrite(struct file *file,
  4491. + const char __user *user_buffer,
  4492. + size_t user_size, loff_t *offset);
  4493. +
  4494. +/* Initialization functions */
  4495. +static int init_debugfs(void);
  4496. +static void free_debugfs(void);
  4497. +static int detector_init(void);
  4498. +static void detector_exit(void);
  4499. +
  4500. +/* Individual latency samples are stored here when detected and packed into
  4501. + * the ring_buffer circular buffer, where they are overwritten when
  4502. + * more than buf_size/sizeof(sample) samples are received. */
  4503. +struct sample {
  4504. + u64 seqnum; /* unique sequence */
  4505. + u64 duration; /* ktime delta */
  4506. + u64 outer_duration; /* ktime delta (outer loop) */
  4507. + struct timespec timestamp; /* wall time */
  4508. + unsigned long lost;
  4509. +};
  4510. +
  4511. +/* keep the global state somewhere. */
  4512. +static struct data {
  4513. +
  4514. + struct mutex lock; /* protect changes */
  4515. +
  4516. + u64 count; /* total since reset */
  4517. + u64 max_sample; /* max hardware latency */
  4518. + u64 threshold; /* sample threshold level */
  4519. +
  4520. + u64 sample_window; /* total sampling window (on+off) */
  4521. + u64 sample_width; /* active sampling portion of window */
  4522. +
  4523. + atomic_t sample_open; /* whether the sample file is open */
  4524. +
  4525. + wait_queue_head_t wq; /* waitqueue for new sample values */
  4526. +
  4527. +} data;
  4528. +
  4529. +/**
  4530. + * __buffer_add_sample - add a new latency sample recording to the ring buffer
  4531. + * @sample: The new latency sample value
  4532. + *
  4533. + * This receives a new latency sample and records it in a global ring buffer.
  4534. + * No additional locking is used in this case.
  4535. + */
  4536. +static int __buffer_add_sample(struct sample *sample)
  4537. +{
  4538. + return ring_buffer_write(ring_buffer,
  4539. + sizeof(struct sample), sample);
  4540. +}
  4541. +
  4542. +/**
  4543. + * buffer_get_sample - remove a hardware latency sample from the ring buffer
  4544. + * @sample: Pre-allocated storage for the sample
  4545. + *
  4546. + * This retrieves a hardware latency sample from the global circular buffer
  4547. + */
  4548. +static struct sample *buffer_get_sample(struct sample *sample)
  4549. +{
  4550. + struct ring_buffer_event *e = NULL;
  4551. + struct sample *s = NULL;
  4552. + unsigned int cpu = 0;
  4553. +
  4554. + if (!sample)
  4555. + return NULL;
  4556. +
  4557. + mutex_lock(&ring_buffer_mutex);
  4558. + for_each_online_cpu(cpu) {
  4559. + e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost);
  4560. + if (e)
  4561. + break;
  4562. + }
  4563. +
  4564. + if (e) {
  4565. + s = ring_buffer_event_data(e);
  4566. + memcpy(sample, s, sizeof(struct sample));
  4567. + } else
  4568. + sample = NULL;
  4569. + mutex_unlock(&ring_buffer_mutex);
  4570. +
  4571. + return sample;
  4572. +}
  4573. +
  4574. +#ifndef CONFIG_TRACING
  4575. +#define time_type ktime_t
  4576. +#define time_get() ktime_get()
  4577. +#define time_to_us(x) ktime_to_us(x)
  4578. +#define time_sub(a, b) ktime_sub(a, b)
  4579. +#define init_time(a, b) (a).tv64 = b
  4580. +#define time_u64(a) ((a).tv64)
  4581. +#else
  4582. +#define time_type u64
  4583. +#define time_get() trace_clock_local()
  4584. +#define time_to_us(x) div_u64(x, 1000)
  4585. +#define time_sub(a, b) ((a) - (b))
  4586. +#define init_time(a, b) (a = b)
  4587. +#define time_u64(a) a
  4588. +#endif
  4589. +/**
  4590. + * get_sample - sample the CPU TSC and look for likely hardware latencies
  4591. + *
  4592. + * Used to repeatedly capture the CPU TSC (or similar), looking for potential
  4593. + * hardware-induced latency. Called with interrupts disabled and with
  4594. + * data.lock held.
  4595. + */
  4596. +static int get_sample(void)
  4597. +{
  4598. + time_type start, t1, t2, last_t2;
  4599. + s64 diff, total = 0;
  4600. + u64 sample = 0;
  4601. + u64 outer_sample = 0;
  4602. + int ret = -1;
  4603. +
  4604. + init_time(last_t2, 0);
  4605. + start = time_get(); /* start timestamp */
  4606. +
  4607. + do {
  4608. +
  4609. + t1 = time_get(); /* we'll look for a discontinuity */
  4610. + t2 = time_get();
  4611. +
  4612. + if (time_u64(last_t2)) {
  4613. + /* Check the delta from outer loop (t2 to next t1) */
  4614. + diff = time_to_us(time_sub(t1, last_t2));
  4615. + /* This shouldn't happen */
  4616. + if (diff < 0) {
  4617. + pr_err(BANNER "time running backwards\n");
  4618. + goto out;
  4619. + }
  4620. + if (diff > outer_sample)
  4621. + outer_sample = diff;
  4622. + }
  4623. + last_t2 = t2;
  4624. +
  4625. + total = time_to_us(time_sub(t2, start)); /* sample width */
  4626. +
  4627. + /* This checks the inner loop (t1 to t2) */
  4628. + diff = time_to_us(time_sub(t2, t1)); /* current diff */
  4629. +
  4630. + /* This shouldn't happen */
  4631. + if (diff < 0) {
  4632. + pr_err(BANNER "time running backwards\n");
  4633. + goto out;
  4634. + }
  4635. +
  4636. + if (diff > sample)
  4637. + sample = diff; /* only want highest value */
  4638. +
  4639. + } while (total <= data.sample_width);
  4640. +
  4641. + ret = 0;
  4642. +
  4643. + /* If we exceed the threshold value, we have found a hardware latency */
  4644. + if (sample > data.threshold || outer_sample > data.threshold) {
  4645. + struct sample s;
  4646. +
  4647. + ret = 1;
  4648. +
  4649. + data.count++;
  4650. + s.seqnum = data.count;
  4651. + s.duration = sample;
  4652. + s.outer_duration = outer_sample;
  4653. + s.timestamp = CURRENT_TIME;
  4654. + __buffer_add_sample(&s);
  4655. +
  4656. + /* Keep a running maximum ever recorded hardware latency */
  4657. + if (sample > data.max_sample)
  4658. + data.max_sample = sample;
  4659. + }
  4660. +
  4661. +out:
  4662. + return ret;
  4663. +}
  4664. +
  4665. +/*
  4666. + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
  4667. + * @unused: A required part of the kthread API.
  4668. + *
  4669. + * Used to periodically sample the CPU TSC via a call to get_sample. We
  4670. + * disable interrupts, which does (intentionally) introduce latency since we
  4671. + * need to ensure nothing else might be running (and thus pre-empting).
  4672. + * Obviously this should never be used in production environments.
  4673. + *
  4674. + * Currently this runs on whichever CPU it was scheduled on, but most
  4675. + * real-world hardware latency situations occur across several CPUs;
  4676. + * we might later generalize this if we find there are any actual
  4677. + * systems with alternate SMI delivery or other hardware latencies.
  4678. + */
  4679. +static int kthread_fn(void *unused)
  4680. +{
  4681. + int ret;
  4682. + u64 interval;
  4683. +
  4684. + while (!kthread_should_stop()) {
  4685. +
  4686. + mutex_lock(&data.lock);
  4687. +
  4688. + local_irq_disable();
  4689. + ret = get_sample();
  4690. + local_irq_enable();
  4691. +
  4692. + if (ret > 0)
  4693. + wake_up(&data.wq); /* wake up reader(s) */
  4694. +
  4695. + interval = data.sample_window - data.sample_width;
  4696. + do_div(interval, USEC_PER_MSEC); /* modifies interval value */
  4697. +
  4698. + mutex_unlock(&data.lock);
  4699. +
  4700. + if (msleep_interruptible(interval))
  4701. + break;
  4702. + }
  4703. +
  4704. + return 0;
  4705. +}
  4706. +
  4707. +/**
  4708. + * start_kthread - Kick off the hardware latency sampling/detector kthread
  4709. + *
  4710. + * This starts a kernel thread that will sit and sample the CPU timestamp
  4711. + * counter (TSC or similar) and look for potential hardware latencies.
  4712. + */
  4713. +static int start_kthread(void)
  4714. +{
  4715. + kthread = kthread_run(kthread_fn, NULL,
  4716. + DRVNAME);
  4717. + if (IS_ERR(kthread)) {
  4718. + pr_err(BANNER "could not start sampling thread\n");
  4719. + enabled = 0;
  4720. + return -ENOMEM;
  4721. + }
  4722. +
  4723. + return 0;
  4724. +}
  4725. +
  4726. +/**
  4727. + * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
  4728. + *
  4729. + * This kicks the running hardware latency sampling/detector kernel thread and
  4730. + * tells it to stop sampling now. Use this on unload and at system shutdown.
  4731. + */
  4732. +static int stop_kthread(void)
  4733. +{
  4734. + int ret;
  4735. +
  4736. + ret = kthread_stop(kthread);
  4737. +
  4738. + return ret;
  4739. +}
  4740. +
  4741. +/**
  4742. + * __reset_stats - Reset statistics for the hardware latency detector
  4743. + *
  4744. + * We use data to store various statistics and global state. We call this
  4745. + * function in order to reset those when "enable" is toggled on or off, and
  4746. + * also at initialization. Should be called with data.lock held.
  4747. + */
  4748. +static void __reset_stats(void)
  4749. +{
  4750. + data.count = 0;
  4751. + data.max_sample = 0;
  4752. + ring_buffer_reset(ring_buffer); /* flush out old sample entries */
  4753. +}
  4754. +
  4755. +/**
  4756. + * init_stats - Setup global state statistics for the hardware latency detector
  4757. + *
  4758. + * We use data to store various statistics and global state. We also use
  4759. + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware
  4760. + * induced system latencies. This function initializes these structures and
  4761. + * allocates the global ring buffer also.
  4762. + */
  4763. +static int init_stats(void)
  4764. +{
  4765. + int ret = -ENOMEM;
  4766. +
  4767. + mutex_init(&data.lock);
  4768. + init_waitqueue_head(&data.wq);
  4769. + atomic_set(&data.sample_open, 0);
  4770. +
  4771. + ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS);
  4772. +
  4773. + if (WARN(!ring_buffer, KERN_ERR BANNER
  4774. + "failed to allocate ring buffer!\n"))
  4775. + goto out;
  4776. +
  4777. + __reset_stats();
  4778. + data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */
  4779. + data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
  4780. + data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */
  4781. +
  4782. + ret = 0;
  4783. +
  4784. +out:
  4785. + return ret;
  4786. +
  4787. +}
  4788. +
  4789. +/*
  4790. + * simple_data_read - Wrapper read function for global state debugfs entries
  4791. + * @filp: The active open file structure for the debugfs "file"
  4792. + * @ubuf: The userspace provided buffer to read value into
  4793. + * @cnt: The maximum number of bytes to read
  4794. + * @ppos: The current "file" position
  4795. + * @entry: The entry to read from
  4796. + *
  4797. + * This function provides a generic read implementation for the global state
  4798. + * "data" structure debugfs filesystem entries. It would be nice to use
  4799. + * simple_attr_read directly, but we need to make sure that the data.lock
  4800. + * is held during the actual read.
  4801. + */
  4802. +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
  4803. + size_t cnt, loff_t *ppos, const u64 *entry)
  4804. +{
  4805. + char buf[U64STR_SIZE];
  4806. + u64 val = 0;
  4807. + int len = 0;
  4808. +
  4809. + memset(buf, 0, sizeof(buf));
  4810. +
  4811. + if (!entry)
  4812. + return -EFAULT;
  4813. +
  4814. + mutex_lock(&data.lock);
  4815. + val = *entry;
  4816. + mutex_unlock(&data.lock);
  4817. +
  4818. + len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val);
  4819. +
  4820. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
  4821. +
  4822. +}
  4823. +
  4824. +/*
  4825. + * simple_data_write - Wrapper write function for global state debugfs entries
  4826. + * @filp: The active open file structure for the debugfs "file"
  4827. + * @ubuf: The userspace provided buffer to write value from
  4828. + * @cnt: The maximum number of bytes to write
  4829. + * @ppos: The current "file" position
  4830. + * @entry: The entry to write to
  4831. + *
  4832. + * This function provides a generic write implementation for the global state
  4833. + * "data" structure debugfs filesystem entries. It would be nice to use
  4834. + * simple_attr_write directly, but we need to make sure that the data.lock
  4835. + * is held during the actual write.
  4836. + */
  4837. +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
  4838. + size_t cnt, loff_t *ppos, u64 *entry)
  4839. +{
  4840. + char buf[U64STR_SIZE];
  4841. + int csize = min(cnt, sizeof(buf));
  4842. + u64 val = 0;
  4843. + int err = 0;
  4844. +
  4845. + memset(buf, '\0', sizeof(buf));
  4846. + if (copy_from_user(buf, ubuf, csize))
  4847. + return -EFAULT;
  4848. +
  4849. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  4850. + err = kstrtoull(buf, 10, &val);
  4851. + if (err)
  4852. + return -EINVAL;
  4853. +
  4854. + mutex_lock(&data.lock);
  4855. + *entry = val;
  4856. + mutex_unlock(&data.lock);
  4857. +
  4858. + return csize;
  4859. +}
  4860. +
  4861. +/**
  4862. + * debug_count_fopen - Open function for "count" debugfs entry
  4863. + * @inode: The in-kernel inode representation of the debugfs "file"
  4864. + * @filp: The active open file structure for the debugfs "file"
  4865. + *
  4866. + * This function provides an open implementation for the "count" debugfs
  4867. + * interface to the hardware latency detector.
  4868. + */
  4869. +static int debug_count_fopen(struct inode *inode, struct file *filp)
  4870. +{
  4871. + return 0;
  4872. +}
  4873. +
  4874. +/**
  4875. + * debug_count_fread - Read function for "count" debugfs entry
  4876. + * @filp: The active open file structure for the debugfs "file"
  4877. + * @ubuf: The userspace provided buffer to read value into
  4878. + * @cnt: The maximum number of bytes to read
  4879. + * @ppos: The current "file" position
  4880. + *
  4881. + * This function provides a read implementation for the "count" debugfs
  4882. + * interface to the hardware latency detector. Can be used to read the
  4883. + * number of latency readings exceeding the configured threshold since
  4884. + * the detector was last reset (e.g. by writing a zero into "count").
  4885. + */
  4886. +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf,
  4887. + size_t cnt, loff_t *ppos)
  4888. +{
  4889. + return simple_data_read(filp, ubuf, cnt, ppos, &data.count);
  4890. +}
  4891. +
  4892. +/**
  4893. + * debug_count_fwrite - Write function for "count" debugfs entry
  4894. + * @filp: The active open file structure for the debugfs "file"
  4895. + * @ubuf: The user buffer that contains the value to write
  4896. + * @cnt: The maximum number of bytes to write to "file"
  4897. + * @ppos: The current position in the debugfs "file"
  4898. + *
  4899. + * This function provides a write implementation for the "count" debugfs
  4900. + * interface to the hardware latency detector. Can be used to write a
  4901. + * desired value, especially to zero the total count.
  4902. + */
  4903. +static ssize_t debug_count_fwrite(struct file *filp,
  4904. + const char __user *ubuf,
  4905. + size_t cnt,
  4906. + loff_t *ppos)
  4907. +{
  4908. + return simple_data_write(filp, ubuf, cnt, ppos, &data.count);
  4909. +}
  4910. +
  4911. +/**
  4912. + * debug_enable_fopen - Dummy open function for "enable" debugfs interface
  4913. + * @inode: The in-kernel inode representation of the debugfs "file"
  4914. + * @filp: The active open file structure for the debugfs "file"
  4915. + *
  4916. + * This function provides an open implementation for the "enable" debugfs
  4917. + * interface to the hardware latency detector.
  4918. + */
  4919. +static int debug_enable_fopen(struct inode *inode, struct file *filp)
  4920. +{
  4921. + return 0;
  4922. +}
  4923. +
  4924. +/**
  4925. + * debug_enable_fread - Read function for "enable" debugfs interface
  4926. + * @filp: The active open file structure for the debugfs "file"
  4927. + * @ubuf: The userspace provided buffer to read value into
  4928. + * @cnt: The maximum number of bytes to read
  4929. + * @ppos: The current "file" position
  4930. + *
  4931. + * This function provides a read implementation for the "enable" debugfs
  4932. + * interface to the hardware latency detector. Can be used to determine
  4933. + * whether the detector is currently enabled ("0\n" or "1\n" returned).
  4934. + */
  4935. +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
  4936. + size_t cnt, loff_t *ppos)
  4937. +{
  4938. + char buf[4];
  4939. +
  4940. + if ((cnt < sizeof(buf)) || (*ppos))
  4941. + return 0;
  4942. +
  4943. + buf[0] = enabled ? '1' : '0';
  4944. + buf[1] = '\n';
  4945. + buf[2] = '\0';
  4946. + if (copy_to_user(ubuf, buf, strlen(buf)))
  4947. + return -EFAULT;
  4948. + return *ppos = strlen(buf);
  4949. +}
  4950. +
  4951. +/**
  4952. + * debug_enable_fwrite - Write function for "enable" debugfs interface
  4953. + * @filp: The active open file structure for the debugfs "file"
  4954. + * @ubuf: The user buffer that contains the value to write
  4955. + * @cnt: The maximum number of bytes to write to "file"
  4956. + * @ppos: The current position in the debugfs "file"
  4957. + *
  4958. + * This function provides a write implementation for the "enable" debugfs
  4959. + * interface to the hardware latency detector. Can be used to enable or
  4960. + * disable the detector, which will have the side-effect of possibly
  4961. + * also resetting the global stats and kicking off the measuring
  4962. + * kthread (on an enable) or the converse (upon a disable).
  4963. + */
  4964. +static ssize_t debug_enable_fwrite(struct file *filp,
  4965. + const char __user *ubuf,
  4966. + size_t cnt,
  4967. + loff_t *ppos)
  4968. +{
  4969. + char buf[4];
  4970. + int csize = min(cnt, sizeof(buf));
  4971. + long val = 0;
  4972. + int err = 0;
  4973. +
  4974. + memset(buf, '\0', sizeof(buf));
  4975. + if (copy_from_user(buf, ubuf, csize))
  4976. + return -EFAULT;
  4977. +
  4978. + buf[sizeof(buf)-1] = '\0'; /* just in case */
  4979. + err = kstrtoul(buf, 10, &val);
  4980. + if (err)
  4981. + return -EINVAL;
  4982. +
  4983. + if (val) {
  4984. + if (enabled)
  4985. + goto unlock;
  4986. + enabled = 1;
  4987. + __reset_stats();
  4988. + if (start_kthread())
  4989. + return -EFAULT;
  4990. + } else {
  4991. + if (!enabled)
  4992. + goto unlock;
  4993. + enabled = 0;
  4994. + err = stop_kthread();
  4995. + if (err) {
  4996. + pr_err(BANNER "cannot stop kthread\n");
  4997. + return -EFAULT;
  4998. + }
  4999. + wake_up(&data.wq); /* reader(s) should return */
  5000. + }
  5001. +unlock:
  5002. + return csize;
  5003. +}
  5004. +
  5005. +/**
  5006. + * debug_max_fopen - Open function for "max" debugfs entry
  5007. + * @inode: The in-kernel inode representation of the debugfs "file"
  5008. + * @filp: The active open file structure for the debugfs "file"
  5009. + *
  5010. + * This function provides an open implementation for the "max" debugfs
  5011. + * interface to the hardware latency detector.
  5012. + */
  5013. +static int debug_max_fopen(struct inode *inode, struct file *filp)
  5014. +{
  5015. + return 0;
  5016. +}
  5017. +
  5018. +/**
  5019. + * debug_max_fread - Read function for "max" debugfs entry
  5020. + * @filp: The active open file structure for the debugfs "file"
  5021. + * @ubuf: The userspace provided buffer to read value into
  5022. + * @cnt: The maximum number of bytes to read
  5023. + * @ppos: The current "file" position
  5024. + *
  5025. + * This function provides a read implementation for the "max" debugfs
  5026. + * interface to the hardware latency detector. Can be used to determine
  5027. + * the maximum latency value observed since it was last reset.
  5028. + */
  5029. +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
  5030. + size_t cnt, loff_t *ppos)
  5031. +{
  5032. + return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample);
  5033. +}
  5034. +
  5035. +/**
  5036. + * debug_max_fwrite - Write function for "max" debugfs entry
  5037. + * @filp: The active open file structure for the debugfs "file"
  5038. + * @ubuf: The user buffer that contains the value to write
  5039. + * @cnt: The maximum number of bytes to write to "file"
  5040. + * @ppos: The current position in the debugfs "file"
  5041. + *
  5042. + * This function provides a write implementation for the "max" debugfs
  5043. + * interface to the hardware latency detector. Can be used to reset the
  5044. + * maximum or set it to some other desired value - if, then, subsequent
  5045. + * measurements exceed this value, the maximum will be updated.
  5046. + */
  5047. +static ssize_t debug_max_fwrite(struct file *filp,
  5048. + const char __user *ubuf,
  5049. + size_t cnt,
  5050. + loff_t *ppos)
  5051. +{
  5052. + return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample);
  5053. +}
  5054. +
  5055. +
  5056. +/**
  5057. + * debug_sample_fopen - An open function for "sample" debugfs interface
  5058. + * @inode: The in-kernel inode representation of this debugfs "file"
  5059. + * @filp: The active open file structure for the debugfs "file"
  5060. + *
  5061. + * This function handles opening the "sample" file within the hardware
  5062. + * latency detector debugfs directory interface. This file is used to read
  5063. + * raw samples from the global ring_buffer and allows the user to see a
  5064. + * running latency history. Can be opened blocking or non-blocking,
  5065. + * affecting whether it behaves as a buffer read pipe, or does not.
  5066. + * Implements simple locking to prevent multiple simultaneous use.
  5067. + */
  5068. +static int debug_sample_fopen(struct inode *inode, struct file *filp)
  5069. +{
  5070. + if (!atomic_add_unless(&data.sample_open, 1, 1))
  5071. + return -EBUSY;
  5072. + else
  5073. + return 0;
  5074. +}
  5075. +
  5076. +/**
  5077. + * debug_sample_fread - A read function for "sample" debugfs interface
  5078. + * @filp: The active open file structure for the debugfs "file"
  5079. + * @ubuf: The user buffer that will contain the samples read
  5080. + * @cnt: The maximum bytes to read from the debugfs "file"
  5081. + * @ppos: The current position in the debugfs "file"
  5082. + *
  5083. + * This function handles reading from the "sample" file within the hardware
  5084. + * latency detector debugfs directory interface. This file is used to read
  5085. + * raw samples from the global ring_buffer and allows the user to see a
  5086. + * running latency history. By default this will block pending a new
  5087. + * value written into the sample buffer, unless there are already a
  5088. + * number of value(s) waiting in the buffer, or the sample file was
  5089. + * previously opened in a non-blocking mode of operation.
  5090. + */
  5091. +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
  5092. + size_t cnt, loff_t *ppos)
  5093. +{
  5094. + int len = 0;
  5095. + char buf[64];
  5096. + struct sample *sample = NULL;
  5097. +
  5098. + if (!enabled)
  5099. + return 0;
  5100. +
  5101. + sample = kzalloc(sizeof(struct sample), GFP_KERNEL);
  5102. + if (!sample)
  5103. + return -ENOMEM;
  5104. +
  5105. + while (!buffer_get_sample(sample)) {
  5106. +
  5107. + DEFINE_WAIT(wait);
  5108. +
  5109. + if (filp->f_flags & O_NONBLOCK) {
  5110. + len = -EAGAIN;
  5111. + goto out;
  5112. + }
  5113. +
  5114. + prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE);
  5115. + schedule();
  5116. + finish_wait(&data.wq, &wait);
  5117. +
  5118. + if (signal_pending(current)) {
  5119. + len = -EINTR;
  5120. + goto out;
  5121. + }
  5122. +
  5123. + if (!enabled) { /* enable was toggled */
  5124. + len = 0;
  5125. + goto out;
  5126. + }
  5127. + }
  5128. +
  5129. + len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n",
  5130. + sample->timestamp.tv_sec,
  5131. + sample->timestamp.tv_nsec,
  5132. + sample->duration,
  5133. + sample->outer_duration);
  5134. +
  5135. +
  5136. + /* handling partial reads is more trouble than it's worth */
  5137. + if (len > cnt)
  5138. + goto out;
  5139. +
  5140. + if (copy_to_user(ubuf, buf, len))
  5141. + len = -EFAULT;
  5142. +
  5143. +out:
  5144. + kfree(sample);
  5145. + return len;
  5146. +}
  5147. +
  5148. +/**
  5149. + * debug_sample_release - Release function for "sample" debugfs interface
  5150. + * @inode: The in-kernel inode representation of the debugfs "file"
  5151. + * @filp: The active open file structure for the debugfs "file"
  5152. + *
  5153. + * This function completes the close of the debugfs interface "sample" file.
  5154. + * Frees the sample_open "lock" so that other users may open the interface.
  5155. + */
  5156. +static int debug_sample_release(struct inode *inode, struct file *filp)
  5157. +{
  5158. + atomic_dec(&data.sample_open);
  5159. +
  5160. + return 0;
  5161. +}
  5162. +
  5163. +/**
  5164. + * debug_threshold_fopen - Open function for "threshold" debugfs entry
  5165. + * @inode: The in-kernel inode representation of the debugfs "file"
  5166. + * @filp: The active open file structure for the debugfs "file"
  5167. + *
  5168. + * This function provides an open implementation for the "threshold" debugfs
  5169. + * interface to the hardware latency detector.
  5170. + */
  5171. +static int debug_threshold_fopen(struct inode *inode, struct file *filp)
  5172. +{
  5173. + return 0;
  5174. +}
  5175. +
  5176. +/**
  5177. + * debug_threshold_fread - Read function for "threshold" debugfs entry
  5178. + * @filp: The active open file structure for the debugfs "file"
  5179. + * @ubuf: The userspace provided buffer to read value into
  5180. + * @cnt: The maximum number of bytes to read
  5181. + * @ppos: The current "file" position
  5182. + *
  5183. + * This function provides a read implementation for the "threshold" debugfs
  5184. + * interface to the hardware latency detector. It can be used to determine
  5185. + * the current threshold level at which a latency will be recorded in the
  5186. + * global ring buffer, typically on the order of 10us.
  5187. + */
  5188. +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf,
  5189. + size_t cnt, loff_t *ppos)
  5190. +{
  5191. + return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold);
  5192. +}
  5193. +
  5194. +/**
  5195. + * debug_threshold_fwrite - Write function for "threshold" debugfs entry
  5196. + * @filp: The active open file structure for the debugfs "file"
  5197. + * @ubuf: The user buffer that contains the value to write
  5198. + * @cnt: The maximum number of bytes to write to "file"
  5199. + * @ppos: The current position in the debugfs "file"
  5200. + *
  5201. + * This function provides a write implementation for the "threshold" debugfs
  5202. + * interface to the hardware latency detector. It can be used to configure
  5203. + * the threshold level at which any subsequently detected latencies will
  5204. + * be recorded into the global ring buffer.
  5205. + */
  5206. +static ssize_t debug_threshold_fwrite(struct file *filp,
  5207. + const char __user *ubuf,
  5208. + size_t cnt,
  5209. + loff_t *ppos)
  5210. +{
  5211. + int ret;
  5212. +
  5213. + ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold);
  5214. +
  5215. + if (enabled)
  5216. + wake_up_process(kthread);
  5217. +
  5218. + return ret;
  5219. +}
  5220. +
  5221. +/**
  5222. + * debug_width_fopen - Open function for "width" debugfs entry
  5223. + * @inode: The in-kernel inode representation of the debugfs "file"
  5224. + * @filp: The active open file structure for the debugfs "file"
  5225. + *
  5226. + * This function provides an open implementation for the "width" debugfs
  5227. + * interface to the hardware latency detector.
  5228. + */
  5229. +static int debug_width_fopen(struct inode *inode, struct file *filp)
  5230. +{
  5231. + return 0;
  5232. +}
  5233. +
  5234. +/**
  5235. + * debug_width_fread - Read function for "width" debugfs entry
  5236. + * @filp: The active open file structure for the debugfs "file"
  5237. + * @ubuf: The userspace provided buffer to read value into
  5238. + * @cnt: The maximum number of bytes to read
  5239. + * @ppos: The current "file" position
  5240. + *
  5241. + * This function provides a read implementation for the "width" debugfs
  5242. + * interface to the hardware latency detector. It can be used to determine
  5243. + * for how many us of the total window us we will actively sample for any
  5244. + * hardware-induced latency periods. Obviously, it is not possible to
  5245. + * sample constantly and have the system respond to a sample reader, or,
  5246. + * worse, without having the system appear to have gone out to lunch.
  5247. + */
  5248. +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf,
  5249. + size_t cnt, loff_t *ppos)
  5250. +{
  5251. + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width);
  5252. +}
  5253. +
  5254. +/**
  5255. + * debug_width_fwrite - Write function for "width" debugfs entry
  5256. + * @filp: The active open file structure for the debugfs "file"
  5257. + * @ubuf: The user buffer that contains the value to write
  5258. + * @cnt: The maximum number of bytes to write to "file"
  5259. + * @ppos: The current position in the debugfs "file"
  5260. + *
  5261. + * This function provides a write implementation for the "width" debugfs
  5262. + * interface to the hardware latency detector. It can be used to configure
  5263. + * for how many us of the total window us we will actively sample for any
  5264. + * hardware-induced latency periods. Obviously, it is not possible to
  5265. + * sample constantly and have the system respond to a sample reader, or,
  5266. + * worse, without having the system appear to have gone out to lunch. It
  5267. + * is enforced that width is less than the total window size.
  5268. + */
  5269. +static ssize_t debug_width_fwrite(struct file *filp,
  5270. + const char __user *ubuf,
  5271. + size_t cnt,
  5272. + loff_t *ppos)
  5273. +{
  5274. + char buf[U64STR_SIZE];
  5275. + int csize = min(cnt, sizeof(buf));
  5276. + u64 val = 0;
  5277. + int err = 0;
  5278. +
  5279. + memset(buf, '\0', sizeof(buf));
  5280. + if (copy_from_user(buf, ubuf, csize))
  5281. + return -EFAULT;
  5282. +
  5283. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  5284. + err = kstrtoull(buf, 10, &val);
  5285. + if (err)
  5286. + return -EINVAL;
  5287. +
  5288. + mutex_lock(&data.lock);
  5289. + if (val < data.sample_window)
  5290. + data.sample_width = val;
  5291. + else {
  5292. + mutex_unlock(&data.lock);
  5293. + return -EINVAL;
  5294. + }
  5295. + mutex_unlock(&data.lock);
  5296. +
  5297. + if (enabled)
  5298. + wake_up_process(kthread);
  5299. +
  5300. + return csize;
  5301. +}
  5302. +
  5303. +/**
  5304. + * debug_window_fopen - Open function for "window" debugfs entry
  5305. + * @inode: The in-kernel inode representation of the debugfs "file"
  5306. + * @filp: The active open file structure for the debugfs "file"
  5307. + *
  5308. + * This function provides an open implementation for the "window" debugfs
  5309. + * interface to the hardware latency detector. The window is the total time
  5310. + * in us that will be considered one sample period. Conceptually, windows
  5311. + * occur back-to-back and contain a sample width period during which
  5312. + * actual sampling occurs.
  5313. + */
  5314. +static int debug_window_fopen(struct inode *inode, struct file *filp)
  5315. +{
  5316. + return 0;
  5317. +}
  5318. +
  5319. +/**
  5320. + * debug_window_fread - Read function for "window" debugfs entry
  5321. + * @filp: The active open file structure for the debugfs "file"
  5322. + * @ubuf: The userspace provided buffer to read value into
  5323. + * @cnt: The maximum number of bytes to read
  5324. + * @ppos: The current "file" position
  5325. + *
  5326. + * This function provides a read implementation for the "window" debugfs
  5327. + * interface to the hardware latency detector. The window is the total time
  5328. + * in us that will be considered one sample period. Conceptually, windows
  5329. + * occur back-to-back and contain a sample width period during which
  5330. + * actual sampling occurs. Can be used to read the total window size.
  5331. + */
  5332. +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf,
  5333. + size_t cnt, loff_t *ppos)
  5334. +{
  5335. + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window);
  5336. +}
  5337. +
  5338. +/**
  5339. + * debug_window_fwrite - Write function for "window" debugfs entry
  5340. + * @filp: The active open file structure for the debugfs "file"
  5341. + * @ubuf: The user buffer that contains the value to write
  5342. + * @cnt: The maximum number of bytes to write to "file"
  5343. + * @ppos: The current position in the debugfs "file"
  5344. + *
  5345. + * This function provides a write implementation for the "window" debugfs
  5346. + * interface to the hardware latency detector. The window is the total time
  5347. + * in us that will be considered one sample period. Conceptually, windows
  5348. + * occur back-to-back and contain a sample width period during which
  5349. + * actual sampling occurs. Can be used to write a new total window size. It
  5350. + * is enforced that any value written must be greater than the sample width
  5351. + * size, or an error results.
  5352. + */
  5353. +static ssize_t debug_window_fwrite(struct file *filp,
  5354. + const char __user *ubuf,
  5355. + size_t cnt,
  5356. + loff_t *ppos)
  5357. +{
  5358. + char buf[U64STR_SIZE];
  5359. + int csize = min(cnt, sizeof(buf));
  5360. + u64 val = 0;
  5361. + int err = 0;
  5362. +
  5363. + memset(buf, '\0', sizeof(buf));
  5364. + if (copy_from_user(buf, ubuf, csize))
  5365. + return -EFAULT;
  5366. +
  5367. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  5368. + err = kstrtoull(buf, 10, &val);
  5369. + if (err)
  5370. + return -EINVAL;
  5371. +
  5372. + mutex_lock(&data.lock);
  5373. + if (data.sample_width < val)
  5374. + data.sample_window = val;
  5375. + else {
  5376. + mutex_unlock(&data.lock);
  5377. + return -EINVAL;
  5378. + }
  5379. + mutex_unlock(&data.lock);
  5380. +
  5381. + return csize;
  5382. +}
  5383. +
  5384. +/*
  5385. + * Function pointers for the "count" debugfs file operations
  5386. + */
  5387. +static const struct file_operations count_fops = {
  5388. + .open = debug_count_fopen,
  5389. + .read = debug_count_fread,
  5390. + .write = debug_count_fwrite,
  5391. + .owner = THIS_MODULE,
  5392. +};
  5393. +
  5394. +/*
  5395. + * Function pointers for the "enable" debugfs file operations
  5396. + */
  5397. +static const struct file_operations enable_fops = {
  5398. + .open = debug_enable_fopen,
  5399. + .read = debug_enable_fread,
  5400. + .write = debug_enable_fwrite,
  5401. + .owner = THIS_MODULE,
  5402. +};
  5403. +
  5404. +/*
  5405. + * Function pointers for the "max" debugfs file operations
  5406. + */
  5407. +static const struct file_operations max_fops = {
  5408. + .open = debug_max_fopen,
  5409. + .read = debug_max_fread,
  5410. + .write = debug_max_fwrite,
  5411. + .owner = THIS_MODULE,
  5412. +};
  5413. +
  5414. +/*
  5415. + * Function pointers for the "sample" debugfs file operations
  5416. + */
  5417. +static const struct file_operations sample_fops = {
  5418. + .open = debug_sample_fopen,
  5419. + .read = debug_sample_fread,
  5420. + .release = debug_sample_release,
  5421. + .owner = THIS_MODULE,
  5422. +};
  5423. +
  5424. +/*
  5425. + * Function pointers for the "threshold" debugfs file operations
  5426. + */
  5427. +static const struct file_operations threshold_fops = {
  5428. + .open = debug_threshold_fopen,
  5429. + .read = debug_threshold_fread,
  5430. + .write = debug_threshold_fwrite,
  5431. + .owner = THIS_MODULE,
  5432. +};
  5433. +
  5434. +/*
  5435. + * Function pointers for the "width" debugfs file operations
  5436. + */
  5437. +static const struct file_operations width_fops = {
  5438. + .open = debug_width_fopen,
  5439. + .read = debug_width_fread,
  5440. + .write = debug_width_fwrite,
  5441. + .owner = THIS_MODULE,
  5442. +};
  5443. +
  5444. +/*
  5445. + * Function pointers for the "window" debugfs file operations
  5446. + */
  5447. +static const struct file_operations window_fops = {
  5448. + .open = debug_window_fopen,
  5449. + .read = debug_window_fread,
  5450. + .write = debug_window_fwrite,
  5451. + .owner = THIS_MODULE,
  5452. +};
  5453. +
  5454. +/**
  5455. + * init_debugfs - A function to initialize the debugfs interface files
  5456. + *
  5457. + * This function creates entries in debugfs for "hwlat_detector", including
  5458. + * files to read values from the detector, current samples, and the
  5459. + * maximum sample that has been captured since the hardware latency
  5460. + * detector was started.
  5461. + */
  5462. +static int init_debugfs(void)
  5463. +{
  5464. + int ret = -ENOMEM;
  5465. +
  5466. + debug_dir = debugfs_create_dir(DRVNAME, NULL);
  5467. + if (!debug_dir)
  5468. + goto err_debug_dir;
  5469. +
  5470. + debug_sample = debugfs_create_file("sample", 0444,
  5471. + debug_dir, NULL,
  5472. + &sample_fops);
  5473. + if (!debug_sample)
  5474. + goto err_sample;
  5475. +
  5476. + debug_count = debugfs_create_file("count", 0444,
  5477. + debug_dir, NULL,
  5478. + &count_fops);
  5479. + if (!debug_count)
  5480. + goto err_count;
  5481. +
  5482. + debug_max = debugfs_create_file("max", 0444,
  5483. + debug_dir, NULL,
  5484. + &max_fops);
  5485. + if (!debug_max)
  5486. + goto err_max;
  5487. +
  5488. + debug_sample_window = debugfs_create_file("window", 0644,
  5489. + debug_dir, NULL,
  5490. + &window_fops);
  5491. + if (!debug_sample_window)
  5492. + goto err_window;
  5493. +
  5494. + debug_sample_width = debugfs_create_file("width", 0644,
  5495. + debug_dir, NULL,
  5496. + &width_fops);
  5497. + if (!debug_sample_width)
  5498. + goto err_width;
  5499. +
  5500. + debug_threshold = debugfs_create_file("threshold", 0644,
  5501. + debug_dir, NULL,
  5502. + &threshold_fops);
  5503. + if (!debug_threshold)
  5504. + goto err_threshold;
  5505. +
  5506. + debug_enable = debugfs_create_file("enable", 0644,
  5507. + debug_dir, &enabled,
  5508. + &enable_fops);
  5509. + if (!debug_enable)
  5510. + goto err_enable;
  5511. +
  5512. + else {
  5513. + ret = 0;
  5514. + goto out;
  5515. + }
  5516. +
  5517. +err_enable:
  5518. + debugfs_remove(debug_threshold);
  5519. +err_threshold:
  5520. + debugfs_remove(debug_sample_width);
  5521. +err_width:
  5522. + debugfs_remove(debug_sample_window);
  5523. +err_window:
  5524. + debugfs_remove(debug_max);
  5525. +err_max:
  5526. + debugfs_remove(debug_count);
  5527. +err_count:
  5528. + debugfs_remove(debug_sample);
  5529. +err_sample:
  5530. + debugfs_remove(debug_dir);
  5531. +err_debug_dir:
  5532. +out:
  5533. + return ret;
  5534. +}
  5535. +
  5536. +/**
  5537. + * free_debugfs - A function to cleanup the debugfs file interface
  5538. + */
  5539. +static void free_debugfs(void)
  5540. +{
  5541. + /* could also use a debugfs_remove_recursive */
  5542. + debugfs_remove(debug_enable);
  5543. + debugfs_remove(debug_threshold);
  5544. + debugfs_remove(debug_sample_width);
  5545. + debugfs_remove(debug_sample_window);
  5546. + debugfs_remove(debug_max);
  5547. + debugfs_remove(debug_count);
  5548. + debugfs_remove(debug_sample);
  5549. + debugfs_remove(debug_dir);
  5550. +}
  5551. +
  5552. +/**
  5553. + * detector_init - Standard module initialization code
  5554. + */
  5555. +static int detector_init(void)
  5556. +{
  5557. + int ret = -ENOMEM;
  5558. +
  5559. + pr_info(BANNER "version %s\n", VERSION);
  5560. +
  5561. + ret = init_stats();
  5562. + if (ret)
  5563. + goto out;
  5564. +
  5565. + ret = init_debugfs();
  5566. + if (ret)
  5567. + goto err_stats;
  5568. +
  5569. + if (enabled)
  5570. + ret = start_kthread();
  5571. +
  5572. + goto out;
  5573. +
  5574. +err_stats:
  5575. + ring_buffer_free(ring_buffer);
  5576. +out:
  5577. + return ret;
  5578. +
  5579. +}
  5580. +
  5581. +/**
  5582. + * detector_exit - Standard module cleanup code
  5583. + */
  5584. +static void detector_exit(void)
  5585. +{
  5586. + int err;
  5587. +
  5588. + if (enabled) {
  5589. + enabled = 0;
  5590. + err = stop_kthread();
  5591. + if (err)
  5592. + pr_err(BANNER "cannot stop kthread\n");
  5593. + }
  5594. +
  5595. + free_debugfs();
  5596. + ring_buffer_free(ring_buffer); /* free up the ring buffer */
  5597. +
  5598. +}
  5599. +
  5600. +module_init(detector_init);
  5601. +module_exit(detector_exit);
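
The debugfs files created above are consumed entirely from user space. A minimal reader, assuming debugfs is mounted at /sys/kernel/debug and that DRVNAME expands to "hwlat_detector" (neither is shown in this excerpt), could look like this sketch; the values honour the "width < window" rule enforced by the write handlers above:

/* Sketch only: configure the detector and print samples as they arrive. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define HWLAT_DIR "/sys/kernel/debug/hwlat_detector"	/* assumed mount point */

static int write_str(const char *file, const char *val)
{
	char path[256];
	int fd, ret;

	snprintf(path, sizeof(path), "%s/%s", HWLAT_DIR, file);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	ret = write(fd, val, strlen(val));
	close(fd);
	return ret < 0 ? -1 : 0;
}

int main(void)
{
	char buf[128];
	ssize_t len;
	int fd;

	write_str("window", "1000000");		/* 1 s sample window (us) */
	write_str("width", "500000");		/* sample for half of it */
	write_str("threshold", "10");		/* record latencies >= 10 us */
	write_str("enable", "1");

	fd = open(HWLAT_DIR "/sample", O_RDONLY);
	if (fd < 0)
		return 1;
	/* Each read blocks until a sample is available, then returns one line. */
	while ((len = read(fd, buf, sizeof(buf) - 1)) > 0) {
		buf[len] = '\0';
		fputs(buf, stdout);
	}
	close(fd);
	return 0;
}
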
  5602. diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
  5603. index 03ddf0ecf402..684087db170b 100644
  5604. --- a/drivers/mmc/host/jz4740_mmc.c
  5605. +++ b/drivers/mmc/host/jz4740_mmc.c
  5606. @@ -1068,8 +1068,6 @@ static int jz4740_mmc_probe(struct platform_device* pdev)
  5607. jz4740_mmc_clock_disable(host);
  5608. setup_timer(&host->timeout_timer, jz4740_mmc_timeout,
  5609. (unsigned long)host);
  5610. - /* It is not important when it times out, it just needs to timeout. */
  5611. - set_timer_slack(&host->timeout_timer, HZ);
  5612. host->use_dma = true;
  5613. if (host->use_dma && jz4740_mmc_acquire_dma_channels(host) != 0)
  5614. diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
  5615. index 2e6c96845c9a..578ddbeab8ed 100644
  5616. --- a/drivers/mmc/host/mmci.c
  5617. +++ b/drivers/mmc/host/mmci.c
  5618. @@ -1155,15 +1155,12 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id)
  5619. struct sg_mapping_iter *sg_miter = &host->sg_miter;
  5620. struct variant_data *variant = host->variant;
  5621. void __iomem *base = host->base;
  5622. - unsigned long flags;
  5623. u32 status;
  5624. status = readl(base + MMCISTATUS);
  5625. dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status);
  5626. - local_irq_save(flags);
  5627. -
  5628. do {
  5629. unsigned int remain, len;
  5630. char *buffer;
  5631. @@ -1203,8 +1200,6 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id)
  5632. sg_miter_stop(sg_miter);
  5633. - local_irq_restore(flags);
  5634. -
  5635. /*
  5636. * If we have less than the fifo 'half-full' threshold to transfer,
  5637. * trigger a PIO interrupt as soon as any data is available.
  5638. diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
  5639. index d81fceddbe0e..dbcecb8845c6 100644
  5640. --- a/drivers/net/ethernet/3com/3c59x.c
  5641. +++ b/drivers/net/ethernet/3com/3c59x.c
  5642. @@ -842,9 +842,9 @@ static void poll_vortex(struct net_device *dev)
  5643. {
  5644. struct vortex_private *vp = netdev_priv(dev);
  5645. unsigned long flags;
  5646. - local_irq_save(flags);
  5647. + local_irq_save_nort(flags);
  5648. (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
  5649. - local_irq_restore(flags);
  5650. + local_irq_restore_nort(flags);
  5651. }
  5652. #endif
  5653. @@ -1910,12 +1910,12 @@ static void vortex_tx_timeout(struct net_device *dev)
  5654. * Block interrupts because vortex_interrupt does a bare spin_lock()
  5655. */
  5656. unsigned long flags;
  5657. - local_irq_save(flags);
  5658. + local_irq_save_nort(flags);
  5659. if (vp->full_bus_master_tx)
  5660. boomerang_interrupt(dev->irq, dev);
  5661. else
  5662. vortex_interrupt(dev->irq, dev);
  5663. - local_irq_restore(flags);
  5664. + local_irq_restore_nort(flags);
  5665. }
  5666. }
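
local_irq_save_nort()/local_irq_restore_nort(), used here and in several later hunks (libsas, qla2xxx, the USB HCD and NTFS changes), are defined elsewhere in this patch; the driver hunks only switch callers over. The definitions are roughly the following sketch (an approximation, not part of this excerpt): on a non-RT kernel they are plain local_irq_save()/local_irq_restore(), while on PREEMPT_RT_FULL they only record the flags and leave interrupts enabled, since the code they wrap ends up taking sleeping spinlocks on RT.

/* Approximation of the helpers relied on above (defined elsewhere in this patch). */
#ifdef CONFIG_PREEMPT_RT_FULL
# define local_irq_save_nort(flags)	local_save_flags(flags)
# define local_irq_restore_nort(flags)	(void)(flags)
#else
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#endif
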
  5667. diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
  5668. index d0084d4d1a9b..38370772f5dc 100644
  5669. --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
  5670. +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
  5671. @@ -2217,11 +2217,7 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
  5672. }
  5673. tpd_req = atl1c_cal_tpd_req(skb);
  5674. - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) {
  5675. - if (netif_msg_pktdata(adapter))
  5676. - dev_info(&adapter->pdev->dev, "tx locked\n");
  5677. - return NETDEV_TX_LOCKED;
  5678. - }
  5679. + spin_lock_irqsave(&adapter->tx_lock, flags);
  5680. if (atl1c_tpd_avail(adapter, type) < tpd_req) {
  5681. /* no enough descriptor, just stop queue */
  5682. diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
  5683. index 59a03a193e83..734f7a7ad2c3 100644
  5684. --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
  5685. +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
  5686. @@ -1880,8 +1880,7 @@ static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb,
  5687. return NETDEV_TX_OK;
  5688. }
  5689. tpd_req = atl1e_cal_tdp_req(skb);
  5690. - if (!spin_trylock_irqsave(&adapter->tx_lock, flags))
  5691. - return NETDEV_TX_LOCKED;
  5692. + spin_lock_irqsave(&adapter->tx_lock, flags);
  5693. if (atl1e_tpd_avail(adapter) < tpd_req) {
  5694. /* no enough descriptor, just stop queue */
  5695. diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
  5696. index 526ea74e82d9..86f467a2c485 100644
  5697. --- a/drivers/net/ethernet/chelsio/cxgb/sge.c
  5698. +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
  5699. @@ -1664,8 +1664,7 @@ static int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter,
  5700. struct cmdQ *q = &sge->cmdQ[qid];
  5701. unsigned int credits, pidx, genbit, count, use_sched_skb = 0;
  5702. - if (!spin_trylock(&q->lock))
  5703. - return NETDEV_TX_LOCKED;
  5704. + spin_lock(&q->lock);
  5705. reclaim_completed_tx(sge, q);
  5706. diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
  5707. index 9ba975853ec6..813cfa698160 100644
  5708. --- a/drivers/net/ethernet/neterion/s2io.c
  5709. +++ b/drivers/net/ethernet/neterion/s2io.c
  5710. @@ -4084,12 +4084,7 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev)
  5711. [skb->priority & (MAX_TX_FIFOS - 1)];
  5712. fifo = &mac_control->fifos[queue];
  5713. - if (do_spin_lock)
  5714. - spin_lock_irqsave(&fifo->tx_lock, flags);
  5715. - else {
  5716. - if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags)))
  5717. - return NETDEV_TX_LOCKED;
  5718. - }
  5719. + spin_lock_irqsave(&fifo->tx_lock, flags);
  5720. if (sp->config.multiq) {
  5721. if (__netif_subqueue_stopped(dev, fifo->fifo_no)) {
  5722. diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
  5723. index 3b98b263bad0..ca4add749410 100644
  5724. --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
  5725. +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
  5726. @@ -2137,10 +2137,8 @@ static int pch_gbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
  5727. struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring;
  5728. unsigned long flags;
  5729. - if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) {
  5730. - /* Collision - tell upper layer to requeue */
  5731. - return NETDEV_TX_LOCKED;
  5732. - }
  5733. + spin_lock_irqsave(&tx_ring->tx_lock, flags);
  5734. +
  5735. if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) {
  5736. netif_stop_queue(netdev);
  5737. spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
  5738. diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
  5739. index ef668d300800..d987d571fdd6 100644
  5740. --- a/drivers/net/ethernet/realtek/8139too.c
  5741. +++ b/drivers/net/ethernet/realtek/8139too.c
  5742. @@ -2229,7 +2229,7 @@ static void rtl8139_poll_controller(struct net_device *dev)
  5743. struct rtl8139_private *tp = netdev_priv(dev);
  5744. const int irq = tp->pci_dev->irq;
  5745. - disable_irq(irq);
  5746. + disable_irq_nosync(irq);
  5747. rtl8139_interrupt(irq, dev);
  5748. enable_irq(irq);
  5749. }
  5750. diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
  5751. index 14c9d1baa85c..e1a5305418a8 100644
  5752. --- a/drivers/net/ethernet/tehuti/tehuti.c
  5753. +++ b/drivers/net/ethernet/tehuti/tehuti.c
  5754. @@ -1629,13 +1629,8 @@ static netdev_tx_t bdx_tx_transmit(struct sk_buff *skb,
  5755. unsigned long flags;
  5756. ENTER;
  5757. - local_irq_save(flags);
  5758. - if (!spin_trylock(&priv->tx_lock)) {
  5759. - local_irq_restore(flags);
  5760. - DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n",
  5761. - BDX_DRV_NAME, ndev->name);
  5762. - return NETDEV_TX_LOCKED;
  5763. - }
  5764. +
  5765. + spin_lock_irqsave(&priv->tx_lock, flags);
  5766. /* build tx descriptor */
  5767. BDX_ASSERT(f->m.wptr >= f->m.memsz); /* started with valid wptr */
  5768. diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c
  5769. index 298e059d0498..ea5d774fc89b 100644
  5770. --- a/drivers/net/ethernet/tile/tilepro.c
  5771. +++ b/drivers/net/ethernet/tile/tilepro.c
  5772. @@ -588,7 +588,7 @@ static bool tile_net_lepp_free_comps(struct net_device *dev, bool all)
  5773. static void tile_net_schedule_egress_timer(struct tile_net_cpu *info)
  5774. {
  5775. if (!info->egress_timer_scheduled) {
  5776. - mod_timer_pinned(&info->egress_timer, jiffies + 1);
  5777. + mod_timer(&info->egress_timer, jiffies + 1);
  5778. info->egress_timer_scheduled = true;
  5779. }
  5780. }
  5781. @@ -1004,7 +1004,7 @@ static void tile_net_register(void *dev_ptr)
  5782. BUG();
  5783. /* Initialize the egress timer. */
  5784. - init_timer(&info->egress_timer);
  5785. + init_timer_pinned(&info->egress_timer);
  5786. info->egress_timer.data = (long)info;
  5787. info->egress_timer.function = tile_net_handle_egress_timer;
  5788. diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
  5789. index 9cfe6aeac84e..a31f4610b493 100644
  5790. --- a/drivers/net/rionet.c
  5791. +++ b/drivers/net/rionet.c
  5792. @@ -179,11 +179,7 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
  5793. unsigned long flags;
  5794. int add_num = 1;
  5795. - local_irq_save(flags);
  5796. - if (!spin_trylock(&rnet->tx_lock)) {
  5797. - local_irq_restore(flags);
  5798. - return NETDEV_TX_LOCKED;
  5799. - }
  5800. + spin_lock_irqsave(&rnet->tx_lock, flags);
  5801. if (is_multicast_ether_addr(eth->h_dest))
  5802. add_num = nets[rnet->mport->id].nact;
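
The atl1c, atl1e, cxgb, s2io, pch_gbe, tehuti and rionet hunks above all apply the same transformation: the trylock-and-return-NETDEV_TX_LOCKED dance in the xmit path is replaced by an unconditional spin_lock_irqsave(), so the transmit path simply waits on the per-ring lock instead of asking the core to requeue the skb. A condensed before/after sketch of the pattern (illustrative only, not taken verbatim from any one driver; the motivation is an inference, the hunks themselves only show the mechanical change):

	/* Before: bail out and let the stack retry the skb later. */
	if (!spin_trylock_irqsave(&adapter->tx_lock, flags))
		return NETDEV_TX_LOCKED;

	/* After: just take the lock. On RT this is a sleeping lock whose
	 * holder may be preempted, so a trylock would fail far more often
	 * and the TX_LOCKED retry path would fire constantly. */
	spin_lock_irqsave(&adapter->tx_lock, flags);
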
  5803. diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
  5804. index f2cd513d54b2..6c0f4c9638a2 100644
  5805. --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
  5806. +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
  5807. @@ -697,7 +697,7 @@ static void ezusb_req_ctx_wait(struct ezusb_priv *upriv,
  5808. while (!ctx->done.done && msecs--)
  5809. udelay(1000);
  5810. } else {
  5811. - wait_event_interruptible(ctx->done.wait,
  5812. + swait_event_interruptible(ctx->done.wait,
  5813. ctx->done.done);
  5814. }
  5815. break;
  5816. diff --git a/drivers/pci/access.c b/drivers/pci/access.c
  5817. index d11cdbb8fba3..223bbb9acb03 100644
  5818. --- a/drivers/pci/access.c
  5819. +++ b/drivers/pci/access.c
  5820. @@ -672,7 +672,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev)
  5821. WARN_ON(!dev->block_cfg_access);
  5822. dev->block_cfg_access = 0;
  5823. - wake_up_all(&pci_cfg_wait);
  5824. + wake_up_all_locked(&pci_cfg_wait);
  5825. raw_spin_unlock_irqrestore(&pci_lock, flags);
  5826. }
  5827. EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
  5828. diff --git a/drivers/power/bq27xxx_battery.c b/drivers/power/bq27xxx_battery.c
  5829. index 45f6ebf88df6..e90b3f307e0f 100644
  5830. --- a/drivers/power/bq27xxx_battery.c
  5831. +++ b/drivers/power/bq27xxx_battery.c
  5832. @@ -735,11 +735,8 @@ static void bq27xxx_battery_poll(struct work_struct *work)
  5833. bq27xxx_battery_update(di);
  5834. - if (poll_interval > 0) {
  5835. - /* The timer does not have to be accurate. */
  5836. - set_timer_slack(&di->work.timer, poll_interval * HZ / 4);
  5837. + if (poll_interval > 0)
  5838. schedule_delayed_work(&di->work, poll_interval * HZ);
  5839. - }
  5840. }
  5841. /*
  5842. diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
  5843. index 0efe7112fc1f..3d232657218d 100644
  5844. --- a/drivers/scsi/fcoe/fcoe.c
  5845. +++ b/drivers/scsi/fcoe/fcoe.c
  5846. @@ -1286,7 +1286,7 @@ static void fcoe_percpu_thread_destroy(unsigned int cpu)
  5847. struct sk_buff *skb;
  5848. #ifdef CONFIG_SMP
  5849. struct fcoe_percpu_s *p0;
  5850. - unsigned targ_cpu = get_cpu();
  5851. + unsigned targ_cpu = get_cpu_light();
  5852. #endif /* CONFIG_SMP */
  5853. FCOE_DBG("Destroying receive thread for CPU %d\n", cpu);
  5854. @@ -1342,7 +1342,7 @@ static void fcoe_percpu_thread_destroy(unsigned int cpu)
  5855. kfree_skb(skb);
  5856. spin_unlock_bh(&p->fcoe_rx_list.lock);
  5857. }
  5858. - put_cpu();
  5859. + put_cpu_light();
  5860. #else
  5861. /*
  5862. * This a non-SMP scenario where the singular Rx thread is
  5863. @@ -1566,11 +1566,11 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev,
  5864. static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
  5865. {
  5866. struct fcoe_percpu_s *fps;
  5867. - int rc;
  5868. + int rc, cpu = get_cpu_light();
  5869. - fps = &get_cpu_var(fcoe_percpu);
  5870. + fps = &per_cpu(fcoe_percpu, cpu);
  5871. rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
  5872. - put_cpu_var(fcoe_percpu);
  5873. + put_cpu_light();
  5874. return rc;
  5875. }
  5876. @@ -1766,11 +1766,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport,
  5877. return 0;
  5878. }
  5879. - stats = per_cpu_ptr(lport->stats, get_cpu());
  5880. + stats = per_cpu_ptr(lport->stats, get_cpu_light());
  5881. stats->InvalidCRCCount++;
  5882. if (stats->InvalidCRCCount < 5)
  5883. printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
  5884. - put_cpu();
  5885. + put_cpu_light();
  5886. return -EINVAL;
  5887. }
  5888. @@ -1846,13 +1846,13 @@ static void fcoe_recv_frame(struct sk_buff *skb)
  5889. goto drop;
  5890. if (!fcoe_filter_frames(lport, fp)) {
  5891. - put_cpu();
  5892. + put_cpu_light();
  5893. fc_exch_recv(lport, fp);
  5894. return;
  5895. }
  5896. drop:
  5897. stats->ErrorFrames++;
  5898. - put_cpu();
  5899. + put_cpu_light();
  5900. kfree_skb(skb);
  5901. }
  5902. diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
  5903. index 3e83d485f743..bc69ff93127c 100644
  5904. --- a/drivers/scsi/fcoe/fcoe_ctlr.c
  5905. +++ b/drivers/scsi/fcoe/fcoe_ctlr.c
  5906. @@ -831,7 +831,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
  5907. INIT_LIST_HEAD(&del_list);
  5908. - stats = per_cpu_ptr(fip->lp->stats, get_cpu());
  5909. + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
  5910. list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
  5911. deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
  5912. @@ -867,7 +867,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
  5913. sel_time = fcf->time;
  5914. }
  5915. }
  5916. - put_cpu();
  5917. + put_cpu_light();
  5918. list_for_each_entry_safe(fcf, next, &del_list, list) {
  5919. /* Removes fcf from current list */
  5920. diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
  5921. index 30f9ef0c0d4f..6c686bc01a82 100644
  5922. --- a/drivers/scsi/libfc/fc_exch.c
  5923. +++ b/drivers/scsi/libfc/fc_exch.c
  5924. @@ -814,10 +814,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport,
  5925. }
  5926. memset(ep, 0, sizeof(*ep));
  5927. - cpu = get_cpu();
  5928. + cpu = get_cpu_light();
  5929. pool = per_cpu_ptr(mp->pool, cpu);
  5930. spin_lock_bh(&pool->lock);
  5931. - put_cpu();
  5932. + put_cpu_light();
  5933. /* peek cache of free slot */
  5934. if (pool->left != FC_XID_UNKNOWN) {
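
get_cpu_light()/put_cpu_light(), used in the fcoe, fcoe_ctlr and fc_exch hunks above, come from elsewhere in this patch. The usual RT definition (an assumption here, not visible in these hunks) is built on migrate_disable() rather than preempt_disable(): the task stays pinned to its CPU but remains preemptible, which is what allows the sleeping spin_lock_bh()/spin_lock() calls taken immediately afterwards to work on RT.

/* Assumed definitions (provided elsewhere in this patch):
 * pin the task to the current CPU without disabling preemption. */
#define get_cpu_light()		({ migrate_disable(); smp_processor_id(); })
#define put_cpu_light()		migrate_enable()
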
  5935. diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
  5936. index 9c706d8c1441..d968ffc79c08 100644
  5937. --- a/drivers/scsi/libsas/sas_ata.c
  5938. +++ b/drivers/scsi/libsas/sas_ata.c
  5939. @@ -190,7 +190,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
  5940. /* TODO: audit callers to ensure they are ready for qc_issue to
  5941. * unconditionally re-enable interrupts
  5942. */
  5943. - local_irq_save(flags);
  5944. + local_irq_save_nort(flags);
  5945. spin_unlock(ap->lock);
  5946. /* If the device fell off, no sense in issuing commands */
  5947. @@ -255,7 +255,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
  5948. out:
  5949. spin_lock(ap->lock);
  5950. - local_irq_restore(flags);
  5951. + local_irq_restore_nort(flags);
  5952. return ret;
  5953. }
  5954. diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
  5955. index edc48f3b8230..ee5c6f9dfb6f 100644
  5956. --- a/drivers/scsi/qla2xxx/qla_inline.h
  5957. +++ b/drivers/scsi/qla2xxx/qla_inline.h
  5958. @@ -59,12 +59,12 @@ qla2x00_poll(struct rsp_que *rsp)
  5959. {
  5960. unsigned long flags;
  5961. struct qla_hw_data *ha = rsp->hw;
  5962. - local_irq_save(flags);
  5963. + local_irq_save_nort(flags);
  5964. if (IS_P3P_TYPE(ha))
  5965. qla82xx_poll(0, rsp);
  5966. else
  5967. ha->isp_ops->intr_handler(0, rsp);
  5968. - local_irq_restore(flags);
  5969. + local_irq_restore_nort(flags);
  5970. }
  5971. static inline uint8_t *
  5972. diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c
  5973. index 7fc919f7da4d..e03fa17b8670 100644
  5974. --- a/drivers/thermal/x86_pkg_temp_thermal.c
  5975. +++ b/drivers/thermal/x86_pkg_temp_thermal.c
  5976. @@ -29,6 +29,7 @@
  5977. #include <linux/pm.h>
  5978. #include <linux/thermal.h>
  5979. #include <linux/debugfs.h>
  5980. +#include <linux/swork.h>
  5981. #include <asm/cpu_device_id.h>
  5982. #include <asm/mce.h>
  5983. @@ -352,7 +353,7 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
  5984. }
  5985. }
  5986. -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  5987. +static void platform_thermal_notify_work(struct swork_event *event)
  5988. {
  5989. unsigned long flags;
  5990. int cpu = smp_processor_id();
  5991. @@ -369,7 +370,7 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  5992. pkg_work_scheduled[phy_id]) {
  5993. disable_pkg_thres_interrupt();
  5994. spin_unlock_irqrestore(&pkg_work_lock, flags);
  5995. - return -EINVAL;
  5996. + return;
  5997. }
  5998. pkg_work_scheduled[phy_id] = 1;
  5999. spin_unlock_irqrestore(&pkg_work_lock, flags);
  6000. @@ -378,9 +379,48 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  6001. schedule_delayed_work_on(cpu,
  6002. &per_cpu(pkg_temp_thermal_threshold_work, cpu),
  6003. msecs_to_jiffies(notify_delay_ms));
  6004. +}
  6005. +
  6006. +#ifdef CONFIG_PREEMPT_RT_FULL
  6007. +static struct swork_event notify_work;
  6008. +
  6009. +static int thermal_notify_work_init(void)
  6010. +{
  6011. + int err;
  6012. +
  6013. + err = swork_get();
  6014. + if (err)
  6015. + return err;
  6016. +
  6017. + INIT_SWORK(&notify_work, platform_thermal_notify_work);
  6018. return 0;
  6019. }
  6020. +static void thermal_notify_work_cleanup(void)
  6021. +{
  6022. + swork_put();
  6023. +}
  6024. +
  6025. +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  6026. +{
  6027. + swork_queue(&notify_work);
  6028. + return 0;
  6029. +}
  6030. +
  6031. +#else /* !CONFIG_PREEMPT_RT_FULL */
  6032. +
  6033. +static int thermal_notify_work_init(void) { return 0; }
  6034. +
  6035. +static void thermal_notify_work_cleanup(void) { }
  6036. +
  6037. +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  6038. +{
  6039. + platform_thermal_notify_work(NULL);
  6040. +
  6041. + return 0;
  6042. +}
  6043. +#endif /* CONFIG_PREEMPT_RT_FULL */
  6044. +
  6045. static int find_siblings_cpu(int cpu)
  6046. {
  6047. int i;
  6048. @@ -584,6 +624,9 @@ static int __init pkg_temp_thermal_init(void)
  6049. if (!x86_match_cpu(pkg_temp_thermal_ids))
  6050. return -ENODEV;
  6051. + if (!thermal_notify_work_init())
  6052. + return -ENODEV;
  6053. +
  6054. spin_lock_init(&pkg_work_lock);
  6055. platform_thermal_package_notify =
  6056. pkg_temp_thermal_platform_thermal_notify;
  6057. @@ -608,7 +651,7 @@ static int __init pkg_temp_thermal_init(void)
  6058. kfree(pkg_work_scheduled);
  6059. platform_thermal_package_notify = NULL;
  6060. platform_thermal_package_rate_control = NULL;
  6061. -
  6062. + thermal_notify_work_cleanup();
  6063. return -ENODEV;
  6064. }
  6065. @@ -633,6 +676,7 @@ static void __exit pkg_temp_thermal_exit(void)
  6066. mutex_unlock(&phy_dev_list_mutex);
  6067. platform_thermal_package_notify = NULL;
  6068. platform_thermal_package_rate_control = NULL;
  6069. + thermal_notify_work_cleanup();
  6070. for_each_online_cpu(i)
  6071. cancel_delayed_work_sync(
  6072. &per_cpu(pkg_temp_thermal_threshold_work, i));
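
The swork ("simple work") API used above is added by another part of this patch (the new <linux/swork.h> include). Based purely on the calls visible in this hunk and in the fs/aio.c hunk further down, the life cycle looks like the sketch below; the handler is assumed to run from a worker kthread where taking sleeping locks is fine on RT.

#include <linux/swork.h>		/* provided elsewhere in this patch */

static struct swork_event my_event;

static void my_handler(struct swork_event *event)
{
	/* runs from the swork kthread, may sleep */
}

static int my_init(void)
{
	int err = swork_get();		/* bring up the worker thread */

	if (err)
		return err;
	INIT_SWORK(&my_event, my_handler);
	return 0;
}

static void my_trigger(void)		/* e.g. from a non-sleepable context */
{
	swork_queue(&my_event);
}

static void my_exit(void)
{
	swork_put();			/* drop our reference on the worker */
}
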
  6073. diff --git a/drivers/tty/metag_da.c b/drivers/tty/metag_da.c
  6074. index 9325262289f9..25ccef2fe748 100644
  6075. --- a/drivers/tty/metag_da.c
  6076. +++ b/drivers/tty/metag_da.c
  6077. @@ -323,12 +323,12 @@ static void dashtty_timer(unsigned long ignored)
  6078. if (channel >= 0)
  6079. fetch_data(channel);
  6080. - mod_timer_pinned(&poll_timer, jiffies + DA_TTY_POLL);
  6081. + mod_timer(&poll_timer, jiffies + DA_TTY_POLL);
  6082. }
  6083. static void add_poll_timer(struct timer_list *poll_timer)
  6084. {
  6085. - setup_timer(poll_timer, dashtty_timer, 0);
  6086. + setup_pinned_timer(poll_timer, dashtty_timer, 0);
  6087. poll_timer->expires = jiffies + DA_TTY_POLL;
  6088. /*
  6089. diff --git a/drivers/tty/mips_ejtag_fdc.c b/drivers/tty/mips_ejtag_fdc.c
  6090. index a119176a1855..234123b0c642 100644
  6091. --- a/drivers/tty/mips_ejtag_fdc.c
  6092. +++ b/drivers/tty/mips_ejtag_fdc.c
  6093. @@ -689,7 +689,7 @@ static void mips_ejtag_fdc_tty_timer(unsigned long opaque)
  6094. mips_ejtag_fdc_handle(priv);
  6095. if (!priv->removing)
  6096. - mod_timer_pinned(&priv->poll_timer, jiffies + FDC_TTY_POLL);
  6097. + mod_timer(&priv->poll_timer, jiffies + FDC_TTY_POLL);
  6098. }
  6099. /* TTY Port operations */
  6100. @@ -1002,7 +1002,7 @@ static int mips_ejtag_fdc_tty_probe(struct mips_cdmm_device *dev)
  6101. raw_spin_unlock_irq(&priv->lock);
  6102. } else {
  6103. /* If we didn't get an usable IRQ, poll instead */
  6104. - setup_timer(&priv->poll_timer, mips_ejtag_fdc_tty_timer,
  6105. + setup_pinned_timer(&priv->poll_timer, mips_ejtag_fdc_tty_timer,
  6106. (unsigned long)priv);
  6107. priv->poll_timer.expires = jiffies + FDC_TTY_POLL;
  6108. /*
  6109. diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
  6110. index 2f4f5ee651db..6ea6a3134e16 100644
  6111. --- a/drivers/tty/serial/8250/8250_core.c
  6112. +++ b/drivers/tty/serial/8250/8250_core.c
  6113. @@ -58,7 +58,16 @@ static struct uart_driver serial8250_reg;
  6114. static unsigned int skip_txen_test; /* force skip of txen test at init time */
  6115. -#define PASS_LIMIT 512
  6116. +/*
  6117. + * On -rt we can have more delays, and legitimately
  6118. + * so - so don't drop work spuriously and spam the
  6119. + * syslog:
  6120. + */
  6121. +#ifdef CONFIG_PREEMPT_RT_FULL
  6122. +# define PASS_LIMIT 1000000
  6123. +#else
  6124. +# define PASS_LIMIT 512
  6125. +#endif
  6126. #include <asm/serial.h>
  6127. /*
  6128. diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
  6129. index 00ad2637b08c..c9f6a4937b23 100644
  6130. --- a/drivers/tty/serial/8250/8250_port.c
  6131. +++ b/drivers/tty/serial/8250/8250_port.c
  6132. @@ -35,6 +35,7 @@
  6133. #include <linux/nmi.h>
  6134. #include <linux/mutex.h>
  6135. #include <linux/slab.h>
  6136. +#include <linux/kdb.h>
  6137. #include <linux/uaccess.h>
  6138. #include <linux/pm_runtime.h>
  6139. #include <linux/timer.h>
  6140. @@ -3092,9 +3093,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
  6141. serial8250_rpm_get(up);
  6142. - if (port->sysrq)
  6143. + if (port->sysrq || oops_in_progress)
  6144. locked = 0;
  6145. - else if (oops_in_progress)
  6146. + else if (in_kdb_printk())
  6147. locked = spin_trylock_irqsave(&port->lock, flags);
  6148. else
  6149. spin_lock_irqsave(&port->lock, flags);
  6150. diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
  6151. index 7c198e0a3178..a26b81c4aae6 100644
  6152. --- a/drivers/tty/serial/amba-pl011.c
  6153. +++ b/drivers/tty/serial/amba-pl011.c
  6154. @@ -2166,13 +2166,19 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
  6155. clk_enable(uap->clk);
  6156. - local_irq_save(flags);
  6157. + /*
  6158. + * local_irq_save(flags);
  6159. + *
  6160. + * This local_irq_save() is nonsense. If we come in via sysrq
  6161. + * handling then interrupts are already disabled. Aside of
  6162. + * that the port.sysrq check is racy on SMP regardless.
  6163. + */
  6164. if (uap->port.sysrq)
  6165. locked = 0;
  6166. else if (oops_in_progress)
  6167. - locked = spin_trylock(&uap->port.lock);
  6168. + locked = spin_trylock_irqsave(&uap->port.lock, flags);
  6169. else
  6170. - spin_lock(&uap->port.lock);
  6171. + spin_lock_irqsave(&uap->port.lock, flags);
  6172. /*
  6173. * First save the CR then disable the interrupts
  6174. @@ -2196,8 +2202,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
  6175. pl011_write(old_cr, uap, REG_CR);
  6176. if (locked)
  6177. - spin_unlock(&uap->port.lock);
  6178. - local_irq_restore(flags);
  6179. + spin_unlock_irqrestore(&uap->port.lock, flags);
  6180. clk_disable(uap->clk);
  6181. }
  6182. diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
  6183. index a2a529994ba5..0ee7c4c518df 100644
  6184. --- a/drivers/tty/serial/omap-serial.c
  6185. +++ b/drivers/tty/serial/omap-serial.c
  6186. @@ -1257,13 +1257,10 @@ serial_omap_console_write(struct console *co, const char *s,
  6187. pm_runtime_get_sync(up->dev);
  6188. - local_irq_save(flags);
  6189. - if (up->port.sysrq)
  6190. - locked = 0;
  6191. - else if (oops_in_progress)
  6192. - locked = spin_trylock(&up->port.lock);
  6193. + if (up->port.sysrq || oops_in_progress)
  6194. + locked = spin_trylock_irqsave(&up->port.lock, flags);
  6195. else
  6196. - spin_lock(&up->port.lock);
  6197. + spin_lock_irqsave(&up->port.lock, flags);
  6198. /*
  6199. * First save the IER then disable the interrupts
  6200. @@ -1292,8 +1289,7 @@ serial_omap_console_write(struct console *co, const char *s,
  6201. pm_runtime_mark_last_busy(up->dev);
  6202. pm_runtime_put_autosuspend(up->dev);
  6203. if (locked)
  6204. - spin_unlock(&up->port.lock);
  6205. - local_irq_restore(flags);
  6206. + spin_unlock_irqrestore(&up->port.lock, flags);
  6207. }
  6208. static int __init
  6209. diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
  6210. index 025a4264430e..73d46e2155a9 100644
  6211. --- a/drivers/tty/serial/sc16is7xx.c
  6212. +++ b/drivers/tty/serial/sc16is7xx.c
  6213. @@ -1251,7 +1251,7 @@ static int sc16is7xx_probe(struct device *dev,
  6214. /* Setup interrupt */
  6215. ret = devm_request_irq(dev, irq, sc16is7xx_irq,
  6216. - IRQF_ONESHOT | flags, dev_name(dev), s);
  6217. + flags, dev_name(dev), s);
  6218. if (!ret)
  6219. return 0;
  6220. diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
  6221. index 2d107d0f61b0..28a5cb87289a 100644
  6222. --- a/drivers/usb/core/hcd.c
  6223. +++ b/drivers/usb/core/hcd.c
  6224. @@ -1759,9 +1759,9 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
  6225. * and no one may trigger the above deadlock situation when
  6226. * running complete() in tasklet.
  6227. */
  6228. - local_irq_save(flags);
  6229. + local_irq_save_nort(flags);
  6230. urb->complete(urb);
  6231. - local_irq_restore(flags);
  6232. + local_irq_restore_nort(flags);
  6233. usb_anchor_resume_wakeups(anchor);
  6234. atomic_dec(&urb->use_count);
  6235. diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
  6236. index 73515d54e1cc..78a9351b6226 100644
  6237. --- a/drivers/usb/gadget/function/f_fs.c
  6238. +++ b/drivers/usb/gadget/function/f_fs.c
  6239. @@ -1393,7 +1393,7 @@ static void ffs_data_put(struct ffs_data *ffs)
  6240. pr_info("%s(): freeing\n", __func__);
  6241. ffs_data_clear(ffs);
  6242. BUG_ON(waitqueue_active(&ffs->ev.waitq) ||
  6243. - waitqueue_active(&ffs->ep0req_completion.wait));
  6244. + swait_active(&ffs->ep0req_completion.wait));
  6245. kfree(ffs->dev_name);
  6246. kfree(ffs);
  6247. }
  6248. diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
  6249. index aa3707bdebb4..665654412e4f 100644
  6250. --- a/drivers/usb/gadget/legacy/inode.c
  6251. +++ b/drivers/usb/gadget/legacy/inode.c
  6252. @@ -346,7 +346,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
  6253. spin_unlock_irq (&epdata->dev->lock);
  6254. if (likely (value == 0)) {
  6255. - value = wait_event_interruptible (done.wait, done.done);
  6256. + value = swait_event_interruptible (done.wait, done.done);
  6257. if (value != 0) {
  6258. spin_lock_irq (&epdata->dev->lock);
  6259. if (likely (epdata->ep != NULL)) {
  6260. @@ -355,7 +355,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
  6261. usb_ep_dequeue (epdata->ep, epdata->req);
  6262. spin_unlock_irq (&epdata->dev->lock);
  6263. - wait_event (done.wait, done.done);
  6264. + swait_event (done.wait, done.done);
  6265. if (epdata->status == -ECONNRESET)
  6266. epdata->status = -EINTR;
  6267. } else {
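
The two gadget hunks above (and the orinoco_usb hunk earlier) only make sense if struct completion itself has been switched to a simple waitqueue, which another part of this patch does. The layout these callers assume is roughly the sketch below; only the field names .done and .wait are taken from the hunks, the rest is an assumption.

/* Assumed RT layout of struct completion (changed elsewhere in this patch);
 * the code above relies on .wait being a swait queue and .done a counter. */
struct completion {
	unsigned int		done;
	struct swait_queue_head	wait;
};
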
  6268. diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
  6269. index 04dcedfdebf8..86919ec47163 100644
  6270. --- a/drivers/usb/host/ohci-hcd.c
  6271. +++ b/drivers/usb/host/ohci-hcd.c
  6272. @@ -500,7 +500,6 @@ static int ohci_init (struct ohci_hcd *ohci)
  6273. setup_timer(&ohci->io_watchdog, io_watchdog_func,
  6274. (unsigned long) ohci);
  6275. - set_timer_slack(&ohci->io_watchdog, msecs_to_jiffies(20));
  6276. ohci->hcca = dma_alloc_coherent (hcd->self.controller,
  6277. sizeof(*ohci->hcca), &ohci->hcca_dma, GFP_KERNEL);
  6278. diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
  6279. index 327280535848..7901a685b8de 100644
  6280. --- a/drivers/usb/host/xhci.c
  6281. +++ b/drivers/usb/host/xhci.c
  6282. @@ -490,8 +490,6 @@ static void compliance_mode_recovery_timer_init(struct xhci_hcd *xhci)
  6283. xhci->comp_mode_recovery_timer.expires = jiffies +
  6284. msecs_to_jiffies(COMP_MODE_RCVRY_MSECS);
  6285. - set_timer_slack(&xhci->comp_mode_recovery_timer,
  6286. - msecs_to_jiffies(COMP_MODE_RCVRY_MSECS));
  6287. add_timer(&xhci->comp_mode_recovery_timer);
  6288. xhci_dbg_trace(xhci, trace_xhci_dbg_quirks,
  6289. "Compliance mode recovery timer initialized");
  6290. diff --git a/fs/aio.c b/fs/aio.c
  6291. index 155f84253f33..dd8d6f234a0b 100644
  6292. --- a/fs/aio.c
  6293. +++ b/fs/aio.c
  6294. @@ -40,6 +40,7 @@
  6295. #include <linux/ramfs.h>
  6296. #include <linux/percpu-refcount.h>
  6297. #include <linux/mount.h>
  6298. +#include <linux/swork.h>
  6299. #include <asm/kmap_types.h>
  6300. #include <asm/uaccess.h>
  6301. @@ -115,7 +116,7 @@ struct kioctx {
  6302. struct page **ring_pages;
  6303. long nr_pages;
  6304. - struct work_struct free_work;
  6305. + struct swork_event free_work;
  6306. /*
  6307. * signals when all in-flight requests are done
  6308. @@ -253,6 +254,7 @@ static int __init aio_setup(void)
  6309. .mount = aio_mount,
  6310. .kill_sb = kill_anon_super,
  6311. };
  6312. + BUG_ON(swork_get());
  6313. aio_mnt = kern_mount(&aio_fs);
  6314. if (IS_ERR(aio_mnt))
  6315. panic("Failed to create aio fs mount.");
  6316. @@ -568,9 +570,9 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
  6317. return cancel(&kiocb->common);
  6318. }
  6319. -static void free_ioctx(struct work_struct *work)
  6320. +static void free_ioctx(struct swork_event *sev)
  6321. {
  6322. - struct kioctx *ctx = container_of(work, struct kioctx, free_work);
  6323. + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
  6324. pr_debug("freeing %p\n", ctx);
  6325. @@ -589,8 +591,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
  6326. if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
  6327. complete(&ctx->rq_wait->comp);
  6328. - INIT_WORK(&ctx->free_work, free_ioctx);
  6329. - schedule_work(&ctx->free_work);
  6330. + INIT_SWORK(&ctx->free_work, free_ioctx);
  6331. + swork_queue(&ctx->free_work);
  6332. }
  6333. /*
  6334. @@ -598,9 +600,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
  6335. * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
  6336. * now it's safe to cancel any that need to be.
  6337. */
  6338. -static void free_ioctx_users(struct percpu_ref *ref)
  6339. +static void free_ioctx_users_work(struct swork_event *sev)
  6340. {
  6341. - struct kioctx *ctx = container_of(ref, struct kioctx, users);
  6342. + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
  6343. struct aio_kiocb *req;
  6344. spin_lock_irq(&ctx->ctx_lock);
  6345. @@ -619,6 +621,14 @@ static void free_ioctx_users(struct percpu_ref *ref)
  6346. percpu_ref_put(&ctx->reqs);
  6347. }
  6348. +static void free_ioctx_users(struct percpu_ref *ref)
  6349. +{
  6350. + struct kioctx *ctx = container_of(ref, struct kioctx, users);
  6351. +
  6352. + INIT_SWORK(&ctx->free_work, free_ioctx_users_work);
  6353. + swork_queue(&ctx->free_work);
  6354. +}
  6355. +
  6356. static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
  6357. {
  6358. unsigned i, new_nr;
  6359. diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
  6360. index f0d268b97d19..dfbccaefae5d 100644
  6361. --- a/fs/autofs4/autofs_i.h
  6362. +++ b/fs/autofs4/autofs_i.h
  6363. @@ -30,6 +30,7 @@
  6364. #include <linux/sched.h>
  6365. #include <linux/mount.h>
  6366. #include <linux/namei.h>
  6367. +#include <linux/delay.h>
  6368. #include <asm/current.h>
  6369. #include <linux/uaccess.h>
  6370. diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
  6371. index 9510d8d2e9cd..893c87d4f5a9 100644
  6372. --- a/fs/autofs4/expire.c
  6373. +++ b/fs/autofs4/expire.c
  6374. @@ -148,7 +148,7 @@ static struct dentry *get_next_positive_dentry(struct dentry *prev,
  6375. parent = p->d_parent;
  6376. if (!spin_trylock(&parent->d_lock)) {
  6377. spin_unlock(&p->d_lock);
  6378. - cpu_relax();
  6379. + cpu_chill();
  6380. goto relock;
  6381. }
  6382. spin_unlock(&p->d_lock);
  6383. diff --git a/fs/buffer.c b/fs/buffer.c
  6384. index af0d9a82a8ed..370df12f9df7 100644
  6385. --- a/fs/buffer.c
  6386. +++ b/fs/buffer.c
  6387. @@ -300,8 +300,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
  6388. * decide that the page is now completely done.
  6389. */
  6390. first = page_buffers(page);
  6391. - local_irq_save(flags);
  6392. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  6393. + flags = bh_uptodate_lock_irqsave(first);
  6394. clear_buffer_async_read(bh);
  6395. unlock_buffer(bh);
  6396. tmp = bh;
  6397. @@ -314,8 +313,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
  6398. }
  6399. tmp = tmp->b_this_page;
  6400. } while (tmp != bh);
  6401. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6402. - local_irq_restore(flags);
  6403. + bh_uptodate_unlock_irqrestore(first, flags);
  6404. /*
  6405. * If none of the buffers had errors and they are all
  6406. @@ -327,9 +325,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
  6407. return;
  6408. still_busy:
  6409. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6410. - local_irq_restore(flags);
  6411. - return;
  6412. + bh_uptodate_unlock_irqrestore(first, flags);
  6413. }
  6414. /*
  6415. @@ -357,8 +353,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
  6416. }
  6417. first = page_buffers(page);
  6418. - local_irq_save(flags);
  6419. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  6420. + flags = bh_uptodate_lock_irqsave(first);
  6421. clear_buffer_async_write(bh);
  6422. unlock_buffer(bh);
  6423. @@ -370,15 +365,12 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
  6424. }
  6425. tmp = tmp->b_this_page;
  6426. }
  6427. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6428. - local_irq_restore(flags);
  6429. + bh_uptodate_unlock_irqrestore(first, flags);
  6430. end_page_writeback(page);
  6431. return;
  6432. still_busy:
  6433. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6434. - local_irq_restore(flags);
  6435. - return;
  6436. + bh_uptodate_unlock_irqrestore(first, flags);
  6437. }
  6438. EXPORT_SYMBOL(end_buffer_async_write);
  6439. @@ -3314,6 +3306,7 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
  6440. struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
  6441. if (ret) {
  6442. INIT_LIST_HEAD(&ret->b_assoc_buffers);
  6443. + buffer_head_init_locks(ret);
  6444. preempt_disable();
  6445. __this_cpu_inc(bh_accounting.nr);
  6446. recalc_bh_state();
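
bh_uptodate_lock_irqsave()/bh_uptodate_unlock_irqrestore() and buffer_head_init_locks() are introduced by a buffer_head.h change outside this excerpt. On !RT they presumably wrap exactly the open-coded sequence removed above; on RT the bit spinlock is expected to become a real spinlock embedded in struct buffer_head, which is why alloc_buffer_head() now initializes it. A sketch of the non-RT side, reconstructed from the removed lines:

/* Sketch of the !PREEMPT_RT_BASE fallback, mirroring the code it replaces. */
static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
{
	unsigned long flags;

	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
	return flags;
}

static inline void bh_uptodate_unlock_irqrestore(struct buffer_head *bh,
						 unsigned long flags)
{
	bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
	local_irq_restore(flags);
}
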
  6447. diff --git a/fs/dcache.c b/fs/dcache.c
  6448. index 44008e3fafc4..abc87dc3b8bf 100644
  6449. --- a/fs/dcache.c
  6450. +++ b/fs/dcache.c
  6451. @@ -19,6 +19,7 @@
  6452. #include <linux/mm.h>
  6453. #include <linux/fs.h>
  6454. #include <linux/fsnotify.h>
  6455. +#include <linux/delay.h>
  6456. #include <linux/slab.h>
  6457. #include <linux/init.h>
  6458. #include <linux/hash.h>
  6459. @@ -578,7 +579,7 @@ static struct dentry *dentry_kill(struct dentry *dentry)
  6460. failed:
  6461. spin_unlock(&dentry->d_lock);
  6462. - cpu_relax();
  6463. + cpu_chill();
  6464. return dentry; /* try again with same dentry */
  6465. }
  6466. @@ -2316,7 +2317,7 @@ void d_delete(struct dentry * dentry)
  6467. if (dentry->d_lockref.count == 1) {
  6468. if (!spin_trylock(&inode->i_lock)) {
  6469. spin_unlock(&dentry->d_lock);
  6470. - cpu_relax();
  6471. + cpu_chill();
  6472. goto again;
  6473. }
  6474. dentry->d_flags &= ~DCACHE_CANT_MOUNT;
  6475. diff --git a/fs/eventpoll.c b/fs/eventpoll.c
  6476. index 8a74a2a52e0f..b92f9110a641 100644
  6477. --- a/fs/eventpoll.c
  6478. +++ b/fs/eventpoll.c
  6479. @@ -510,12 +510,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
  6480. */
  6481. static void ep_poll_safewake(wait_queue_head_t *wq)
  6482. {
  6483. - int this_cpu = get_cpu();
  6484. + int this_cpu = get_cpu_light();
  6485. ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
  6486. ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
  6487. - put_cpu();
  6488. + put_cpu_light();
  6489. }
  6490. static void ep_remove_wait_queue(struct eppoll_entry *pwq)
  6491. diff --git a/fs/exec.c b/fs/exec.c
  6492. index c4010b8207a1..879fd0772b78 100644
  6493. --- a/fs/exec.c
  6494. +++ b/fs/exec.c
  6495. @@ -961,12 +961,14 @@ static int exec_mmap(struct mm_struct *mm)
  6496. }
  6497. }
  6498. task_lock(tsk);
  6499. + preempt_disable_rt();
  6500. active_mm = tsk->active_mm;
  6501. tsk->mm = mm;
  6502. tsk->active_mm = mm;
  6503. activate_mm(active_mm, mm);
  6504. tsk->mm->vmacache_seqnum = 0;
  6505. vmacache_flush(tsk);
  6506. + preempt_enable_rt();
  6507. task_unlock(tsk);
  6508. if (old_mm) {
  6509. up_read(&old_mm->mmap_sem);
  6510. diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
  6511. index 684996c8a3a4..6e18a06aaabe 100644
  6512. --- a/fs/jbd2/checkpoint.c
  6513. +++ b/fs/jbd2/checkpoint.c
  6514. @@ -116,6 +116,8 @@ void __jbd2_log_wait_for_space(journal_t *journal)
  6515. nblocks = jbd2_space_needed(journal);
  6516. while (jbd2_log_space_left(journal) < nblocks) {
  6517. write_unlock(&journal->j_state_lock);
  6518. + if (current->plug)
  6519. + io_schedule();
  6520. mutex_lock(&journal->j_checkpoint_mutex);
  6521. /*
  6522. diff --git a/fs/namespace.c b/fs/namespace.c
  6523. index 783004af5707..ff8492bd3de7 100644
  6524. --- a/fs/namespace.c
  6525. +++ b/fs/namespace.c
  6526. @@ -14,6 +14,7 @@
  6527. #include <linux/mnt_namespace.h>
  6528. #include <linux/user_namespace.h>
  6529. #include <linux/namei.h>
  6530. +#include <linux/delay.h>
  6531. #include <linux/security.h>
  6532. #include <linux/idr.h>
  6533. #include <linux/init.h> /* init_rootfs */
  6534. @@ -353,8 +354,11 @@ int __mnt_want_write(struct vfsmount *m)
  6535. * incremented count after it has set MNT_WRITE_HOLD.
  6536. */
  6537. smp_mb();
  6538. - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
  6539. - cpu_relax();
  6540. + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
  6541. + preempt_enable();
  6542. + cpu_chill();
  6543. + preempt_disable();
  6544. + }
  6545. /*
  6546. * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
  6547. * be set to match its requirements. So we must not load that until
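
cpu_chill() is another helper supplied elsewhere in this patch (the added #include <linux/delay.h> lines in autofs4, dcache and namespace hint at where it lives). The point of those three hunks is the same: a trylock loop that spins with cpu_relax() can live-lock on RT when the lock owner has been preempted, so the retry loop briefly sleeps instead. A reasonable mental model of the helper, stated as an assumption rather than taken from this excerpt:

#ifdef CONFIG_PREEMPT_RT_FULL
void cpu_chill(void);		/* sleeps for roughly a millisecond */
#else
# define cpu_chill()	cpu_relax()
#endif
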
  6548. diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
  6549. index 97768a1379f2..6d2ea1af146b 100644
  6550. --- a/fs/ntfs/aops.c
  6551. +++ b/fs/ntfs/aops.c
  6552. @@ -92,13 +92,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  6553. ofs = 0;
  6554. if (file_ofs < init_size)
  6555. ofs = init_size - file_ofs;
  6556. - local_irq_save(flags);
  6557. + local_irq_save_nort(flags);
  6558. kaddr = kmap_atomic(page);
  6559. memset(kaddr + bh_offset(bh) + ofs, 0,
  6560. bh->b_size - ofs);
  6561. flush_dcache_page(page);
  6562. kunmap_atomic(kaddr);
  6563. - local_irq_restore(flags);
  6564. + local_irq_restore_nort(flags);
  6565. }
  6566. } else {
  6567. clear_buffer_uptodate(bh);
  6568. @@ -107,8 +107,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  6569. "0x%llx.", (unsigned long long)bh->b_blocknr);
  6570. }
  6571. first = page_buffers(page);
  6572. - local_irq_save(flags);
  6573. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  6574. + flags = bh_uptodate_lock_irqsave(first);
  6575. clear_buffer_async_read(bh);
  6576. unlock_buffer(bh);
  6577. tmp = bh;
  6578. @@ -123,8 +122,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  6579. }
  6580. tmp = tmp->b_this_page;
  6581. } while (tmp != bh);
  6582. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6583. - local_irq_restore(flags);
  6584. + bh_uptodate_unlock_irqrestore(first, flags);
  6585. /*
  6586. * If none of the buffers had errors then we can set the page uptodate,
  6587. * but we first have to perform the post read mst fixups, if the
  6588. @@ -145,13 +143,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  6589. recs = PAGE_SIZE / rec_size;
  6590. /* Should have been verified before we got here... */
  6591. BUG_ON(!recs);
  6592. - local_irq_save(flags);
  6593. + local_irq_save_nort(flags);
  6594. kaddr = kmap_atomic(page);
  6595. for (i = 0; i < recs; i++)
  6596. post_read_mst_fixup((NTFS_RECORD*)(kaddr +
  6597. i * rec_size), rec_size);
  6598. kunmap_atomic(kaddr);
  6599. - local_irq_restore(flags);
  6600. + local_irq_restore_nort(flags);
  6601. flush_dcache_page(page);
  6602. if (likely(page_uptodate && !PageError(page)))
  6603. SetPageUptodate(page);
  6604. @@ -159,9 +157,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  6605. unlock_page(page);
  6606. return;
  6607. still_busy:
  6608. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6609. - local_irq_restore(flags);
  6610. - return;
  6611. + bh_uptodate_unlock_irqrestore(first, flags);
  6612. }
  6613. /**
  6614. diff --git a/fs/timerfd.c b/fs/timerfd.c
  6615. index 053818dd6c18..c4bc14fe0085 100644
  6616. --- a/fs/timerfd.c
  6617. +++ b/fs/timerfd.c
  6618. @@ -450,7 +450,10 @@ static int do_timerfd_settime(int ufd, int flags,
  6619. break;
  6620. }
  6621. spin_unlock_irq(&ctx->wqh.lock);
  6622. - cpu_relax();
  6623. + if (isalarm(ctx))
  6624. + hrtimer_wait_for_timer(&ctx->t.alarm.timer);
  6625. + else
  6626. + hrtimer_wait_for_timer(&ctx->t.tmr);
  6627. }
  6628. /*
  6629. diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
  6630. index 45c2d6528829..794eead4f075 100644
  6631. --- a/include/acpi/platform/aclinux.h
  6632. +++ b/include/acpi/platform/aclinux.h
  6633. @@ -127,6 +127,7 @@
  6634. #define acpi_cache_t struct kmem_cache
  6635. #define acpi_spinlock spinlock_t *
  6636. +#define acpi_raw_spinlock raw_spinlock_t *
  6637. #define acpi_cpu_flags unsigned long
  6638. /* Use native linux version of acpi_os_allocate_zeroed */
  6639. @@ -145,6 +146,20 @@
  6640. #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id
  6641. #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock
  6642. +#define acpi_os_create_raw_lock(__handle) \
  6643. +({ \
  6644. + raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \
  6645. + \
  6646. + if (lock) { \
  6647. + *(__handle) = lock; \
  6648. + raw_spin_lock_init(*(__handle)); \
  6649. + } \
  6650. + lock ? AE_OK : AE_NO_MEMORY; \
  6651. + })
  6652. +
  6653. +#define acpi_os_delete_raw_lock(__handle) kfree(__handle)
  6654. +
  6655. +
  6656. /*
  6657. * OSL interfaces used by debugger/disassembler
  6658. */
  6659. diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
  6660. index 6f96247226a4..fa53a21263c2 100644
  6661. --- a/include/asm-generic/bug.h
  6662. +++ b/include/asm-generic/bug.h
  6663. @@ -215,6 +215,20 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
  6664. # define WARN_ON_SMP(x) ({0;})
  6665. #endif
  6666. +#ifdef CONFIG_PREEMPT_RT_BASE
  6667. +# define BUG_ON_RT(c) BUG_ON(c)
  6668. +# define BUG_ON_NONRT(c) do { } while (0)
  6669. +# define WARN_ON_RT(condition) WARN_ON(condition)
  6670. +# define WARN_ON_NONRT(condition) do { } while (0)
  6671. +# define WARN_ON_ONCE_NONRT(condition) do { } while (0)
  6672. +#else
  6673. +# define BUG_ON_RT(c) do { } while (0)
  6674. +# define BUG_ON_NONRT(c) BUG_ON(c)
  6675. +# define WARN_ON_RT(condition) do { } while (0)
  6676. +# define WARN_ON_NONRT(condition) WARN_ON(condition)
  6677. +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition)
  6678. +#endif
  6679. +
  6680. #endif /* __ASSEMBLY__ */
  6681. #endif
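The BUG_ON_RT/WARN_ON_NONRT family added above lets an assertion apply to only one preemption model. A small hypothetical sketch (my_stat and update_my_stat() are invented): a per-CPU counter updated with hard interrupts off on mainline but under a sleeping lock on RT, where an irqs_disabled() check would always fire:

#include <linux/bug.h>
#include <linux/irqflags.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, my_stat);

static void update_my_stat(void)
{
	/* Only meaningful on !RT; compiled away on RT. */
	WARN_ON_NONRT(!irqs_disabled());
	__this_cpu_inc(my_stat);
}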
  6682. diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
  6683. index 5d8ffa3e6f8c..c1cde3577551 100644
  6684. --- a/include/asm-generic/preempt.h
  6685. +++ b/include/asm-generic/preempt.h
  6686. @@ -7,10 +7,10 @@
  6687. static __always_inline int preempt_count(void)
  6688. {
  6689. - return current_thread_info()->preempt_count;
  6690. + return READ_ONCE(current_thread_info()->preempt_count);
  6691. }
  6692. -static __always_inline int *preempt_count_ptr(void)
  6693. +static __always_inline volatile int *preempt_count_ptr(void)
  6694. {
  6695. return &current_thread_info()->preempt_count;
  6696. }
  6697. diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
  6698. index 9ac9799b702b..05fc2492131a 100644
  6699. --- a/include/linux/blk-mq.h
  6700. +++ b/include/linux/blk-mq.h
  6701. @@ -218,6 +218,7 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
  6702. struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
  6703. struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
  6704. +void __blk_mq_complete_request_remote_work(struct work_struct *work);
  6705. int blk_mq_request_started(struct request *rq);
  6706. void blk_mq_start_request(struct request *rq);
  6707. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
  6708. index 669e419d6234..6678028d8508 100644
  6709. --- a/include/linux/blkdev.h
  6710. +++ b/include/linux/blkdev.h
  6711. @@ -90,6 +90,7 @@ struct request {
  6712. struct list_head queuelist;
  6713. union {
  6714. struct call_single_data csd;
  6715. + struct work_struct work;
  6716. unsigned long fifo_time;
  6717. };
  6718. @@ -457,7 +458,7 @@ struct request_queue {
  6719. struct throtl_data *td;
  6720. #endif
  6721. struct rcu_head rcu_head;
  6722. - wait_queue_head_t mq_freeze_wq;
  6723. + struct swait_queue_head mq_freeze_wq;
  6724. struct percpu_ref q_usage_counter;
  6725. struct list_head all_q_node;
  6726. diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h
  6727. index 8fdcb783197d..d07dbeec7bc1 100644
  6728. --- a/include/linux/bottom_half.h
  6729. +++ b/include/linux/bottom_half.h
  6730. @@ -3,6 +3,39 @@
  6731. #include <linux/preempt.h>
  6732. +#ifdef CONFIG_PREEMPT_RT_FULL
  6733. +
  6734. +extern void __local_bh_disable(void);
  6735. +extern void _local_bh_enable(void);
  6736. +extern void __local_bh_enable(void);
  6737. +
  6738. +static inline void local_bh_disable(void)
  6739. +{
  6740. + __local_bh_disable();
  6741. +}
  6742. +
  6743. +static inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
  6744. +{
  6745. + __local_bh_disable();
  6746. +}
  6747. +
  6748. +static inline void local_bh_enable(void)
  6749. +{
  6750. + __local_bh_enable();
  6751. +}
  6752. +
  6753. +static inline void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
  6754. +{
  6755. + __local_bh_enable();
  6756. +}
  6757. +
  6758. +static inline void local_bh_enable_ip(unsigned long ip)
  6759. +{
  6760. + __local_bh_enable();
  6761. +}
  6762. +
  6763. +#else
  6764. +
  6765. #ifdef CONFIG_TRACE_IRQFLAGS
  6766. extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
  6767. #else
  6768. @@ -30,5 +63,6 @@ static inline void local_bh_enable(void)
  6769. {
  6770. __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
  6771. }
  6772. +#endif
  6773. #endif /* _LINUX_BH_H */
  6774. diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
  6775. index d48daa3f6f20..91588cd809ce 100644
  6776. --- a/include/linux/buffer_head.h
  6777. +++ b/include/linux/buffer_head.h
  6778. @@ -75,8 +75,50 @@ struct buffer_head {
  6779. struct address_space *b_assoc_map; /* mapping this buffer is
  6780. associated with */
  6781. atomic_t b_count; /* users using this buffer_head */
  6782. +#ifdef CONFIG_PREEMPT_RT_BASE
  6783. + spinlock_t b_uptodate_lock;
  6784. +#if IS_ENABLED(CONFIG_JBD2)
  6785. + spinlock_t b_state_lock;
  6786. + spinlock_t b_journal_head_lock;
  6787. +#endif
  6788. +#endif
  6789. };
  6790. +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
  6791. +{
  6792. + unsigned long flags;
  6793. +
  6794. +#ifndef CONFIG_PREEMPT_RT_BASE
  6795. + local_irq_save(flags);
  6796. + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
  6797. +#else
  6798. + spin_lock_irqsave(&bh->b_uptodate_lock, flags);
  6799. +#endif
  6800. + return flags;
  6801. +}
  6802. +
  6803. +static inline void
  6804. +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
  6805. +{
  6806. +#ifndef CONFIG_PREEMPT_RT_BASE
  6807. + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
  6808. + local_irq_restore(flags);
  6809. +#else
  6810. + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
  6811. +#endif
  6812. +}
  6813. +
  6814. +static inline void buffer_head_init_locks(struct buffer_head *bh)
  6815. +{
  6816. +#ifdef CONFIG_PREEMPT_RT_BASE
  6817. + spin_lock_init(&bh->b_uptodate_lock);
  6818. +#if IS_ENABLED(CONFIG_JBD2)
  6819. + spin_lock_init(&bh->b_state_lock);
  6820. + spin_lock_init(&bh->b_journal_head_lock);
  6821. +#endif
  6822. +#endif
  6823. +}
  6824. +
  6825. /*
  6826. * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
  6827. * and buffer_foo() functions.
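The fs/ntfs/aops.c hunk earlier in this patch shows the conversion to these helpers in context; written directly against the new interface, a stripped-down end_io-style routine (my_end_buffer_read() is a made-up name) looks roughly like this:

#include <linux/buffer_head.h>

static void my_end_buffer_read(struct buffer_head *bh, int uptodate)
{
	struct buffer_head *first = page_buffers(bh->b_page);
	unsigned long flags;

	/*
	 * Bit spinlock with interrupts off on !RT, a real spinlock on
	 * RT; either way it serializes completion handling for the
	 * buffers of the same page.
	 */
	flags = bh_uptodate_lock_irqsave(first);
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);
	clear_buffer_async_read(bh);
	bh_uptodate_unlock_irqrestore(first, flags);

	unlock_buffer(bh);
}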
  6828. diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
  6829. index 5b17de62c962..56027cc01a56 100644
  6830. --- a/include/linux/cgroup-defs.h
  6831. +++ b/include/linux/cgroup-defs.h
  6832. @@ -16,6 +16,7 @@
  6833. #include <linux/percpu-refcount.h>
  6834. #include <linux/percpu-rwsem.h>
  6835. #include <linux/workqueue.h>
  6836. +#include <linux/swork.h>
  6837. #ifdef CONFIG_CGROUPS
  6838. @@ -137,6 +138,7 @@ struct cgroup_subsys_state {
  6839. /* percpu_ref killing and RCU release */
  6840. struct rcu_head rcu_head;
  6841. struct work_struct destroy_work;
  6842. + struct swork_event destroy_swork;
  6843. };
  6844. /*
  6845. diff --git a/include/linux/completion.h b/include/linux/completion.h
  6846. index 5d5aaae3af43..3bca1590e29f 100644
  6847. --- a/include/linux/completion.h
  6848. +++ b/include/linux/completion.h
  6849. @@ -7,8 +7,7 @@
  6850. * Atomic wait-for-completion handler data structures.
  6851. * See kernel/sched/completion.c for details.
  6852. */
  6853. -
  6854. -#include <linux/wait.h>
  6855. +#include <linux/swait.h>
  6856. /*
  6857. * struct completion - structure used to maintain state for a "completion"
  6858. @@ -24,11 +23,11 @@
  6859. */
  6860. struct completion {
  6861. unsigned int done;
  6862. - wait_queue_head_t wait;
  6863. + struct swait_queue_head wait;
  6864. };
  6865. #define COMPLETION_INITIALIZER(work) \
  6866. - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
  6867. + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
  6868. #define COMPLETION_INITIALIZER_ONSTACK(work) \
  6869. ({ init_completion(&work); work; })
  6870. @@ -73,7 +72,7 @@ struct completion {
  6871. static inline void init_completion(struct completion *x)
  6872. {
  6873. x->done = 0;
  6874. - init_waitqueue_head(&x->wait);
  6875. + init_swait_queue_head(&x->wait);
  6876. }
  6877. /**
  6878. diff --git a/include/linux/cpu.h b/include/linux/cpu.h
  6879. index f9b1fab4388a..d4cc4818d13e 100644
  6880. --- a/include/linux/cpu.h
  6881. +++ b/include/linux/cpu.h
  6882. @@ -230,6 +230,8 @@ extern void get_online_cpus(void);
  6883. extern void put_online_cpus(void);
  6884. extern void cpu_hotplug_disable(void);
  6885. extern void cpu_hotplug_enable(void);
  6886. +extern void pin_current_cpu(void);
  6887. +extern void unpin_current_cpu(void);
  6888. #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
  6889. #define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri)
  6890. #define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
  6891. @@ -247,6 +249,8 @@ static inline void cpu_hotplug_done(void) {}
  6892. #define put_online_cpus() do { } while (0)
  6893. #define cpu_hotplug_disable() do { } while (0)
  6894. #define cpu_hotplug_enable() do { } while (0)
  6895. +static inline void pin_current_cpu(void) { }
  6896. +static inline void unpin_current_cpu(void) { }
  6897. #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
  6898. #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
  6899. /* These aren't inline functions due to a GCC bug. */
  6900. diff --git a/include/linux/delay.h b/include/linux/delay.h
  6901. index a6ecb34cf547..37caab306336 100644
  6902. --- a/include/linux/delay.h
  6903. +++ b/include/linux/delay.h
  6904. @@ -52,4 +52,10 @@ static inline void ssleep(unsigned int seconds)
  6905. msleep(seconds * 1000);
  6906. }
  6907. +#ifdef CONFIG_PREEMPT_RT_FULL
  6908. +extern void cpu_chill(void);
  6909. +#else
  6910. +# define cpu_chill() cpu_relax()
  6911. +#endif
  6912. +
  6913. #endif /* defined(_LINUX_DELAY_H) */
  6914. diff --git a/include/linux/highmem.h b/include/linux/highmem.h
  6915. index bb3f3297062a..a117a33ef72c 100644
  6916. --- a/include/linux/highmem.h
  6917. +++ b/include/linux/highmem.h
  6918. @@ -7,6 +7,7 @@
  6919. #include <linux/mm.h>
  6920. #include <linux/uaccess.h>
  6921. #include <linux/hardirq.h>
  6922. +#include <linux/sched.h>
  6923. #include <asm/cacheflush.h>
  6924. @@ -65,7 +66,7 @@ static inline void kunmap(struct page *page)
  6925. static inline void *kmap_atomic(struct page *page)
  6926. {
  6927. - preempt_disable();
  6928. + preempt_disable_nort();
  6929. pagefault_disable();
  6930. return page_address(page);
  6931. }
  6932. @@ -74,7 +75,7 @@ static inline void *kmap_atomic(struct page *page)
  6933. static inline void __kunmap_atomic(void *addr)
  6934. {
  6935. pagefault_enable();
  6936. - preempt_enable();
  6937. + preempt_enable_nort();
  6938. }
  6939. #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn))
  6940. @@ -86,32 +87,51 @@ static inline void __kunmap_atomic(void *addr)
  6941. #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
  6942. +#ifndef CONFIG_PREEMPT_RT_FULL
  6943. DECLARE_PER_CPU(int, __kmap_atomic_idx);
  6944. +#endif
  6945. static inline int kmap_atomic_idx_push(void)
  6946. {
  6947. +#ifndef CONFIG_PREEMPT_RT_FULL
  6948. int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1;
  6949. -#ifdef CONFIG_DEBUG_HIGHMEM
  6950. +# ifdef CONFIG_DEBUG_HIGHMEM
  6951. WARN_ON_ONCE(in_irq() && !irqs_disabled());
  6952. BUG_ON(idx >= KM_TYPE_NR);
  6953. -#endif
  6954. +# endif
  6955. return idx;
  6956. +#else
  6957. + current->kmap_idx++;
  6958. + BUG_ON(current->kmap_idx > KM_TYPE_NR);
  6959. + return current->kmap_idx - 1;
  6960. +#endif
  6961. }
  6962. static inline int kmap_atomic_idx(void)
  6963. {
  6964. +#ifndef CONFIG_PREEMPT_RT_FULL
  6965. return __this_cpu_read(__kmap_atomic_idx) - 1;
  6966. +#else
  6967. + return current->kmap_idx - 1;
  6968. +#endif
  6969. }
  6970. static inline void kmap_atomic_idx_pop(void)
  6971. {
  6972. -#ifdef CONFIG_DEBUG_HIGHMEM
  6973. +#ifndef CONFIG_PREEMPT_RT_FULL
  6974. +# ifdef CONFIG_DEBUG_HIGHMEM
  6975. int idx = __this_cpu_dec_return(__kmap_atomic_idx);
  6976. BUG_ON(idx < 0);
  6977. -#else
  6978. +# else
  6979. __this_cpu_dec(__kmap_atomic_idx);
  6980. +# endif
  6981. +#else
  6982. + current->kmap_idx--;
  6983. +# ifdef CONFIG_DEBUG_HIGHMEM
  6984. + BUG_ON(current->kmap_idx < 0);
  6985. +# endif
  6986. #endif
  6987. }
  6988. diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
  6989. index c98c6539e2c2..573ffbce6ab7 100644
  6990. --- a/include/linux/hrtimer.h
  6991. +++ b/include/linux/hrtimer.h
  6992. @@ -87,6 +87,9 @@ enum hrtimer_restart {
  6993. * @function: timer expiry callback function
  6994. * @base: pointer to the timer base (per cpu and per clock)
  6995. * @state: state information (See bit values above)
  6996. + * @cb_entry: list entry to defer timers from hardirq context
  6997. + * @irqsafe: timer can run in hardirq context
  6998. + * @praecox: timer expiry time if expired at the time of programming
  6999. * @is_rel: Set if the timer was armed relative
  7000. * @start_pid: timer statistics field to store the pid of the task which
  7001. * started the timer
  7002. @@ -103,6 +106,11 @@ struct hrtimer {
  7003. enum hrtimer_restart (*function)(struct hrtimer *);
  7004. struct hrtimer_clock_base *base;
  7005. u8 state;
  7006. + struct list_head cb_entry;
  7007. + int irqsafe;
  7008. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  7009. + ktime_t praecox;
  7010. +#endif
  7011. u8 is_rel;
  7012. #ifdef CONFIG_TIMER_STATS
  7013. int start_pid;
  7014. @@ -123,11 +131,7 @@ struct hrtimer_sleeper {
  7015. struct task_struct *task;
  7016. };
  7017. -#ifdef CONFIG_64BIT
  7018. # define HRTIMER_CLOCK_BASE_ALIGN 64
  7019. -#else
  7020. -# define HRTIMER_CLOCK_BASE_ALIGN 32
  7021. -#endif
  7022. /**
  7023. * struct hrtimer_clock_base - the timer base for a specific clock
  7024. @@ -136,6 +140,7 @@ struct hrtimer_sleeper {
  7025. * timer to a base on another cpu.
  7026. * @clockid: clock id for per_cpu support
  7027. * @active: red black tree root node for the active timers
  7028. + * @expired: list head for deferred timers.
  7029. * @get_time: function to retrieve the current time of the clock
  7030. * @offset: offset of this clock to the monotonic base
  7031. */
  7032. @@ -144,6 +149,7 @@ struct hrtimer_clock_base {
  7033. int index;
  7034. clockid_t clockid;
  7035. struct timerqueue_head active;
  7036. + struct list_head expired;
  7037. ktime_t (*get_time)(void);
  7038. ktime_t offset;
  7039. } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN)));
  7040. @@ -187,6 +193,7 @@ struct hrtimer_cpu_base {
  7041. raw_spinlock_t lock;
  7042. seqcount_t seq;
  7043. struct hrtimer *running;
  7044. + struct hrtimer *running_soft;
  7045. unsigned int cpu;
  7046. unsigned int active_bases;
  7047. unsigned int clock_was_set_seq;
  7048. @@ -203,6 +210,9 @@ struct hrtimer_cpu_base {
  7049. unsigned int nr_hangs;
  7050. unsigned int max_hang_time;
  7051. #endif
  7052. +#ifdef CONFIG_PREEMPT_RT_BASE
  7053. + wait_queue_head_t wait;
  7054. +#endif
  7055. struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
  7056. } ____cacheline_aligned;
  7057. @@ -412,6 +422,13 @@ static inline void hrtimer_restart(struct hrtimer *timer)
  7058. hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
  7059. }
  7060. +/* Softirq preemption could deadlock timer removal */
  7061. +#ifdef CONFIG_PREEMPT_RT_BASE
  7062. + extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
  7063. +#else
  7064. +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
  7065. +#endif
  7066. +
  7067. /* Query timers: */
  7068. extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
  7069. @@ -436,7 +453,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer)
  7070. * Helper function to check, whether the timer is running the callback
  7071. * function
  7072. */
  7073. -static inline int hrtimer_callback_running(struct hrtimer *timer)
  7074. +static inline int hrtimer_callback_running(const struct hrtimer *timer)
  7075. {
  7076. return timer->base->cpu_base->running == timer;
  7077. }
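hrtimer_wait_for_timer() pairs naturally with a cancel loop: when the callback runs in the preemptible softirq thread on RT, the canceller sleeps on the base's waitqueue instead of spinning (the fs/timerfd.c hunk above is one caller). A hedged sketch, with my_timer_sync_cancel() as a hypothetical name:

#include <linux/hrtimer.h>

static void my_timer_sync_cancel(struct hrtimer *timer)
{
	while (hrtimer_try_to_cancel(timer) < 0) {
		/* -1: callback in progress; wait for it, don't spin (RT). */
		hrtimer_wait_for_timer(timer);
	}
}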
  7078. diff --git a/include/linux/idr.h b/include/linux/idr.h
  7079. index 083d61e92706..5899796f50cb 100644
  7080. --- a/include/linux/idr.h
  7081. +++ b/include/linux/idr.h
  7082. @@ -95,10 +95,14 @@ bool idr_is_empty(struct idr *idp);
  7083. * Each idr_preload() should be matched with an invocation of this
  7084. * function. See idr_preload() for details.
  7085. */
  7086. +#ifdef CONFIG_PREEMPT_RT_FULL
  7087. +void idr_preload_end(void);
  7088. +#else
  7089. static inline void idr_preload_end(void)
  7090. {
  7091. preempt_enable();
  7092. }
  7093. +#endif
  7094. /**
  7095. * idr_find - return pointer for given id
  7096. diff --git a/include/linux/init_task.h b/include/linux/init_task.h
  7097. index f2cb8d45513d..60fadde71a44 100644
  7098. --- a/include/linux/init_task.h
  7099. +++ b/include/linux/init_task.h
  7100. @@ -148,6 +148,12 @@ extern struct task_group root_task_group;
  7101. # define INIT_PERF_EVENTS(tsk)
  7102. #endif
  7103. +#ifdef CONFIG_PREEMPT_RT_BASE
  7104. +# define INIT_TIMER_LIST .posix_timer_list = NULL,
  7105. +#else
  7106. +# define INIT_TIMER_LIST
  7107. +#endif
  7108. +
  7109. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  7110. # define INIT_VTIME(tsk) \
  7111. .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \
  7112. @@ -239,6 +245,7 @@ extern struct task_group root_task_group;
  7113. .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
  7114. .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
  7115. .timer_slack_ns = 50000, /* 50 usec default slack */ \
  7116. + INIT_TIMER_LIST \
  7117. .pids = { \
  7118. [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
  7119. [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
  7120. diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
  7121. index 9fcabeb07787..e6f7e42c8253 100644
  7122. --- a/include/linux/interrupt.h
  7123. +++ b/include/linux/interrupt.h
  7124. @@ -61,6 +61,7 @@
  7125. * interrupt handler after suspending interrupts. For system
  7126. * wakeup devices users need to implement wakeup detection in
  7127. * their interrupt handlers.
  7128. + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT)
  7129. */
  7130. #define IRQF_SHARED 0x00000080
  7131. #define IRQF_PROBE_SHARED 0x00000100
  7132. @@ -74,6 +75,7 @@
  7133. #define IRQF_NO_THREAD 0x00010000
  7134. #define IRQF_EARLY_RESUME 0x00020000
  7135. #define IRQF_COND_SUSPEND 0x00040000
  7136. +#define IRQF_NO_SOFTIRQ_CALL 0x00080000
  7137. #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
  7138. @@ -196,7 +198,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id);
  7139. #ifdef CONFIG_LOCKDEP
  7140. # define local_irq_enable_in_hardirq() do { } while (0)
  7141. #else
  7142. -# define local_irq_enable_in_hardirq() local_irq_enable()
  7143. +# define local_irq_enable_in_hardirq() local_irq_enable_nort()
  7144. #endif
  7145. extern void disable_irq_nosync(unsigned int irq);
  7146. @@ -217,6 +219,7 @@ extern void resume_device_irqs(void);
  7147. * @irq: Interrupt to which notification applies
  7148. * @kref: Reference count, for internal use
  7149. * @work: Work item, for internal use
  7150. + * @list: List item for deferred callbacks
  7151. * @notify: Function to be called on change. This will be
  7152. * called in process context.
  7153. * @release: Function to be called on release. This will be
  7154. @@ -228,6 +231,7 @@ struct irq_affinity_notify {
  7155. unsigned int irq;
  7156. struct kref kref;
  7157. struct work_struct work;
  7158. + struct list_head list;
  7159. void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
  7160. void (*release)(struct kref *ref);
  7161. };
  7162. @@ -390,9 +394,13 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
  7163. bool state);
  7164. #ifdef CONFIG_IRQ_FORCED_THREADING
  7165. +# ifndef CONFIG_PREEMPT_RT_BASE
  7166. extern bool force_irqthreads;
  7167. +# else
  7168. +# define force_irqthreads (true)
  7169. +# endif
  7170. #else
  7171. -#define force_irqthreads (0)
  7172. +#define force_irqthreads (false)
  7173. #endif
  7174. #ifndef __ARCH_SET_SOFTIRQ_PENDING
  7175. @@ -449,9 +457,10 @@ struct softirq_action
  7176. void (*action)(struct softirq_action *);
  7177. };
  7178. +#ifndef CONFIG_PREEMPT_RT_FULL
  7179. asmlinkage void do_softirq(void);
  7180. asmlinkage void __do_softirq(void);
  7181. -
  7182. +static inline void thread_do_softirq(void) { do_softirq(); }
  7183. #ifdef __ARCH_HAS_DO_SOFTIRQ
  7184. void do_softirq_own_stack(void);
  7185. #else
  7186. @@ -460,13 +469,25 @@ static inline void do_softirq_own_stack(void)
  7187. __do_softirq();
  7188. }
  7189. #endif
  7190. +#else
  7191. +extern void thread_do_softirq(void);
  7192. +#endif
  7193. extern void open_softirq(int nr, void (*action)(struct softirq_action *));
  7194. extern void softirq_init(void);
  7195. extern void __raise_softirq_irqoff(unsigned int nr);
  7196. +#ifdef CONFIG_PREEMPT_RT_FULL
  7197. +extern void __raise_softirq_irqoff_ksoft(unsigned int nr);
  7198. +#else
  7199. +static inline void __raise_softirq_irqoff_ksoft(unsigned int nr)
  7200. +{
  7201. + __raise_softirq_irqoff(nr);
  7202. +}
  7203. +#endif
  7204. extern void raise_softirq_irqoff(unsigned int nr);
  7205. extern void raise_softirq(unsigned int nr);
  7206. +extern void softirq_check_pending_idle(void);
  7207. DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
  7208. @@ -488,8 +509,9 @@ static inline struct task_struct *this_cpu_ksoftirqd(void)
  7209. to be executed on some cpu at least once after this.
  7210. * If the tasklet is already scheduled, but its execution is still not
  7211. started, it will be executed only once.
  7212. - * If this tasklet is already running on another CPU (or schedule is called
  7213. - from tasklet itself), it is rescheduled for later.
  7214. + * If this tasklet is already running on another CPU, it is rescheduled
  7215. + for later.
  7216. + * Schedule must not be called from the tasklet itself (a lockup occurs)
  7217. * Tasklet is strictly serialized wrt itself, but not
  7218. wrt another tasklets. If client needs some intertask synchronization,
  7219. he makes it with spinlocks.
  7220. @@ -514,27 +536,36 @@ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data }
  7221. enum
  7222. {
  7223. TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */
  7224. - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */
  7225. + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */
  7226. + TASKLET_STATE_PENDING /* Tasklet is pending */
  7227. };
  7228. -#ifdef CONFIG_SMP
  7229. +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED)
  7230. +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN)
  7231. +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING)
  7232. +
  7233. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  7234. static inline int tasklet_trylock(struct tasklet_struct *t)
  7235. {
  7236. return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
  7237. }
  7238. +static inline int tasklet_tryunlock(struct tasklet_struct *t)
  7239. +{
  7240. + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN;
  7241. +}
  7242. +
  7243. static inline void tasklet_unlock(struct tasklet_struct *t)
  7244. {
  7245. smp_mb__before_atomic();
  7246. clear_bit(TASKLET_STATE_RUN, &(t)->state);
  7247. }
  7248. -static inline void tasklet_unlock_wait(struct tasklet_struct *t)
  7249. -{
  7250. - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
  7251. -}
  7252. +extern void tasklet_unlock_wait(struct tasklet_struct *t);
  7253. +
  7254. #else
  7255. #define tasklet_trylock(t) 1
  7256. +#define tasklet_tryunlock(t) 1
  7257. #define tasklet_unlock_wait(t) do { } while (0)
  7258. #define tasklet_unlock(t) do { } while (0)
  7259. #endif
  7260. @@ -583,12 +614,7 @@ static inline void tasklet_disable(struct tasklet_struct *t)
  7261. smp_mb();
  7262. }
  7263. -static inline void tasklet_enable(struct tasklet_struct *t)
  7264. -{
  7265. - smp_mb__before_atomic();
  7266. - atomic_dec(&t->count);
  7267. -}
  7268. -
  7269. +extern void tasklet_enable(struct tasklet_struct *t);
  7270. extern void tasklet_kill(struct tasklet_struct *t);
  7271. extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
  7272. extern void tasklet_init(struct tasklet_struct *t,
  7273. @@ -619,6 +645,12 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer)
  7274. tasklet_kill(&ttimer->tasklet);
  7275. }
  7276. +#ifdef CONFIG_PREEMPT_RT_FULL
  7277. +extern void softirq_early_init(void);
  7278. +#else
  7279. +static inline void softirq_early_init(void) { }
  7280. +#endif
  7281. +
  7282. /*
  7283. * Autoprobing for irqs:
  7284. *
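IRQF_NO_SOFTIRQ_CALL is interpreted by the forced-threading code elsewhere in this patch; from a driver's point of view it is just another request_irq() flag. A hypothetical sketch (handler and device names invented):

#include <linux/interrupt.h>

static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
	/* acknowledge the device, queue follow-up work, ... */
	return IRQ_HANDLED;
}

static int my_request_irq(unsigned int irq, void *dev)
{
	/*
	 * On RT the handler runs in an irq thread; the flag tells the
	 * core not to process pending softirqs from that thread.
	 */
	return request_irq(irq, my_irq_handler, IRQF_NO_SOFTIRQ_CALL,
			   "my-device", dev);
}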
  7285. diff --git a/include/linux/irq.h b/include/linux/irq.h
  7286. index c4de62348ff2..7432b5e3d79c 100644
  7287. --- a/include/linux/irq.h
  7288. +++ b/include/linux/irq.h
  7289. @@ -72,6 +72,7 @@ enum irqchip_irq_state;
  7290. * IRQ_IS_POLLED - Always polled by another interrupt. Exclude
  7291. * it from the spurious interrupt detection
  7292. * mechanism and from core side polling.
  7293. + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT)
  7294. * IRQ_DISABLE_UNLAZY - Disable lazy irq disable
  7295. */
  7296. enum {
  7297. @@ -99,13 +100,14 @@ enum {
  7298. IRQ_PER_CPU_DEVID = (1 << 17),
  7299. IRQ_IS_POLLED = (1 << 18),
  7300. IRQ_DISABLE_UNLAZY = (1 << 19),
  7301. + IRQ_NO_SOFTIRQ_CALL = (1 << 20),
  7302. };
  7303. #define IRQF_MODIFY_MASK \
  7304. (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
  7305. IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
  7306. IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \
  7307. - IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY)
  7308. + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_NO_SOFTIRQ_CALL)
  7309. #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
  7310. diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
  7311. index 47b9ebd4a74f..2543aab05daa 100644
  7312. --- a/include/linux/irq_work.h
  7313. +++ b/include/linux/irq_work.h
  7314. @@ -16,6 +16,7 @@
  7315. #define IRQ_WORK_BUSY 2UL
  7316. #define IRQ_WORK_FLAGS 3UL
  7317. #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */
  7318. +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */
  7319. struct irq_work {
  7320. unsigned long flags;
  7321. @@ -51,4 +52,10 @@ static inline bool irq_work_needs_cpu(void) { return false; }
  7322. static inline void irq_work_run(void) { }
  7323. #endif
  7324. +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
  7325. +void irq_work_tick_soft(void);
  7326. +#else
  7327. +static inline void irq_work_tick_soft(void) { }
  7328. +#endif
  7329. +
  7330. #endif /* _LINUX_IRQ_WORK_H */
  7331. diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
  7332. index dcca77c4b9d2..edb6573c9e0e 100644
  7333. --- a/include/linux/irqdesc.h
  7334. +++ b/include/linux/irqdesc.h
  7335. @@ -64,6 +64,7 @@ struct irq_desc {
  7336. unsigned int irqs_unhandled;
  7337. atomic_t threads_handled;
  7338. int threads_handled_last;
  7339. + u64 random_ip;
  7340. raw_spinlock_t lock;
  7341. struct cpumask *percpu_enabled;
  7342. #ifdef CONFIG_SMP
  7343. diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
  7344. index 5dd1272d1ab2..9b77034f7c5e 100644
  7345. --- a/include/linux/irqflags.h
  7346. +++ b/include/linux/irqflags.h
  7347. @@ -25,8 +25,6 @@
  7348. # define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
  7349. # define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
  7350. # define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
  7351. -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
  7352. -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
  7353. # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
  7354. #else
  7355. # define trace_hardirqs_on() do { } while (0)
  7356. @@ -39,9 +37,15 @@
  7357. # define trace_softirqs_enabled(p) 0
  7358. # define trace_hardirq_enter() do { } while (0)
  7359. # define trace_hardirq_exit() do { } while (0)
  7360. +# define INIT_TRACE_IRQFLAGS
  7361. +#endif
  7362. +
  7363. +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL)
  7364. +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
  7365. +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
  7366. +#else
  7367. # define lockdep_softirq_enter() do { } while (0)
  7368. # define lockdep_softirq_exit() do { } while (0)
  7369. -# define INIT_TRACE_IRQFLAGS
  7370. #endif
  7371. #if defined(CONFIG_IRQSOFF_TRACER) || \
  7372. @@ -148,4 +152,23 @@
  7373. #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
  7374. +/*
  7375. + * local_irq* variants depending on RT/!RT
  7376. + */
  7377. +#ifdef CONFIG_PREEMPT_RT_FULL
  7378. +# define local_irq_disable_nort() do { } while (0)
  7379. +# define local_irq_enable_nort() do { } while (0)
  7380. +# define local_irq_save_nort(flags) local_save_flags(flags)
  7381. +# define local_irq_restore_nort(flags) (void)(flags)
  7382. +# define local_irq_disable_rt() local_irq_disable()
  7383. +# define local_irq_enable_rt() local_irq_enable()
  7384. +#else
  7385. +# define local_irq_disable_nort() local_irq_disable()
  7386. +# define local_irq_enable_nort() local_irq_enable()
  7387. +# define local_irq_save_nort(flags) local_irq_save(flags)
  7388. +# define local_irq_restore_nort(flags) local_irq_restore(flags)
  7389. +# define local_irq_disable_rt() do { } while (0)
  7390. +# define local_irq_enable_rt() do { } while (0)
  7391. +#endif
  7392. +
  7393. #endif
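The _nort variants keep interrupts disabled on mainline but degrade to a flags save (or a no-op) on RT, for sections whose data is protected by other means there — the fs/ntfs/aops.c hunk earlier in this patch is a real example. A minimal sketch, assuming the RT side is already serialized by a threaded handler taking the same sleeping lock (my_shadow and my_update_shadow() are hypothetical):

#include <linux/irqflags.h>

static unsigned int my_shadow;		/* hypothetical register shadow */

static void my_update_shadow(unsigned int val)
{
	unsigned long flags;

	/*
	 * !RT: exclude the hard interrupt handler that also writes the
	 * shadow.  RT: the handler is threaded and serialized by a
	 * sleeping lock, so only the flags are saved.
	 */
	local_irq_save_nort(flags);
	my_shadow = val;
	local_irq_restore_nort(flags);
}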
  7394. diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
  7395. index fd1083c46c61..1485e18b9c3b 100644
  7396. --- a/include/linux/jbd2.h
  7397. +++ b/include/linux/jbd2.h
  7398. @@ -347,32 +347,56 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh)
  7399. static inline void jbd_lock_bh_state(struct buffer_head *bh)
  7400. {
  7401. +#ifndef CONFIG_PREEMPT_RT_BASE
  7402. bit_spin_lock(BH_State, &bh->b_state);
  7403. +#else
  7404. + spin_lock(&bh->b_state_lock);
  7405. +#endif
  7406. }
  7407. static inline int jbd_trylock_bh_state(struct buffer_head *bh)
  7408. {
  7409. +#ifndef CONFIG_PREEMPT_RT_BASE
  7410. return bit_spin_trylock(BH_State, &bh->b_state);
  7411. +#else
  7412. + return spin_trylock(&bh->b_state_lock);
  7413. +#endif
  7414. }
  7415. static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
  7416. {
  7417. +#ifndef CONFIG_PREEMPT_RT_BASE
  7418. return bit_spin_is_locked(BH_State, &bh->b_state);
  7419. +#else
  7420. + return spin_is_locked(&bh->b_state_lock);
  7421. +#endif
  7422. }
  7423. static inline void jbd_unlock_bh_state(struct buffer_head *bh)
  7424. {
  7425. +#ifndef CONFIG_PREEMPT_RT_BASE
  7426. bit_spin_unlock(BH_State, &bh->b_state);
  7427. +#else
  7428. + spin_unlock(&bh->b_state_lock);
  7429. +#endif
  7430. }
  7431. static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
  7432. {
  7433. +#ifndef CONFIG_PREEMPT_RT_BASE
  7434. bit_spin_lock(BH_JournalHead, &bh->b_state);
  7435. +#else
  7436. + spin_lock(&bh->b_journal_head_lock);
  7437. +#endif
  7438. }
  7439. static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
  7440. {
  7441. +#ifndef CONFIG_PREEMPT_RT_BASE
  7442. bit_spin_unlock(BH_JournalHead, &bh->b_state);
  7443. +#else
  7444. + spin_unlock(&bh->b_journal_head_lock);
  7445. +#endif
  7446. }
  7447. #define J_ASSERT(assert) BUG_ON(!(assert))
  7448. diff --git a/include/linux/kdb.h b/include/linux/kdb.h
  7449. index a19bcf9e762e..897495386446 100644
  7450. --- a/include/linux/kdb.h
  7451. +++ b/include/linux/kdb.h
  7452. @@ -167,6 +167,7 @@ extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt,
  7453. extern __printf(1, 2) int kdb_printf(const char *, ...);
  7454. typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
  7455. +#define in_kdb_printk() (kdb_trap_printk)
  7456. extern void kdb_init(int level);
  7457. /* Access to kdb specific polling devices */
  7458. @@ -201,6 +202,7 @@ extern int kdb_register_flags(char *, kdb_func_t, char *, char *,
  7459. extern int kdb_unregister(char *);
  7460. #else /* ! CONFIG_KGDB_KDB */
  7461. static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; }
  7462. +#define in_kdb_printk() (0)
  7463. static inline void kdb_init(int level) {}
  7464. static inline int kdb_register(char *cmd, kdb_func_t func, char *usage,
  7465. char *help, short minlen) { return 0; }
  7466. diff --git a/include/linux/kernel.h b/include/linux/kernel.h
  7467. index 2f7775e229b0..a7f8e266ab50 100644
  7468. --- a/include/linux/kernel.h
  7469. +++ b/include/linux/kernel.h
  7470. @@ -188,6 +188,9 @@ extern int _cond_resched(void);
  7471. */
  7472. # define might_sleep() \
  7473. do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
  7474. +
  7475. +# define might_sleep_no_state_check() \
  7476. + do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
  7477. # define sched_annotate_sleep() (current->task_state_change = 0)
  7478. #else
  7479. static inline void ___might_sleep(const char *file, int line,
  7480. @@ -195,6 +198,7 @@ extern int _cond_resched(void);
  7481. static inline void __might_sleep(const char *file, int line,
  7482. int preempt_offset) { }
  7483. # define might_sleep() do { might_resched(); } while (0)
  7484. +# define might_sleep_no_state_check() do { might_resched(); } while (0)
  7485. # define sched_annotate_sleep() do { } while (0)
  7486. #endif
  7487. @@ -484,6 +488,7 @@ extern enum system_states {
  7488. SYSTEM_HALT,
  7489. SYSTEM_POWER_OFF,
  7490. SYSTEM_RESTART,
  7491. + SYSTEM_SUSPEND,
  7492. } system_state;
  7493. #define TAINT_PROPRIETARY_MODULE 0
  7494. diff --git a/include/linux/lglock.h b/include/linux/lglock.h
  7495. index c92ebd100d9b..6f035f635d0e 100644
  7496. --- a/include/linux/lglock.h
  7497. +++ b/include/linux/lglock.h
  7498. @@ -34,13 +34,30 @@
  7499. #endif
  7500. struct lglock {
  7501. +#ifdef CONFIG_PREEMPT_RT_FULL
  7502. + struct rt_mutex __percpu *lock;
  7503. +#else
  7504. arch_spinlock_t __percpu *lock;
  7505. +#endif
  7506. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  7507. struct lock_class_key lock_key;
  7508. struct lockdep_map lock_dep_map;
  7509. #endif
  7510. };
  7511. +#ifdef CONFIG_PREEMPT_RT_FULL
  7512. +# define DEFINE_LGLOCK(name) \
  7513. + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \
  7514. + = __RT_MUTEX_INITIALIZER( name ## _lock); \
  7515. + struct lglock name = { .lock = &name ## _lock }
  7516. +
  7517. +# define DEFINE_STATIC_LGLOCK(name) \
  7518. + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \
  7519. + = __RT_MUTEX_INITIALIZER( name ## _lock); \
  7520. + static struct lglock name = { .lock = &name ## _lock }
  7521. +
  7522. +#else
  7523. +
  7524. #define DEFINE_LGLOCK(name) \
  7525. static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
  7526. = __ARCH_SPIN_LOCK_UNLOCKED; \
  7527. @@ -50,6 +67,7 @@ struct lglock {
  7528. static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
  7529. = __ARCH_SPIN_LOCK_UNLOCKED; \
  7530. static struct lglock name = { .lock = &name ## _lock }
  7531. +#endif
  7532. void lg_lock_init(struct lglock *lg, char *name);
  7533. @@ -64,6 +82,12 @@ void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2);
  7534. void lg_global_lock(struct lglock *lg);
  7535. void lg_global_unlock(struct lglock *lg);
  7536. +#ifndef CONFIG_PREEMPT_RT_FULL
  7537. +#define lg_global_trylock_relax(name) lg_global_lock(name)
  7538. +#else
  7539. +void lg_global_trylock_relax(struct lglock *lg);
  7540. +#endif
  7541. +
  7542. #else
  7543. /* When !CONFIG_SMP, map lglock to spinlock */
  7544. #define lglock spinlock
  7545. diff --git a/include/linux/list.h b/include/linux/list.h
  7546. index 5356f4d661a7..3df0783a25e4 100644
  7547. --- a/include/linux/list.h
  7548. +++ b/include/linux/list.h
  7549. @@ -679,6 +679,16 @@ static inline bool hlist_fake(struct hlist_node *h)
  7550. }
  7551. /*
  7552. + * Check whether the node is the only node of the head without
  7553. + * accessing head.
  7554. + */
  7555. +static inline bool hlist_is_singular_node(struct hlist_node *n,
  7556. + struct hlist_head *h)
  7557. +{
  7558. + return !n->next && n->pprev == &h->first;
  7559. +}
  7560. +
  7561. +/*
  7562. * Move a list from one list head to another. Fixup the pprev
  7563. * reference of the first entry if it exists.
  7564. */
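hlist_is_singular_node() answers "am I the only entry?" from the node's own links, without reading the head (only its address is compared), so it is safe where the head may be concurrently torn down. A tiny illustration with invented names:

#include <linux/list.h>
#include <linux/types.h>

struct my_waiter {			/* hypothetical */
	struct hlist_node node;
};

static bool my_waiter_is_last(struct my_waiter *w, struct hlist_head *h)
{
	/* True iff w is linked, has no successor and pprev == &h->first. */
	return hlist_is_singular_node(&w->node, h);
}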
  7565. diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
  7566. index cb483305e1f5..4e5062316bb6 100644
  7567. --- a/include/linux/list_bl.h
  7568. +++ b/include/linux/list_bl.h
  7569. @@ -2,6 +2,7 @@
  7570. #define _LINUX_LIST_BL_H
  7571. #include <linux/list.h>
  7572. +#include <linux/spinlock.h>
  7573. #include <linux/bit_spinlock.h>
  7574. /*
  7575. @@ -32,13 +33,24 @@
  7576. struct hlist_bl_head {
  7577. struct hlist_bl_node *first;
  7578. +#ifdef CONFIG_PREEMPT_RT_BASE
  7579. + raw_spinlock_t lock;
  7580. +#endif
  7581. };
  7582. struct hlist_bl_node {
  7583. struct hlist_bl_node *next, **pprev;
  7584. };
  7585. -#define INIT_HLIST_BL_HEAD(ptr) \
  7586. - ((ptr)->first = NULL)
  7587. +
  7588. +#ifdef CONFIG_PREEMPT_RT_BASE
  7589. +#define INIT_HLIST_BL_HEAD(h) \
  7590. +do { \
  7591. + (h)->first = NULL; \
  7592. + raw_spin_lock_init(&(h)->lock); \
  7593. +} while (0)
  7594. +#else
  7595. +#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL
  7596. +#endif
  7597. static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
  7598. {
  7599. @@ -118,12 +130,26 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n)
  7600. static inline void hlist_bl_lock(struct hlist_bl_head *b)
  7601. {
  7602. +#ifndef CONFIG_PREEMPT_RT_BASE
  7603. bit_spin_lock(0, (unsigned long *)b);
  7604. +#else
  7605. + raw_spin_lock(&b->lock);
  7606. +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  7607. + __set_bit(0, (unsigned long *)b);
  7608. +#endif
  7609. +#endif
  7610. }
  7611. static inline void hlist_bl_unlock(struct hlist_bl_head *b)
  7612. {
  7613. +#ifndef CONFIG_PREEMPT_RT_BASE
  7614. __bit_spin_unlock(0, (unsigned long *)b);
  7615. +#else
  7616. +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  7617. + __clear_bit(0, (unsigned long *)b);
  7618. +#endif
  7619. + raw_spin_unlock(&b->lock);
  7620. +#endif
  7621. }
  7622. static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
  7623. diff --git a/include/linux/locallock.h b/include/linux/locallock.h
  7624. new file mode 100644
  7625. index 000000000000..845c77f1a5ca
  7626. --- /dev/null
  7627. +++ b/include/linux/locallock.h
  7628. @@ -0,0 +1,278 @@
  7629. +#ifndef _LINUX_LOCALLOCK_H
  7630. +#define _LINUX_LOCALLOCK_H
  7631. +
  7632. +#include <linux/percpu.h>
  7633. +#include <linux/spinlock.h>
  7634. +
  7635. +#ifdef CONFIG_PREEMPT_RT_BASE
  7636. +
  7637. +#ifdef CONFIG_DEBUG_SPINLOCK
  7638. +# define LL_WARN(cond) WARN_ON(cond)
  7639. +#else
  7640. +# define LL_WARN(cond) do { } while (0)
  7641. +#endif
  7642. +
  7643. +/*
  7644. + * per cpu lock based substitute for local_irq_*()
  7645. + */
  7646. +struct local_irq_lock {
  7647. + spinlock_t lock;
  7648. + struct task_struct *owner;
  7649. + int nestcnt;
  7650. + unsigned long flags;
  7651. +};
  7652. +
  7653. +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \
  7654. + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \
  7655. + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
  7656. +
  7657. +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \
  7658. + DECLARE_PER_CPU(struct local_irq_lock, lvar)
  7659. +
  7660. +#define local_irq_lock_init(lvar) \
  7661. + do { \
  7662. + int __cpu; \
  7663. + for_each_possible_cpu(__cpu) \
  7664. + spin_lock_init(&per_cpu(lvar, __cpu).lock); \
  7665. + } while (0)
  7666. +
  7667. +/*
  7668. + * spin_lock|trylock|unlock_local flavour that does not migrate disable
  7669. + * used for __local_lock|trylock|unlock where get_local_var/put_local_var
  7670. + * already takes care of the migrate_disable/enable
  7671. + * for CONFIG_PREEMPT_BASE map to the normal spin_* calls.
  7672. + */
  7673. +#ifdef CONFIG_PREEMPT_RT_FULL
  7674. +# define spin_lock_local(lock) rt_spin_lock__no_mg(lock)
  7675. +# define spin_trylock_local(lock) rt_spin_trylock__no_mg(lock)
  7676. +# define spin_unlock_local(lock) rt_spin_unlock__no_mg(lock)
  7677. +#else
  7678. +# define spin_lock_local(lock) spin_lock(lock)
  7679. +# define spin_trylock_local(lock) spin_trylock(lock)
  7680. +# define spin_unlock_local(lock) spin_unlock(lock)
  7681. +#endif
  7682. +
  7683. +static inline void __local_lock(struct local_irq_lock *lv)
  7684. +{
  7685. + if (lv->owner != current) {
  7686. + spin_lock_local(&lv->lock);
  7687. + LL_WARN(lv->owner);
  7688. + LL_WARN(lv->nestcnt);
  7689. + lv->owner = current;
  7690. + }
  7691. + lv->nestcnt++;
  7692. +}
  7693. +
  7694. +#define local_lock(lvar) \
  7695. + do { __local_lock(&get_local_var(lvar)); } while (0)
  7696. +
  7697. +#define local_lock_on(lvar, cpu) \
  7698. + do { __local_lock(&per_cpu(lvar, cpu)); } while (0)
  7699. +
  7700. +static inline int __local_trylock(struct local_irq_lock *lv)
  7701. +{
  7702. + if (lv->owner != current && spin_trylock_local(&lv->lock)) {
  7703. + LL_WARN(lv->owner);
  7704. + LL_WARN(lv->nestcnt);
  7705. + lv->owner = current;
  7706. + lv->nestcnt = 1;
  7707. + return 1;
  7708. + }
  7709. + return 0;
  7710. +}
  7711. +
  7712. +#define local_trylock(lvar) \
  7713. + ({ \
  7714. + int __locked; \
  7715. + __locked = __local_trylock(&get_local_var(lvar)); \
  7716. + if (!__locked) \
  7717. + put_local_var(lvar); \
  7718. + __locked; \
  7719. + })
  7720. +
  7721. +static inline void __local_unlock(struct local_irq_lock *lv)
  7722. +{
  7723. + LL_WARN(lv->nestcnt == 0);
  7724. + LL_WARN(lv->owner != current);
  7725. + if (--lv->nestcnt)
  7726. + return;
  7727. +
  7728. + lv->owner = NULL;
  7729. + spin_unlock_local(&lv->lock);
  7730. +}
  7731. +
  7732. +#define local_unlock(lvar) \
  7733. + do { \
  7734. + __local_unlock(this_cpu_ptr(&lvar)); \
  7735. + put_local_var(lvar); \
  7736. + } while (0)
  7737. +
  7738. +#define local_unlock_on(lvar, cpu) \
  7739. + do { __local_unlock(&per_cpu(lvar, cpu)); } while (0)
  7740. +
  7741. +static inline void __local_lock_irq(struct local_irq_lock *lv)
  7742. +{
  7743. + spin_lock_irqsave(&lv->lock, lv->flags);
  7744. + LL_WARN(lv->owner);
  7745. + LL_WARN(lv->nestcnt);
  7746. + lv->owner = current;
  7747. + lv->nestcnt = 1;
  7748. +}
  7749. +
  7750. +#define local_lock_irq(lvar) \
  7751. + do { __local_lock_irq(&get_local_var(lvar)); } while (0)
  7752. +
  7753. +#define local_lock_irq_on(lvar, cpu) \
  7754. + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0)
  7755. +
  7756. +static inline void __local_unlock_irq(struct local_irq_lock *lv)
  7757. +{
  7758. + LL_WARN(!lv->nestcnt);
  7759. + LL_WARN(lv->owner != current);
  7760. + lv->owner = NULL;
  7761. + lv->nestcnt = 0;
  7762. + spin_unlock_irq(&lv->lock);
  7763. +}
  7764. +
  7765. +#define local_unlock_irq(lvar) \
  7766. + do { \
  7767. + __local_unlock_irq(this_cpu_ptr(&lvar)); \
  7768. + put_local_var(lvar); \
  7769. + } while (0)
  7770. +
  7771. +#define local_unlock_irq_on(lvar, cpu) \
  7772. + do { \
  7773. + __local_unlock_irq(&per_cpu(lvar, cpu)); \
  7774. + } while (0)
  7775. +
  7776. +static inline int __local_lock_irqsave(struct local_irq_lock *lv)
  7777. +{
  7778. + if (lv->owner != current) {
  7779. + __local_lock_irq(lv);
  7780. + return 0;
  7781. + } else {
  7782. + lv->nestcnt++;
  7783. + return 1;
  7784. + }
  7785. +}
  7786. +
  7787. +#define local_lock_irqsave(lvar, _flags) \
  7788. + do { \
  7789. + if (__local_lock_irqsave(&get_local_var(lvar))) \
  7790. + put_local_var(lvar); \
  7791. + _flags = __this_cpu_read(lvar.flags); \
  7792. + } while (0)
  7793. +
  7794. +#define local_lock_irqsave_on(lvar, _flags, cpu) \
  7795. + do { \
  7796. + __local_lock_irqsave(&per_cpu(lvar, cpu)); \
  7797. + _flags = per_cpu(lvar, cpu).flags; \
  7798. + } while (0)
  7799. +
  7800. +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
  7801. + unsigned long flags)
  7802. +{
  7803. + LL_WARN(!lv->nestcnt);
  7804. + LL_WARN(lv->owner != current);
  7805. + if (--lv->nestcnt)
  7806. + return 0;
  7807. +
  7808. + lv->owner = NULL;
  7809. + spin_unlock_irqrestore(&lv->lock, lv->flags);
  7810. + return 1;
  7811. +}
  7812. +
  7813. +#define local_unlock_irqrestore(lvar, flags) \
  7814. + do { \
  7815. + if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \
  7816. + put_local_var(lvar); \
  7817. + } while (0)
  7818. +
  7819. +#define local_unlock_irqrestore_on(lvar, flags, cpu) \
  7820. + do { \
  7821. + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \
  7822. + } while (0)
  7823. +
  7824. +#define local_spin_trylock_irq(lvar, lock) \
  7825. + ({ \
  7826. + int __locked; \
  7827. + local_lock_irq(lvar); \
  7828. + __locked = spin_trylock(lock); \
  7829. + if (!__locked) \
  7830. + local_unlock_irq(lvar); \
  7831. + __locked; \
  7832. + })
  7833. +
  7834. +#define local_spin_lock_irq(lvar, lock) \
  7835. + do { \
  7836. + local_lock_irq(lvar); \
  7837. + spin_lock(lock); \
  7838. + } while (0)
  7839. +
  7840. +#define local_spin_unlock_irq(lvar, lock) \
  7841. + do { \
  7842. + spin_unlock(lock); \
  7843. + local_unlock_irq(lvar); \
  7844. + } while (0)
  7845. +
  7846. +#define local_spin_lock_irqsave(lvar, lock, flags) \
  7847. + do { \
  7848. + local_lock_irqsave(lvar, flags); \
  7849. + spin_lock(lock); \
  7850. + } while (0)
  7851. +
  7852. +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
  7853. + do { \
  7854. + spin_unlock(lock); \
  7855. + local_unlock_irqrestore(lvar, flags); \
  7856. + } while (0)
  7857. +
  7858. +#define get_locked_var(lvar, var) \
  7859. + (*({ \
  7860. + local_lock(lvar); \
  7861. + this_cpu_ptr(&var); \
  7862. + }))
  7863. +
  7864. +#define put_locked_var(lvar, var) local_unlock(lvar);
  7865. +
  7866. +#define local_lock_cpu(lvar) \
  7867. + ({ \
  7868. + local_lock(lvar); \
  7869. + smp_processor_id(); \
  7870. + })
  7871. +
  7872. +#define local_unlock_cpu(lvar) local_unlock(lvar)
  7873. +
  7874. +#else /* PREEMPT_RT_BASE */
  7875. +
  7876. +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar
  7877. +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar
  7878. +
  7879. +static inline void local_irq_lock_init(int lvar) { }
  7880. +
  7881. +#define local_lock(lvar) preempt_disable()
  7882. +#define local_unlock(lvar) preempt_enable()
  7883. +#define local_lock_irq(lvar) local_irq_disable()
  7884. +#define local_lock_irq_on(lvar, cpu) local_irq_disable()
  7885. +#define local_unlock_irq(lvar) local_irq_enable()
  7886. +#define local_unlock_irq_on(lvar, cpu) local_irq_enable()
  7887. +#define local_lock_irqsave(lvar, flags) local_irq_save(flags)
  7888. +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags)
  7889. +
  7890. +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock)
  7891. +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock)
  7892. +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock)
  7893. +#define local_spin_lock_irqsave(lvar, lock, flags) \
  7894. + spin_lock_irqsave(lock, flags)
  7895. +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
  7896. + spin_unlock_irqrestore(lock, flags)
  7897. +
  7898. +#define get_locked_var(lvar, var) get_cpu_var(var)
  7899. +#define put_locked_var(lvar, var) put_cpu_var(var)
  7900. +
  7901. +#define local_lock_cpu(lvar) get_cpu()
  7902. +#define local_unlock_cpu(lvar) put_cpu()
  7903. +
  7904. +#endif
  7905. +
  7906. +#endif
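A local lock is the patch's per-CPU substitute for local_irq_save()/preempt_disable() style protection: a plain preempt/irq toggle on mainline, a per-CPU sleeping spinlock on RT. The netfilter x_tables hunk later in this patch is a real user; in isolation, guarding hypothetical per-CPU data (my_cache and my_cache_lock are invented names) looks roughly like:

#include <linux/locallock.h>
#include <linux/percpu.h>

struct my_pcpu_cache {
	unsigned long hits;
};

static DEFINE_PER_CPU(struct my_pcpu_cache, my_cache);
static DEFINE_LOCAL_IRQ_LOCK(my_cache_lock);

static void my_cache_hit(void)
{
	struct my_pcpu_cache *c;

	/*
	 * !RT: get_locked_var() is get_cpu_var(), i.e. preemption off.
	 * RT: it takes the per-CPU spinlock, so the section stays
	 * preemptible yet serialized against other users on this CPU.
	 */
	c = &get_locked_var(my_cache_lock, my_cache);
	c->hits++;
	put_locked_var(my_cache_lock, my_cache);
}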
  7907. diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
  7908. index c2d75b4fa86c..8349504e0bbe 100644
  7909. --- a/include/linux/mm_types.h
  7910. +++ b/include/linux/mm_types.h
  7911. @@ -11,6 +11,7 @@
  7912. #include <linux/completion.h>
  7913. #include <linux/cpumask.h>
  7914. #include <linux/uprobes.h>
  7915. +#include <linux/rcupdate.h>
  7916. #include <linux/page-flags-layout.h>
  7917. #include <asm/page.h>
  7918. #include <asm/mmu.h>
  7919. @@ -502,6 +503,9 @@ struct mm_struct {
  7920. bool tlb_flush_pending;
  7921. #endif
  7922. struct uprobes_state uprobes_state;
  7923. +#ifdef CONFIG_PREEMPT_RT_BASE
  7924. + struct rcu_head delayed_drop;
  7925. +#endif
  7926. #ifdef CONFIG_X86_INTEL_MPX
  7927. /* address of the bounds directory */
  7928. void __user *bd_addr;
  7929. diff --git a/include/linux/mutex.h b/include/linux/mutex.h
  7930. index 2cb7531e7d7a..b3fdfc820216 100644
  7931. --- a/include/linux/mutex.h
  7932. +++ b/include/linux/mutex.h
  7933. @@ -19,6 +19,17 @@
  7934. #include <asm/processor.h>
  7935. #include <linux/osq_lock.h>
  7936. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  7937. +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
  7938. + , .dep_map = { .name = #lockname }
  7939. +#else
  7940. +# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
  7941. +#endif
  7942. +
  7943. +#ifdef CONFIG_PREEMPT_RT_FULL
  7944. +# include <linux/mutex_rt.h>
  7945. +#else
  7946. +
  7947. /*
  7948. * Simple, straightforward mutexes with strict semantics:
  7949. *
  7950. @@ -99,13 +110,6 @@ do { \
  7951. static inline void mutex_destroy(struct mutex *lock) {}
  7952. #endif
  7953. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  7954. -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
  7955. - , .dep_map = { .name = #lockname }
  7956. -#else
  7957. -# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
  7958. -#endif
  7959. -
  7960. #define __MUTEX_INITIALIZER(lockname) \
  7961. { .count = ATOMIC_INIT(1) \
  7962. , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
  7963. @@ -173,6 +177,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
  7964. extern int mutex_trylock(struct mutex *lock);
  7965. extern void mutex_unlock(struct mutex *lock);
  7966. +#endif /* !PREEMPT_RT_FULL */
  7967. +
  7968. extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
  7969. #endif /* __LINUX_MUTEX_H */
  7970. diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h
  7971. new file mode 100644
  7972. index 000000000000..c38a44b14da5
  7973. --- /dev/null
  7974. +++ b/include/linux/mutex_rt.h
  7975. @@ -0,0 +1,84 @@
  7976. +#ifndef __LINUX_MUTEX_RT_H
  7977. +#define __LINUX_MUTEX_RT_H
  7978. +
  7979. +#ifndef __LINUX_MUTEX_H
  7980. +#error "Please include mutex.h"
  7981. +#endif
  7982. +
  7983. +#include <linux/rtmutex.h>
  7984. +
  7985. +/* FIXME: Just for __lockfunc */
  7986. +#include <linux/spinlock.h>
  7987. +
  7988. +struct mutex {
  7989. + struct rt_mutex lock;
  7990. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  7991. + struct lockdep_map dep_map;
  7992. +#endif
  7993. +};
  7994. +
  7995. +#define __MUTEX_INITIALIZER(mutexname) \
  7996. + { \
  7997. + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \
  7998. + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \
  7999. + }
  8000. +
  8001. +#define DEFINE_MUTEX(mutexname) \
  8002. + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
  8003. +
  8004. +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key);
  8005. +extern void __lockfunc _mutex_lock(struct mutex *lock);
  8006. +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock);
  8007. +extern int __lockfunc _mutex_lock_killable(struct mutex *lock);
  8008. +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass);
  8009. +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
  8010. +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass);
  8011. +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass);
  8012. +extern int __lockfunc _mutex_trylock(struct mutex *lock);
  8013. +extern void __lockfunc _mutex_unlock(struct mutex *lock);
  8014. +
  8015. +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock)
  8016. +#define mutex_lock(l) _mutex_lock(l)
  8017. +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l)
  8018. +#define mutex_lock_killable(l) _mutex_lock_killable(l)
  8019. +#define mutex_trylock(l) _mutex_trylock(l)
  8020. +#define mutex_unlock(l) _mutex_unlock(l)
  8021. +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock)
  8022. +
  8023. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  8024. +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s)
  8025. +# define mutex_lock_interruptible_nested(l, s) \
  8026. + _mutex_lock_interruptible_nested(l, s)
  8027. +# define mutex_lock_killable_nested(l, s) \
  8028. + _mutex_lock_killable_nested(l, s)
  8029. +
  8030. +# define mutex_lock_nest_lock(lock, nest_lock) \
  8031. +do { \
  8032. + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
  8033. + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
  8034. +} while (0)
  8035. +
  8036. +#else
  8037. +# define mutex_lock_nested(l, s) _mutex_lock(l)
  8038. +# define mutex_lock_interruptible_nested(l, s) \
  8039. + _mutex_lock_interruptible(l)
  8040. +# define mutex_lock_killable_nested(l, s) \
  8041. + _mutex_lock_killable(l)
  8042. +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
  8043. +#endif
  8044. +
  8045. +# define mutex_init(mutex) \
  8046. +do { \
  8047. + static struct lock_class_key __key; \
  8048. + \
  8049. + rt_mutex_init(&(mutex)->lock); \
  8050. + __mutex_do_init((mutex), #mutex, &__key); \
  8051. +} while (0)
  8052. +
  8053. +# define __mutex_init(mutex, name, key) \
  8054. +do { \
  8055. + rt_mutex_init(&(mutex)->lock); \
  8056. + __mutex_do_init((mutex), name, key); \
  8057. +} while (0)
  8058. +
  8059. +#endif
  8060. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
  8061. index 78181a88903b..06863e4f1172 100644
  8062. --- a/include/linux/netdevice.h
  8063. +++ b/include/linux/netdevice.h
  8064. @@ -2396,11 +2396,20 @@ void netdev_freemem(struct net_device *dev);
  8065. void synchronize_net(void);
  8066. int init_dummy_netdev(struct net_device *dev);
  8067. +#ifdef CONFIG_PREEMPT_RT_FULL
  8068. +static inline int dev_recursion_level(void)
  8069. +{
  8070. + return current->xmit_recursion;
  8071. +}
  8072. +
  8073. +#else
  8074. +
  8075. DECLARE_PER_CPU(int, xmit_recursion);
  8076. static inline int dev_recursion_level(void)
  8077. {
  8078. return this_cpu_read(xmit_recursion);
  8079. }
  8080. +#endif
  8081. struct net_device *dev_get_by_index(struct net *net, int ifindex);
  8082. struct net_device *__dev_get_by_index(struct net *net, int ifindex);
  8083. @@ -2776,6 +2785,7 @@ struct softnet_data {
  8084. unsigned int dropped;
  8085. struct sk_buff_head input_pkt_queue;
  8086. struct napi_struct backlog;
  8087. + struct sk_buff_head tofree_queue;
  8088. };
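
On PREEMPT_RT_FULL the xmit recursion counter moves from a per-CPU variable into task_struct, since a per-CPU count is unreliable once the transmit path can be preempted and migrated; dev_recursion_level() keeps the same signature either way. An illustrative guard under an assumed caller-chosen limit; the real check lives in net/core/dev.c elsewhere in this patch:

#include <linux/netdevice.h>

#define MY_XMIT_RECURSION_LIMIT	8	/* hypothetical bound */

static bool my_xmit_too_deep(void)
{
	/* Same call site for both configs: per-task on RT, per-CPU on !RT. */
	return dev_recursion_level() > MY_XMIT_RECURSION_LIMIT;
}
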
  8089. diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
  8090. index 4dd9306c9d56..6b13e8fd0aba 100644
  8091. --- a/include/linux/netfilter/x_tables.h
  8092. +++ b/include/linux/netfilter/x_tables.h
  8093. @@ -4,6 +4,7 @@
  8094. #include <linux/netdevice.h>
  8095. #include <linux/static_key.h>
  8096. +#include <linux/locallock.h>
  8097. #include <uapi/linux/netfilter/x_tables.h>
  8098. /**
  8099. @@ -292,6 +293,8 @@ void xt_free_table_info(struct xt_table_info *info);
  8100. */
  8101. DECLARE_PER_CPU(seqcount_t, xt_recseq);
  8102. +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock);
  8103. +
  8104. /* xt_tee_enabled - true if x_tables needs to handle reentrancy
  8105. *
  8106. * Enabled if current ip(6)tables ruleset has at least one -j TEE rule.
  8107. @@ -312,6 +315,9 @@ static inline unsigned int xt_write_recseq_begin(void)
  8108. {
  8109. unsigned int addend;
  8110. + /* RT protection */
  8111. + local_lock(xt_write_lock);
  8112. +
  8113. /*
  8114. * Low order bit of sequence is set if we already
  8115. * called xt_write_recseq_begin().
  8116. @@ -342,6 +348,7 @@ static inline void xt_write_recseq_end(unsigned int addend)
  8117. /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */
  8118. smp_wmb();
  8119. __this_cpu_add(xt_recseq.sequence, addend);
  8120. + local_unlock(xt_write_lock);
  8121. }
  8122. /*
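
The new xt_write_lock local lock serializes the per-CPU seqcount writer on RT, where disabling bottom halves alone no longer guarantees exclusion. Callers keep the usual begin/end pairing; a sketch under that assumption, with a hypothetical traversal function:

#include <linux/netfilter/x_tables.h>
#include <linux/bottom_half.h>

static void my_table_traverse(void)	/* hypothetical caller */
{
	unsigned int addend;

	local_bh_disable();
	addend = xt_write_recseq_begin();	/* takes xt_write_lock on RT */
	/* ... walk the ruleset and update per-CPU counters ... */
	xt_write_recseq_end(addend);		/* drops xt_write_lock on RT */
	local_bh_enable();
}
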
  8123. diff --git a/include/linux/notifier.h b/include/linux/notifier.h
  8124. index 4149868de4e6..babe5b9bcb91 100644
  8125. --- a/include/linux/notifier.h
  8126. +++ b/include/linux/notifier.h
  8127. @@ -6,7 +6,7 @@
  8128. *
  8129. * Alan Cox <Alan.Cox@linux.org>
  8130. */
  8131. -
  8132. +
  8133. #ifndef _LINUX_NOTIFIER_H
  8134. #define _LINUX_NOTIFIER_H
  8135. #include <linux/errno.h>
  8136. @@ -42,9 +42,7 @@
  8137. * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
  8138. * As compensation, srcu_notifier_chain_unregister() is rather expensive.
  8139. * SRCU notifier chains should be used when the chain will be called very
  8140. - * often but notifier_blocks will seldom be removed. Also, SRCU notifier
  8141. - * chains are slightly more difficult to use because they require special
  8142. - * runtime initialization.
  8143. + * often but notifier_blocks will seldom be removed.
  8144. */
  8145. struct notifier_block;
  8146. @@ -90,7 +88,7 @@ struct srcu_notifier_head {
  8147. (name)->head = NULL; \
  8148. } while (0)
  8149. -/* srcu_notifier_heads must be initialized and cleaned up dynamically */
  8150. +/* srcu_notifier_heads must be cleaned up dynamically */
  8151. extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
  8152. #define srcu_cleanup_notifier_head(name) \
  8153. cleanup_srcu_struct(&(name)->srcu);
  8154. @@ -103,7 +101,13 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
  8155. .head = NULL }
  8156. #define RAW_NOTIFIER_INIT(name) { \
  8157. .head = NULL }
  8158. -/* srcu_notifier_heads cannot be initialized statically */
  8159. +
  8160. +#define SRCU_NOTIFIER_INIT(name, pcpu) \
  8161. + { \
  8162. + .mutex = __MUTEX_INITIALIZER(name.mutex), \
  8163. + .head = NULL, \
  8164. + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \
  8165. + }
  8166. #define ATOMIC_NOTIFIER_HEAD(name) \
  8167. struct atomic_notifier_head name = \
  8168. @@ -115,6 +119,18 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
  8169. struct raw_notifier_head name = \
  8170. RAW_NOTIFIER_INIT(name)
  8171. +#define _SRCU_NOTIFIER_HEAD(name, mod) \
  8172. + static DEFINE_PER_CPU(struct srcu_struct_array, \
  8173. + name##_head_srcu_array); \
  8174. + mod struct srcu_notifier_head name = \
  8175. + SRCU_NOTIFIER_INIT(name, name##_head_srcu_array)
  8176. +
  8177. +#define SRCU_NOTIFIER_HEAD(name) \
  8178. + _SRCU_NOTIFIER_HEAD(name, )
  8179. +
  8180. +#define SRCU_NOTIFIER_HEAD_STATIC(name) \
  8181. + _SRCU_NOTIFIER_HEAD(name, static)
  8182. +
  8183. #ifdef __KERNEL__
  8184. extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
  8185. @@ -184,12 +200,12 @@ static inline int notifier_to_errno(int ret)
  8186. /*
  8187. * Declared notifiers so far. I can imagine quite a few more chains
  8188. - * over time (eg laptop power reset chains, reboot chain (to clean
  8189. + * over time (eg laptop power reset chains, reboot chain (to clean
  8190. * device units up), device [un]mount chain, module load/unload chain,
  8191. - * low memory chain, screenblank chain (for plug in modular screenblankers)
  8192. + * low memory chain, screenblank chain (for plug in modular screenblankers)
  8193. * VC switch chains (for loadable kernel svgalib VC switch helpers) etc...
  8194. */
  8195. -
  8196. +
  8197. /* CPU notfiers are defined in include/linux/cpu.h. */
  8198. /* netdevice notifiers are defined in include/linux/netdevice.h */
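
With SRCU_NOTIFIER_INIT() and the SRCU_NOTIFIER_HEAD()/SRCU_NOTIFIER_HEAD_STATIC() helpers, an SRCU notifier chain can now be defined at build time instead of requiring srcu_init_notifier_head() before first use. A usage sketch with hypothetical names:

#include <linux/notifier.h>

SRCU_NOTIFIER_HEAD_STATIC(my_chain);	/* hypothetical chain; macro emits "static" */

static int my_event_cb(struct notifier_block *nb,
		       unsigned long action, void *data)
{
	return NOTIFY_OK;
}

static struct notifier_block my_nb = { .notifier_call = my_event_cb };

static void my_register_and_fire(void)
{
	srcu_notifier_chain_register(&my_chain, &my_nb);
	srcu_notifier_call_chain(&my_chain, 0, NULL);
}
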
  8199. diff --git a/include/linux/percpu.h b/include/linux/percpu.h
  8200. index 4bc6dafb703e..c0b6779d8f67 100644
  8201. --- a/include/linux/percpu.h
  8202. +++ b/include/linux/percpu.h
  8203. @@ -18,6 +18,35 @@
  8204. #define PERCPU_MODULE_RESERVE 0
  8205. #endif
  8206. +#ifdef CONFIG_PREEMPT_RT_FULL
  8207. +
  8208. +#define get_local_var(var) (*({ \
  8209. + migrate_disable(); \
  8210. + this_cpu_ptr(&var); }))
  8211. +
  8212. +#define put_local_var(var) do { \
  8213. + (void)&(var); \
  8214. + migrate_enable(); \
  8215. +} while (0)
  8216. +
  8217. +# define get_local_ptr(var) ({ \
  8218. + migrate_disable(); \
  8219. + this_cpu_ptr(var); })
  8220. +
  8221. +# define put_local_ptr(var) do { \
  8222. + (void)(var); \
  8223. + migrate_enable(); \
  8224. +} while (0)
  8225. +
  8226. +#else
  8227. +
  8228. +#define get_local_var(var) get_cpu_var(var)
  8229. +#define put_local_var(var) put_cpu_var(var)
  8230. +#define get_local_ptr(var) get_cpu_ptr(var)
  8231. +#define put_local_ptr(var) put_cpu_ptr(var)
  8232. +
  8233. +#endif
  8234. +
  8235. /* minimum unit size, also is the maximum supported allocation size */
  8236. #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
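
get_local_var()/put_local_var() are the RT-friendly replacements for get_cpu_var()/put_cpu_var(): they pin the task to its CPU with migrate_disable() instead of disabling preemption, so the protected section may still take sleeping locks. A sketch with a hypothetical per-CPU counter:

#include <linux/percpu.h>

static DEFINE_PER_CPU(int, my_counter);		/* hypothetical per-CPU data */

static void my_count_event(void)
{
	get_local_var(my_counter)++;	/* CPU-local; only migration disabled on RT */
	put_local_var(my_counter);
}
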
  8237. diff --git a/include/linux/pid.h b/include/linux/pid.h
  8238. index 23705a53abba..2cc64b779f03 100644
  8239. --- a/include/linux/pid.h
  8240. +++ b/include/linux/pid.h
  8241. @@ -2,6 +2,7 @@
  8242. #define _LINUX_PID_H
  8243. #include <linux/rcupdate.h>
  8244. +#include <linux/atomic.h>
  8245. enum pid_type
  8246. {
  8247. diff --git a/include/linux/preempt.h b/include/linux/preempt.h
  8248. index 75e4e30677f1..1cfb1cb72354 100644
  8249. --- a/include/linux/preempt.h
  8250. +++ b/include/linux/preempt.h
  8251. @@ -50,7 +50,11 @@
  8252. #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
  8253. #define NMI_OFFSET (1UL << NMI_SHIFT)
  8254. -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
  8255. +#ifndef CONFIG_PREEMPT_RT_FULL
  8256. +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
  8257. +#else
  8258. +# define SOFTIRQ_DISABLE_OFFSET (0)
  8259. +#endif
  8260. /* We use the MSB mostly because its available */
  8261. #define PREEMPT_NEED_RESCHED 0x80000000
  8262. @@ -59,9 +63,15 @@
  8263. #include <asm/preempt.h>
  8264. #define hardirq_count() (preempt_count() & HARDIRQ_MASK)
  8265. -#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
  8266. #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
  8267. | NMI_MASK))
  8268. +#ifndef CONFIG_PREEMPT_RT_FULL
  8269. +# define softirq_count() (preempt_count() & SOFTIRQ_MASK)
  8270. +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
  8271. +#else
  8272. +# define softirq_count() (0UL)
  8273. +extern int in_serving_softirq(void);
  8274. +#endif
  8275. /*
  8276. * Are we doing bottom half or hardware interrupt processing?
  8277. @@ -72,7 +82,6 @@
  8278. #define in_irq() (hardirq_count())
  8279. #define in_softirq() (softirq_count())
  8280. #define in_interrupt() (irq_count())
  8281. -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
  8282. /*
  8283. * Are we in NMI context?
  8284. @@ -91,7 +100,11 @@
  8285. /*
  8286. * The preempt_count offset after spin_lock()
  8287. */
  8288. +#if !defined(CONFIG_PREEMPT_RT_FULL)
  8289. #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
  8290. +#else
  8291. +#define PREEMPT_LOCK_OFFSET 0
  8292. +#endif
  8293. /*
  8294. * The preempt_count offset needed for things like:
  8295. @@ -140,6 +153,20 @@ extern void preempt_count_sub(int val);
  8296. #define preempt_count_inc() preempt_count_add(1)
  8297. #define preempt_count_dec() preempt_count_sub(1)
  8298. +#ifdef CONFIG_PREEMPT_LAZY
  8299. +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
  8300. +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
  8301. +#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
  8302. +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
  8303. +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
  8304. +#else
  8305. +#define add_preempt_lazy_count(val) do { } while (0)
  8306. +#define sub_preempt_lazy_count(val) do { } while (0)
  8307. +#define inc_preempt_lazy_count() do { } while (0)
  8308. +#define dec_preempt_lazy_count() do { } while (0)
  8309. +#define preempt_lazy_count() (0)
  8310. +#endif
  8311. +
  8312. #ifdef CONFIG_PREEMPT_COUNT
  8313. #define preempt_disable() \
  8314. @@ -148,13 +175,25 @@ do { \
  8315. barrier(); \
  8316. } while (0)
  8317. +#define preempt_lazy_disable() \
  8318. +do { \
  8319. + inc_preempt_lazy_count(); \
  8320. + barrier(); \
  8321. +} while (0)
  8322. +
  8323. #define sched_preempt_enable_no_resched() \
  8324. do { \
  8325. barrier(); \
  8326. preempt_count_dec(); \
  8327. } while (0)
  8328. -#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
  8329. +#ifdef CONFIG_PREEMPT_RT_BASE
  8330. +# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
  8331. +# define preempt_check_resched_rt() preempt_check_resched()
  8332. +#else
  8333. +# define preempt_enable_no_resched() preempt_enable()
  8334. +# define preempt_check_resched_rt() barrier();
  8335. +#endif
  8336. #define preemptible() (preempt_count() == 0 && !irqs_disabled())
  8337. @@ -179,6 +218,13 @@ do { \
  8338. __preempt_schedule(); \
  8339. } while (0)
  8340. +#define preempt_lazy_enable() \
  8341. +do { \
  8342. + dec_preempt_lazy_count(); \
  8343. + barrier(); \
  8344. + preempt_check_resched(); \
  8345. +} while (0)
  8346. +
  8347. #else /* !CONFIG_PREEMPT */
  8348. #define preempt_enable() \
  8349. do { \
  8350. @@ -224,6 +270,7 @@ do { \
  8351. #define preempt_disable_notrace() barrier()
  8352. #define preempt_enable_no_resched_notrace() barrier()
  8353. #define preempt_enable_notrace() barrier()
  8354. +#define preempt_check_resched_rt() barrier()
  8355. #define preemptible() 0
  8356. #endif /* CONFIG_PREEMPT_COUNT */
  8357. @@ -244,10 +291,31 @@ do { \
  8358. } while (0)
  8359. #define preempt_fold_need_resched() \
  8360. do { \
  8361. - if (tif_need_resched()) \
  8362. + if (tif_need_resched_now()) \
  8363. set_preempt_need_resched(); \
  8364. } while (0)
  8365. +#ifdef CONFIG_PREEMPT_RT_FULL
  8366. +# define preempt_disable_rt() preempt_disable()
  8367. +# define preempt_enable_rt() preempt_enable()
  8368. +# define preempt_disable_nort() barrier()
  8369. +# define preempt_enable_nort() barrier()
  8370. +# ifdef CONFIG_SMP
  8371. + extern void migrate_disable(void);
  8372. + extern void migrate_enable(void);
  8373. +# else /* CONFIG_SMP */
  8374. +# define migrate_disable() barrier()
  8375. +# define migrate_enable() barrier()
  8376. +# endif /* CONFIG_SMP */
  8377. +#else
  8378. +# define preempt_disable_rt() barrier()
  8379. +# define preempt_enable_rt() barrier()
  8380. +# define preempt_disable_nort() preempt_disable()
  8381. +# define preempt_enable_nort() preempt_enable()
  8382. +# define migrate_disable() preempt_disable()
  8383. +# define migrate_enable() preempt_enable()
  8384. +#endif
  8385. +
  8386. #ifdef CONFIG_PREEMPT_NOTIFIERS
  8387. struct preempt_notifier;
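
migrate_disable()/migrate_enable() are the core primitive behind the helpers above: on RT they keep the task on its current CPU while leaving it preemptible, and on !RT they fall back to preempt_disable()/preempt_enable(). A sketch of a CPU-stable section that may block on an rtmutex; the function name is hypothetical:

#include <linux/preempt.h>
#include <linux/smp.h>

static void my_cpu_stable_work(void)
{
	int cpu;

	migrate_disable();
	cpu = smp_processor_id();	/* stable until migrate_enable() */
	/* ... per-CPU work that must not hop CPUs but may sleep on RT ... */
	(void)cpu;
	migrate_enable();
}
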
  8388. diff --git a/include/linux/printk.h b/include/linux/printk.h
  8389. index 9ccbdf2c1453..428d6a7fa4e8 100644
  8390. --- a/include/linux/printk.h
  8391. +++ b/include/linux/printk.h
  8392. @@ -117,9 +117,11 @@ do { \
  8393. #ifdef CONFIG_EARLY_PRINTK
  8394. extern asmlinkage __printf(1, 2)
  8395. void early_printk(const char *fmt, ...);
  8396. +extern void printk_kill(void);
  8397. #else
  8398. static inline __printf(1, 2) __cold
  8399. void early_printk(const char *s, ...) { }
  8400. +static inline void printk_kill(void) { }
  8401. #endif
  8402. typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args);
  8403. diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
  8404. index 51a97ac8bfbf..0608483c91fc 100644
  8405. --- a/include/linux/radix-tree.h
  8406. +++ b/include/linux/radix-tree.h
  8407. @@ -294,8 +294,13 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
  8408. unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
  8409. void ***results, unsigned long *indices,
  8410. unsigned long first_index, unsigned int max_items);
  8411. +#ifndef CONFIG_PREEMPT_RT_FULL
  8412. int radix_tree_preload(gfp_t gfp_mask);
  8413. int radix_tree_maybe_preload(gfp_t gfp_mask);
  8414. +#else
  8415. +static inline int radix_tree_preload(gfp_t gm) { return 0; }
  8416. +static inline int radix_tree_maybe_preload(gfp_t gfp_mask) { return 0; }
  8417. +#endif
  8418. void radix_tree_init(void);
  8419. void *radix_tree_tag_set(struct radix_tree_root *root,
  8420. unsigned long index, unsigned int tag);
  8421. @@ -320,7 +325,7 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item);
  8422. static inline void radix_tree_preload_end(void)
  8423. {
  8424. - preempt_enable();
  8425. + preempt_enable_nort();
  8426. }
  8427. /**
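
On RT the preload step becomes a no-op (the per-CPU preload pools would require disabled preemption), so node allocation happens inside the insert path using the tree's own gfp mask; the caller-side pattern is unchanged. A sketch with a hypothetical tree:

#include <linux/radix-tree.h>
#include <linux/gfp.h>

static RADIX_TREE(my_tree, GFP_ATOMIC);		/* hypothetical tree */

static int my_store(unsigned long index, void *item)
{
	int err;

	err = radix_tree_preload(GFP_KERNEL);	/* no-op on RT */
	if (err)
		return err;
	/* insertion normally runs under the tree's lock here */
	err = radix_tree_insert(&my_tree, index, item);
	radix_tree_preload_end();		/* preempt_enable_nort() */
	return err;
}
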
  8428. diff --git a/include/linux/random.h b/include/linux/random.h
  8429. index 9c29122037f9..e7f2f8604918 100644
  8430. --- a/include/linux/random.h
  8431. +++ b/include/linux/random.h
  8432. @@ -20,7 +20,7 @@ struct random_ready_callback {
  8433. extern void add_device_randomness(const void *, unsigned int);
  8434. extern void add_input_randomness(unsigned int type, unsigned int code,
  8435. unsigned int value);
  8436. -extern void add_interrupt_randomness(int irq, int irq_flags);
  8437. +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip);
  8438. extern void get_random_bytes(void *buf, int nbytes);
  8439. extern int add_random_ready_callback(struct random_ready_callback *rdy);
  8440. diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
  8441. index b6900099ea81..fdc2e95173e0 100644
  8442. --- a/include/linux/rbtree.h
  8443. +++ b/include/linux/rbtree.h
  8444. @@ -31,7 +31,6 @@
  8445. #include <linux/kernel.h>
  8446. #include <linux/stddef.h>
  8447. -#include <linux/rcupdate.h>
  8448. struct rb_node {
  8449. unsigned long __rb_parent_color;
  8450. @@ -86,14 +85,8 @@ static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
  8451. *rb_link = node;
  8452. }
  8453. -static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
  8454. - struct rb_node **rb_link)
  8455. -{
  8456. - node->__rb_parent_color = (unsigned long)parent;
  8457. - node->rb_left = node->rb_right = NULL;
  8458. -
  8459. - rcu_assign_pointer(*rb_link, node);
  8460. -}
  8461. +void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
  8462. + struct rb_node **rb_link);
  8463. #define rb_entry_safe(ptr, type, member) \
  8464. ({ typeof(ptr) ____ptr = (ptr); \
  8465. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
  8466. index 2657aff2725b..ea90179fee34 100644
  8467. --- a/include/linux/rcupdate.h
  8468. +++ b/include/linux/rcupdate.h
  8469. @@ -177,6 +177,9 @@ void call_rcu(struct rcu_head *head,
  8470. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  8471. +#ifdef CONFIG_PREEMPT_RT_FULL
  8472. +#define call_rcu_bh call_rcu
  8473. +#else
  8474. /**
  8475. * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
  8476. * @head: structure to be used for queueing the RCU updates.
  8477. @@ -200,6 +203,7 @@ void call_rcu(struct rcu_head *head,
  8478. */
  8479. void call_rcu_bh(struct rcu_head *head,
  8480. rcu_callback_t func);
  8481. +#endif
  8482. /**
  8483. * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
  8484. @@ -300,6 +304,11 @@ void synchronize_rcu(void);
  8485. * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
  8486. */
  8487. #define rcu_preempt_depth() (current->rcu_read_lock_nesting)
  8488. +#ifndef CONFIG_PREEMPT_RT_FULL
  8489. +#define sched_rcu_preempt_depth() rcu_preempt_depth()
  8490. +#else
  8491. +static inline int sched_rcu_preempt_depth(void) { return 0; }
  8492. +#endif
  8493. #else /* #ifdef CONFIG_PREEMPT_RCU */
  8494. @@ -325,6 +334,8 @@ static inline int rcu_preempt_depth(void)
  8495. return 0;
  8496. }
  8497. +#define sched_rcu_preempt_depth() rcu_preempt_depth()
  8498. +
  8499. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  8500. /* Internal to kernel */
  8501. @@ -498,7 +509,14 @@ extern struct lockdep_map rcu_callback_map;
  8502. int debug_lockdep_rcu_enabled(void);
  8503. int rcu_read_lock_held(void);
  8504. +#ifdef CONFIG_PREEMPT_RT_FULL
  8505. +static inline int rcu_read_lock_bh_held(void)
  8506. +{
  8507. + return rcu_read_lock_held();
  8508. +}
  8509. +#else
  8510. int rcu_read_lock_bh_held(void);
  8511. +#endif
  8512. /**
  8513. * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
  8514. @@ -946,10 +964,14 @@ static inline void rcu_read_unlock(void)
  8515. static inline void rcu_read_lock_bh(void)
  8516. {
  8517. local_bh_disable();
  8518. +#ifdef CONFIG_PREEMPT_RT_FULL
  8519. + rcu_read_lock();
  8520. +#else
  8521. __acquire(RCU_BH);
  8522. rcu_lock_acquire(&rcu_bh_lock_map);
  8523. RCU_LOCKDEP_WARN(!rcu_is_watching(),
  8524. "rcu_read_lock_bh() used illegally while idle");
  8525. +#endif
  8526. }
  8527. /*
  8528. @@ -959,10 +981,14 @@ static inline void rcu_read_lock_bh(void)
  8529. */
  8530. static inline void rcu_read_unlock_bh(void)
  8531. {
  8532. +#ifdef CONFIG_PREEMPT_RT_FULL
  8533. + rcu_read_unlock();
  8534. +#else
  8535. RCU_LOCKDEP_WARN(!rcu_is_watching(),
  8536. "rcu_read_unlock_bh() used illegally while idle");
  8537. rcu_lock_release(&rcu_bh_lock_map);
  8538. __release(RCU_BH);
  8539. +#endif
  8540. local_bh_enable();
  8541. }
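
Because RT has no separate bh RCU flavor, rcu_read_lock_bh() above becomes local_bh_disable() plus a normal (preemptible) rcu_read_lock(); reader code is unchanged. A reader sketch over a hypothetical RCU-protected list:

#include <linux/rcupdate.h>
#include <linux/rculist.h>

struct my_entry {			/* hypothetical list element */
	struct list_head node;
	int key;
};

static LIST_HEAD(my_list);

static bool my_lookup(int key)
{
	struct my_entry *e;
	bool found = false;

	rcu_read_lock_bh();
	list_for_each_entry_rcu(e, &my_list, node) {
		if (e->key == key) {
			found = true;
			break;
		}
	}
	rcu_read_unlock_bh();
	return found;
}
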
  8542. diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
  8543. index ad1eda9fa4da..732192c5da11 100644
  8544. --- a/include/linux/rcutree.h
  8545. +++ b/include/linux/rcutree.h
  8546. @@ -44,7 +44,11 @@ static inline void rcu_virt_note_context_switch(int cpu)
  8547. rcu_note_context_switch();
  8548. }
  8549. +#ifdef CONFIG_PREEMPT_RT_FULL
  8550. +# define synchronize_rcu_bh synchronize_rcu
  8551. +#else
  8552. void synchronize_rcu_bh(void);
  8553. +#endif
  8554. void synchronize_sched_expedited(void);
  8555. void synchronize_rcu_expedited(void);
  8556. @@ -72,7 +76,11 @@ static inline void synchronize_rcu_bh_expedited(void)
  8557. }
  8558. void rcu_barrier(void);
  8559. +#ifdef CONFIG_PREEMPT_RT_FULL
  8560. +# define rcu_barrier_bh rcu_barrier
  8561. +#else
  8562. void rcu_barrier_bh(void);
  8563. +#endif
  8564. void rcu_barrier_sched(void);
  8565. unsigned long get_state_synchronize_rcu(void);
  8566. void cond_synchronize_rcu(unsigned long oldstate);
  8567. @@ -85,12 +93,10 @@ unsigned long rcu_batches_started(void);
  8568. unsigned long rcu_batches_started_bh(void);
  8569. unsigned long rcu_batches_started_sched(void);
  8570. unsigned long rcu_batches_completed(void);
  8571. -unsigned long rcu_batches_completed_bh(void);
  8572. unsigned long rcu_batches_completed_sched(void);
  8573. void show_rcu_gp_kthreads(void);
  8574. void rcu_force_quiescent_state(void);
  8575. -void rcu_bh_force_quiescent_state(void);
  8576. void rcu_sched_force_quiescent_state(void);
  8577. void rcu_idle_enter(void);
  8578. @@ -107,6 +113,14 @@ extern int rcu_scheduler_active __read_mostly;
  8579. bool rcu_is_watching(void);
  8580. +#ifndef CONFIG_PREEMPT_RT_FULL
  8581. +void rcu_bh_force_quiescent_state(void);
  8582. +unsigned long rcu_batches_completed_bh(void);
  8583. +#else
  8584. +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state
  8585. +# define rcu_batches_completed_bh rcu_batches_completed
  8586. +#endif
  8587. +
  8588. void rcu_all_qs(void);
  8589. #endif /* __LINUX_RCUTREE_H */
  8590. diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
  8591. index 1abba5ce2a2f..30211c627511 100644
  8592. --- a/include/linux/rtmutex.h
  8593. +++ b/include/linux/rtmutex.h
  8594. @@ -13,11 +13,15 @@
  8595. #define __LINUX_RT_MUTEX_H
  8596. #include <linux/linkage.h>
  8597. +#include <linux/spinlock_types_raw.h>
  8598. #include <linux/rbtree.h>
  8599. -#include <linux/spinlock_types.h>
  8600. extern int max_lock_depth; /* for sysctl */
  8601. +#ifdef CONFIG_DEBUG_MUTEXES
  8602. +#include <linux/debug_locks.h>
  8603. +#endif
  8604. +
  8605. /**
  8606. * The rt_mutex structure
  8607. *
  8608. @@ -31,8 +35,8 @@ struct rt_mutex {
  8609. struct rb_root waiters;
  8610. struct rb_node *waiters_leftmost;
  8611. struct task_struct *owner;
  8612. -#ifdef CONFIG_DEBUG_RT_MUTEXES
  8613. int save_state;
  8614. +#ifdef CONFIG_DEBUG_RT_MUTEXES
  8615. const char *name, *file;
  8616. int line;
  8617. void *magic;
  8618. @@ -55,22 +59,33 @@ struct hrtimer_sleeper;
  8619. # define rt_mutex_debug_check_no_locks_held(task) do { } while (0)
  8620. #endif
  8621. +# define rt_mutex_init(mutex) \
  8622. + do { \
  8623. + raw_spin_lock_init(&(mutex)->wait_lock); \
  8624. + __rt_mutex_init(mutex, #mutex); \
  8625. + } while (0)
  8626. +
  8627. #ifdef CONFIG_DEBUG_RT_MUTEXES
  8628. # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
  8629. , .name = #mutexname, .file = __FILE__, .line = __LINE__
  8630. -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__)
  8631. extern void rt_mutex_debug_task_free(struct task_struct *tsk);
  8632. #else
  8633. # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
  8634. -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL)
  8635. # define rt_mutex_debug_task_free(t) do { } while (0)
  8636. #endif
  8637. -#define __RT_MUTEX_INITIALIZER(mutexname) \
  8638. - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
  8639. +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
  8640. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
  8641. , .waiters = RB_ROOT \
  8642. , .owner = NULL \
  8643. - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
  8644. + __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
  8645. +
  8646. +#define __RT_MUTEX_INITIALIZER(mutexname) \
  8647. + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) }
  8648. +
  8649. +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
  8650. + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
  8651. + , .save_state = 1 }
  8652. #define DEFINE_RT_MUTEX(mutexname) \
  8653. struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
  8654. @@ -91,6 +106,7 @@ extern void rt_mutex_destroy(struct rt_mutex *lock);
  8655. extern void rt_mutex_lock(struct rt_mutex *lock);
  8656. extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
  8657. +extern int rt_mutex_lock_killable(struct rt_mutex *lock);
  8658. extern int rt_mutex_timed_lock(struct rt_mutex *lock,
  8659. struct hrtimer_sleeper *timeout);
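
rt_mutex_lock_killable() is newly declared above, and rt_mutex_init() now unconditionally initializes the wait_lock before calling __rt_mutex_init(). A small consumer sketch; the lock name is hypothetical:

#include <linux/rtmutex.h>

static DEFINE_RT_MUTEX(my_rtlock);	/* hypothetical lock */

static int my_critical_section(void)
{
	int ret;

	ret = rt_mutex_lock_killable(&my_rtlock);
	if (ret)
		return ret;		/* fatal signal while blocked */
	/* ... priority-inheriting critical section ... */
	rt_mutex_unlock(&my_rtlock);
	return 0;
}
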
  8660. diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h
  8661. new file mode 100644
  8662. index 000000000000..49ed2d45d3be
  8663. --- /dev/null
  8664. +++ b/include/linux/rwlock_rt.h
  8665. @@ -0,0 +1,99 @@
  8666. +#ifndef __LINUX_RWLOCK_RT_H
  8667. +#define __LINUX_RWLOCK_RT_H
  8668. +
  8669. +#ifndef __LINUX_SPINLOCK_H
  8670. +#error Do not include directly. Use spinlock.h
  8671. +#endif
  8672. +
  8673. +#define rwlock_init(rwl) \
  8674. +do { \
  8675. + static struct lock_class_key __key; \
  8676. + \
  8677. + rt_mutex_init(&(rwl)->lock); \
  8678. + __rt_rwlock_init(rwl, #rwl, &__key); \
  8679. +} while (0)
  8680. +
  8681. +extern void __lockfunc rt_write_lock(rwlock_t *rwlock);
  8682. +extern void __lockfunc rt_read_lock(rwlock_t *rwlock);
  8683. +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock);
  8684. +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags);
  8685. +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock);
  8686. +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock);
  8687. +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock);
  8688. +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock);
  8689. +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock);
  8690. +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key);
  8691. +
  8692. +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock))
  8693. +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock))
  8694. +
  8695. +#define write_trylock_irqsave(lock, flags) \
  8696. + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags))
  8697. +
  8698. +#define read_lock_irqsave(lock, flags) \
  8699. + do { \
  8700. + typecheck(unsigned long, flags); \
  8701. + flags = rt_read_lock_irqsave(lock); \
  8702. + } while (0)
  8703. +
  8704. +#define write_lock_irqsave(lock, flags) \
  8705. + do { \
  8706. + typecheck(unsigned long, flags); \
  8707. + flags = rt_write_lock_irqsave(lock); \
  8708. + } while (0)
  8709. +
  8710. +#define read_lock(lock) rt_read_lock(lock)
  8711. +
  8712. +#define read_lock_bh(lock) \
  8713. + do { \
  8714. + local_bh_disable(); \
  8715. + rt_read_lock(lock); \
  8716. + } while (0)
  8717. +
  8718. +#define read_lock_irq(lock) read_lock(lock)
  8719. +
  8720. +#define write_lock(lock) rt_write_lock(lock)
  8721. +
  8722. +#define write_lock_bh(lock) \
  8723. + do { \
  8724. + local_bh_disable(); \
  8725. + rt_write_lock(lock); \
  8726. + } while (0)
  8727. +
  8728. +#define write_lock_irq(lock) write_lock(lock)
  8729. +
  8730. +#define read_unlock(lock) rt_read_unlock(lock)
  8731. +
  8732. +#define read_unlock_bh(lock) \
  8733. + do { \
  8734. + rt_read_unlock(lock); \
  8735. + local_bh_enable(); \
  8736. + } while (0)
  8737. +
  8738. +#define read_unlock_irq(lock) read_unlock(lock)
  8739. +
  8740. +#define write_unlock(lock) rt_write_unlock(lock)
  8741. +
  8742. +#define write_unlock_bh(lock) \
  8743. + do { \
  8744. + rt_write_unlock(lock); \
  8745. + local_bh_enable(); \
  8746. + } while (0)
  8747. +
  8748. +#define write_unlock_irq(lock) write_unlock(lock)
  8749. +
  8750. +#define read_unlock_irqrestore(lock, flags) \
  8751. + do { \
  8752. + typecheck(unsigned long, flags); \
  8753. + (void) flags; \
  8754. + rt_read_unlock(lock); \
  8755. + } while (0)
  8756. +
  8757. +#define write_unlock_irqrestore(lock, flags) \
  8758. + do { \
  8759. + typecheck(unsigned long, flags); \
  8760. + (void) flags; \
  8761. + rt_write_unlock(lock); \
  8762. + } while (0)
  8763. +
  8764. +#endif
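
Under PREEMPT_RT_FULL the stock rwlock API is mapped onto the rt_read_lock()/rt_write_lock() primitives above (an rtmutex allowing single-reader recursion); the flags of the _irqsave variants are only carried for API parity, as the irqrestore macros above discard them. A caller sketch with hypothetical names:

#include <linux/spinlock.h>	/* pulls in rwlock_rt.h on RT */

static DEFINE_RWLOCK(my_rwlock);	/* hypothetical lock */
static int my_shared_value;

static int my_read(void)
{
	int v;

	read_lock(&my_rwlock);
	v = my_shared_value;
	read_unlock(&my_rwlock);
	return v;
}

static void my_write(int v)
{
	unsigned long flags;

	write_lock_irqsave(&my_rwlock, flags);	/* flags kept for API parity */
	my_shared_value = v;
	write_unlock_irqrestore(&my_rwlock, flags);
}
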
  8765. diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
  8766. index cc0072e93e36..5317cd957292 100644
  8767. --- a/include/linux/rwlock_types.h
  8768. +++ b/include/linux/rwlock_types.h
  8769. @@ -1,6 +1,10 @@
  8770. #ifndef __LINUX_RWLOCK_TYPES_H
  8771. #define __LINUX_RWLOCK_TYPES_H
  8772. +#if !defined(__LINUX_SPINLOCK_TYPES_H)
  8773. +# error "Do not include directly, include spinlock_types.h"
  8774. +#endif
  8775. +
  8776. /*
  8777. * include/linux/rwlock_types.h - generic rwlock type definitions
  8778. * and initializers
  8779. diff --git a/include/linux/rwlock_types_rt.h b/include/linux/rwlock_types_rt.h
  8780. new file mode 100644
  8781. index 000000000000..51b28d775fe1
  8782. --- /dev/null
  8783. +++ b/include/linux/rwlock_types_rt.h
  8784. @@ -0,0 +1,33 @@
  8785. +#ifndef __LINUX_RWLOCK_TYPES_RT_H
  8786. +#define __LINUX_RWLOCK_TYPES_RT_H
  8787. +
  8788. +#ifndef __LINUX_SPINLOCK_TYPES_H
  8789. +#error "Do not include directly. Include spinlock_types.h instead"
  8790. +#endif
  8791. +
  8792. +/*
  8793. + * rwlocks - rtmutex which allows single reader recursion
  8794. + */
  8795. +typedef struct {
  8796. + struct rt_mutex lock;
  8797. + int read_depth;
  8798. + unsigned int break_lock;
  8799. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  8800. + struct lockdep_map dep_map;
  8801. +#endif
  8802. +} rwlock_t;
  8803. +
  8804. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  8805. +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  8806. +#else
  8807. +# define RW_DEP_MAP_INIT(lockname)
  8808. +#endif
  8809. +
  8810. +#define __RW_LOCK_UNLOCKED(name) \
  8811. + { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \
  8812. + RW_DEP_MAP_INIT(name) }
  8813. +
  8814. +#define DEFINE_RWLOCK(name) \
  8815. + rwlock_t name = __RW_LOCK_UNLOCKED(name)
  8816. +
  8817. +#endif
  8818. diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
  8819. index 8f498cdde280..2b2148431f14 100644
  8820. --- a/include/linux/rwsem.h
  8821. +++ b/include/linux/rwsem.h
  8822. @@ -18,6 +18,10 @@
  8823. #include <linux/osq_lock.h>
  8824. #endif
  8825. +#ifdef CONFIG_PREEMPT_RT_FULL
  8826. +#include <linux/rwsem_rt.h>
  8827. +#else /* PREEMPT_RT_FULL */
  8828. +
  8829. struct rw_semaphore;
  8830. #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
  8831. @@ -177,4 +181,6 @@ extern void up_read_non_owner(struct rw_semaphore *sem);
  8832. # define up_read_non_owner(sem) up_read(sem)
  8833. #endif
  8834. +#endif /* !PREEMPT_RT_FULL */
  8835. +
  8836. #endif /* _LINUX_RWSEM_H */
  8837. diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h
  8838. new file mode 100644
  8839. index 000000000000..f97860b2e2a4
  8840. --- /dev/null
  8841. +++ b/include/linux/rwsem_rt.h
  8842. @@ -0,0 +1,152 @@
  8843. +#ifndef _LINUX_RWSEM_RT_H
  8844. +#define _LINUX_RWSEM_RT_H
  8845. +
  8846. +#ifndef _LINUX_RWSEM_H
  8847. +#error "Include rwsem.h"
  8848. +#endif
  8849. +
  8850. +/*
  8851. + * RW-semaphores are a spinlock plus a reader-depth count.
  8852. + *
  8853. + * Note that the semantics are different from the usual
  8854. + * Linux rw-sems, in PREEMPT_RT mode we do not allow
  8855. + * multiple readers to hold the lock at once, we only allow
  8856. + * a read-lock owner to read-lock recursively. This is
  8857. + * better for latency, makes the implementation inherently
  8858. + * fair and makes it simpler as well.
  8859. + */
  8860. +
  8861. +#include <linux/rtmutex.h>
  8862. +
  8863. +struct rw_semaphore {
  8864. + struct rt_mutex lock;
  8865. + int read_depth;
  8866. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  8867. + struct lockdep_map dep_map;
  8868. +#endif
  8869. +};
  8870. +
  8871. +#define __RWSEM_INITIALIZER(name) \
  8872. + { .lock = __RT_MUTEX_INITIALIZER(name.lock), \
  8873. + RW_DEP_MAP_INIT(name) }
  8874. +
  8875. +#define DECLARE_RWSEM(lockname) \
  8876. + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
  8877. +
  8878. +extern void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
  8879. + struct lock_class_key *key);
  8880. +
  8881. +#define __rt_init_rwsem(sem, name, key) \
  8882. + do { \
  8883. + rt_mutex_init(&(sem)->lock); \
  8884. + __rt_rwsem_init((sem), (name), (key));\
  8885. + } while (0)
  8886. +
  8887. +#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key)
  8888. +
  8889. +# define rt_init_rwsem(sem) \
  8890. +do { \
  8891. + static struct lock_class_key __key; \
  8892. + \
  8893. + __rt_init_rwsem((sem), #sem, &__key); \
  8894. +} while (0)
  8895. +
  8896. +extern void rt_down_write(struct rw_semaphore *rwsem);
  8897. +extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass);
  8898. +extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass);
  8899. +extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
  8900. + struct lockdep_map *nest);
  8901. +extern void rt__down_read(struct rw_semaphore *rwsem);
  8902. +extern void rt_down_read(struct rw_semaphore *rwsem);
  8903. +extern int rt_down_write_trylock(struct rw_semaphore *rwsem);
  8904. +extern int rt__down_read_trylock(struct rw_semaphore *rwsem);
  8905. +extern int rt_down_read_trylock(struct rw_semaphore *rwsem);
  8906. +extern void __rt_up_read(struct rw_semaphore *rwsem);
  8907. +extern void rt_up_read(struct rw_semaphore *rwsem);
  8908. +extern void rt_up_write(struct rw_semaphore *rwsem);
  8909. +extern void rt_downgrade_write(struct rw_semaphore *rwsem);
  8910. +
  8911. +#define init_rwsem(sem) rt_init_rwsem(sem)
  8912. +#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock)
  8913. +
  8914. +static inline int rwsem_is_contended(struct rw_semaphore *sem)
  8915. +{
  8916. + /* rt_mutex_has_waiters() */
  8917. + return !RB_EMPTY_ROOT(&sem->lock.waiters);
  8918. +}
  8919. +
  8920. +static inline void __down_read(struct rw_semaphore *sem)
  8921. +{
  8922. + rt__down_read(sem);
  8923. +}
  8924. +
  8925. +static inline void down_read(struct rw_semaphore *sem)
  8926. +{
  8927. + rt_down_read(sem);
  8928. +}
  8929. +
  8930. +static inline int __down_read_trylock(struct rw_semaphore *sem)
  8931. +{
  8932. + return rt__down_read_trylock(sem);
  8933. +}
  8934. +
  8935. +static inline int down_read_trylock(struct rw_semaphore *sem)
  8936. +{
  8937. + return rt_down_read_trylock(sem);
  8938. +}
  8939. +
  8940. +static inline void down_write(struct rw_semaphore *sem)
  8941. +{
  8942. + rt_down_write(sem);
  8943. +}
  8944. +
  8945. +static inline int down_write_trylock(struct rw_semaphore *sem)
  8946. +{
  8947. + return rt_down_write_trylock(sem);
  8948. +}
  8949. +
  8950. +static inline void __up_read(struct rw_semaphore *sem)
  8951. +{
  8952. + __rt_up_read(sem);
  8953. +}
  8954. +
  8955. +static inline void up_read(struct rw_semaphore *sem)
  8956. +{
  8957. + rt_up_read(sem);
  8958. +}
  8959. +
  8960. +static inline void up_write(struct rw_semaphore *sem)
  8961. +{
  8962. + rt_up_write(sem);
  8963. +}
  8964. +
  8965. +static inline void downgrade_write(struct rw_semaphore *sem)
  8966. +{
  8967. + rt_downgrade_write(sem);
  8968. +}
  8969. +
  8970. +static inline void down_read_nested(struct rw_semaphore *sem, int subclass)
  8971. +{
  8972. + return rt_down_read_nested(sem, subclass);
  8973. +}
  8974. +
  8975. +static inline void down_write_nested(struct rw_semaphore *sem, int subclass)
  8976. +{
  8977. + rt_down_write_nested(sem, subclass);
  8978. +}
  8979. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  8980. +static inline void down_write_nest_lock(struct rw_semaphore *sem,
  8981. + struct rw_semaphore *nest_lock)
  8982. +{
  8983. + rt_down_write_nested_lock(sem, &nest_lock->dep_map);
  8984. +}
  8985. +
  8986. +#else
  8987. +
  8988. +static inline void down_write_nest_lock(struct rw_semaphore *sem,
  8989. + struct rw_semaphore *nest_lock)
  8990. +{
  8991. + rt_down_write_nested_lock(sem, NULL);
  8992. +}
  8993. +#endif
  8994. +#endif
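
The rwsem API keeps its usual shape, but as the comment at the top of the new header notes, RT allows only a single reader (which may recurse) rather than multiple concurrent readers. A caller sketch with hypothetical names:

#include <linux/rwsem.h>

static DECLARE_RWSEM(my_sem);		/* hypothetical semaphore */
static int my_state;

static int my_query(void)
{
	int v;

	down_read(&my_sem);
	v = my_state;
	up_read(&my_sem);
	return v;
}

static void my_update(int v)
{
	down_write(&my_sem);
	my_state = v;
	up_write(&my_sem);
}
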
  8995. diff --git a/include/linux/sched.h b/include/linux/sched.h
  8996. index 52c4847b05e2..c242cbd84485 100644
  8997. --- a/include/linux/sched.h
  8998. +++ b/include/linux/sched.h
  8999. @@ -26,6 +26,7 @@ struct sched_param {
  9000. #include <linux/nodemask.h>
  9001. #include <linux/mm_types.h>
  9002. #include <linux/preempt.h>
  9003. +#include <asm/kmap_types.h>
  9004. #include <asm/page.h>
  9005. #include <asm/ptrace.h>
  9006. @@ -241,10 +242,7 @@ extern char ___assert_task_state[1 - 2*!!(
  9007. TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
  9008. __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
  9009. -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
  9010. #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
  9011. -#define task_is_stopped_or_traced(task) \
  9012. - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
  9013. #define task_contributes_to_load(task) \
  9014. ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
  9015. (task->flags & PF_FROZEN) == 0 && \
  9016. @@ -310,6 +308,11 @@ extern char ___assert_task_state[1 - 2*!!(
  9017. #endif
  9018. +#define __set_current_state_no_track(state_value) \
  9019. + do { current->state = (state_value); } while (0)
  9020. +#define set_current_state_no_track(state_value) \
  9021. + set_mb(current->state, (state_value))
  9022. +
  9023. /* Task command name length */
  9024. #define TASK_COMM_LEN 16
  9025. @@ -981,8 +984,18 @@ struct wake_q_head {
  9026. struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
  9027. extern void wake_q_add(struct wake_q_head *head,
  9028. - struct task_struct *task);
  9029. -extern void wake_up_q(struct wake_q_head *head);
  9030. + struct task_struct *task);
  9031. +extern void __wake_up_q(struct wake_q_head *head, bool sleeper);
  9032. +
  9033. +static inline void wake_up_q(struct wake_q_head *head)
  9034. +{
  9035. + __wake_up_q(head, false);
  9036. +}
  9037. +
  9038. +static inline void wake_up_q_sleeper(struct wake_q_head *head)
  9039. +{
  9040. + __wake_up_q(head, true);
  9041. +}
  9042. /*
  9043. * sched-domains (multiprocessor balancing) declarations:
  9044. @@ -1393,6 +1406,7 @@ struct tlbflush_unmap_batch {
  9045. struct task_struct {
  9046. volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
  9047. + volatile long saved_state; /* saved state for "spinlock sleepers" */
  9048. void *stack;
  9049. atomic_t usage;
  9050. unsigned int flags; /* per process flags, defined below */
  9051. @@ -1429,6 +1443,12 @@ struct task_struct {
  9052. #endif
  9053. unsigned int policy;
  9054. +#ifdef CONFIG_PREEMPT_RT_FULL
  9055. + int migrate_disable;
  9056. +# ifdef CONFIG_SCHED_DEBUG
  9057. + int migrate_disable_atomic;
  9058. +# endif
  9059. +#endif
  9060. int nr_cpus_allowed;
  9061. cpumask_t cpus_allowed;
  9062. @@ -1559,6 +1579,9 @@ struct task_struct {
  9063. struct task_cputime cputime_expires;
  9064. struct list_head cpu_timers[3];
  9065. +#ifdef CONFIG_PREEMPT_RT_BASE
  9066. + struct task_struct *posix_timer_list;
  9067. +#endif
  9068. /* process credentials */
  9069. const struct cred __rcu *real_cred; /* objective and real subjective task
  9070. @@ -1589,10 +1612,15 @@ struct task_struct {
  9071. /* signal handlers */
  9072. struct signal_struct *signal;
  9073. struct sighand_struct *sighand;
  9074. + struct sigqueue *sigqueue_cache;
  9075. sigset_t blocked, real_blocked;
  9076. sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
  9077. struct sigpending pending;
  9078. +#ifdef CONFIG_PREEMPT_RT_FULL
  9079. + /* TODO: move me into ->restart_block ? */
  9080. + struct siginfo forced_info;
  9081. +#endif
  9082. unsigned long sas_ss_sp;
  9083. size_t sas_ss_size;
  9084. @@ -1820,6 +1848,12 @@ struct task_struct {
  9085. /* bitmask and counter of trace recursion */
  9086. unsigned long trace_recursion;
  9087. #endif /* CONFIG_TRACING */
  9088. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  9089. + u64 preempt_timestamp_hist;
  9090. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  9091. + long timer_offset;
  9092. +#endif
  9093. +#endif
  9094. #ifdef CONFIG_KCOV
  9095. /* Coverage collection mode enabled for this task (0 if disabled). */
  9096. enum kcov_mode kcov_mode;
  9097. @@ -1845,9 +1879,23 @@ struct task_struct {
  9098. unsigned int sequential_io;
  9099. unsigned int sequential_io_avg;
  9100. #endif
  9101. +#ifdef CONFIG_PREEMPT_RT_BASE
  9102. + struct rcu_head put_rcu;
  9103. + int softirq_nestcnt;
  9104. + unsigned int softirqs_raised;
  9105. +#endif
  9106. +#ifdef CONFIG_PREEMPT_RT_FULL
  9107. +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32
  9108. + int kmap_idx;
  9109. + pte_t kmap_pte[KM_TYPE_NR];
  9110. +# endif
  9111. +#endif
  9112. #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  9113. unsigned long task_state_change;
  9114. #endif
  9115. +#ifdef CONFIG_PREEMPT_RT_FULL
  9116. + int xmit_recursion;
  9117. +#endif
  9118. int pagefault_disabled;
  9119. #ifdef CONFIG_MMU
  9120. struct task_struct *oom_reaper_list;
  9121. @@ -1868,9 +1916,6 @@ extern int arch_task_struct_size __read_mostly;
  9122. # define arch_task_struct_size (sizeof(struct task_struct))
  9123. #endif
  9124. -/* Future-safe accessor for struct task_struct's cpus_allowed. */
  9125. -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
  9126. -
  9127. #define TNF_MIGRATED 0x01
  9128. #define TNF_NO_GROUP 0x02
  9129. #define TNF_SHARED 0x04
  9130. @@ -2060,6 +2105,15 @@ extern struct pid *cad_pid;
  9131. extern void free_task(struct task_struct *tsk);
  9132. #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
  9133. +#ifdef CONFIG_PREEMPT_RT_BASE
  9134. +extern void __put_task_struct_cb(struct rcu_head *rhp);
  9135. +
  9136. +static inline void put_task_struct(struct task_struct *t)
  9137. +{
  9138. + if (atomic_dec_and_test(&t->usage))
  9139. + call_rcu(&t->put_rcu, __put_task_struct_cb);
  9140. +}
  9141. +#else
  9142. extern void __put_task_struct(struct task_struct *t);
  9143. static inline void put_task_struct(struct task_struct *t)
  9144. @@ -2067,6 +2121,7 @@ static inline void put_task_struct(struct task_struct *t)
  9145. if (atomic_dec_and_test(&t->usage))
  9146. __put_task_struct(t);
  9147. }
  9148. +#endif
  9149. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  9150. extern void task_cputime(struct task_struct *t,
  9151. @@ -2105,6 +2160,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
  9152. /*
  9153. * Per process flags
  9154. */
  9155. +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */
  9156. #define PF_EXITING 0x00000004 /* getting shut down */
  9157. #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
  9158. #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
  9159. @@ -2269,6 +2325,10 @@ extern void do_set_cpus_allowed(struct task_struct *p,
  9160. extern int set_cpus_allowed_ptr(struct task_struct *p,
  9161. const struct cpumask *new_mask);
  9162. +int migrate_me(void);
  9163. +void tell_sched_cpu_down_begin(int cpu);
  9164. +void tell_sched_cpu_down_done(int cpu);
  9165. +
  9166. #else
  9167. static inline void do_set_cpus_allowed(struct task_struct *p,
  9168. const struct cpumask *new_mask)
  9169. @@ -2281,6 +2341,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
  9170. return -EINVAL;
  9171. return 0;
  9172. }
  9173. +static inline int migrate_me(void) { return 0; }
  9174. +static inline void tell_sched_cpu_down_begin(int cpu) { }
  9175. +static inline void tell_sched_cpu_down_done(int cpu) { }
  9176. #endif
  9177. #ifdef CONFIG_NO_HZ_COMMON
  9178. @@ -2487,6 +2550,7 @@ extern void xtime_update(unsigned long ticks);
  9179. extern int wake_up_state(struct task_struct *tsk, unsigned int state);
  9180. extern int wake_up_process(struct task_struct *tsk);
  9181. +extern int wake_up_lock_sleeper(struct task_struct * tsk);
  9182. extern void wake_up_new_task(struct task_struct *tsk);
  9183. #ifdef CONFIG_SMP
  9184. extern void kick_process(struct task_struct *tsk);
  9185. @@ -2610,12 +2674,24 @@ extern struct mm_struct * mm_alloc(void);
  9186. /* mmdrop drops the mm and the page tables */
  9187. extern void __mmdrop(struct mm_struct *);
  9188. +
  9189. static inline void mmdrop(struct mm_struct * mm)
  9190. {
  9191. if (unlikely(atomic_dec_and_test(&mm->mm_count)))
  9192. __mmdrop(mm);
  9193. }
  9194. +#ifdef CONFIG_PREEMPT_RT_BASE
  9195. +extern void __mmdrop_delayed(struct rcu_head *rhp);
  9196. +static inline void mmdrop_delayed(struct mm_struct *mm)
  9197. +{
  9198. + if (atomic_dec_and_test(&mm->mm_count))
  9199. + call_rcu(&mm->delayed_drop, __mmdrop_delayed);
  9200. +}
  9201. +#else
  9202. +# define mmdrop_delayed(mm) mmdrop(mm)
  9203. +#endif
  9204. +
  9205. /* mmput gets rid of the mappings and all user-space */
  9206. extern void mmput(struct mm_struct *);
  9207. /* Grab a reference to a task's mm, if it is not already going away */
  9208. @@ -2933,6 +3009,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
  9209. return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
  9210. }
  9211. +#ifdef CONFIG_PREEMPT_LAZY
  9212. +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
  9213. +{
  9214. + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
  9215. +}
  9216. +
  9217. +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
  9218. +{
  9219. + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
  9220. +}
  9221. +
  9222. +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
  9223. +{
  9224. + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
  9225. +}
  9226. +
  9227. +static inline int need_resched_lazy(void)
  9228. +{
  9229. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  9230. +}
  9231. +
  9232. +static inline int need_resched_now(void)
  9233. +{
  9234. + return test_thread_flag(TIF_NEED_RESCHED);
  9235. +}
  9236. +
  9237. +#else
  9238. +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
  9239. +static inline int need_resched_lazy(void) { return 0; }
  9240. +
  9241. +static inline int need_resched_now(void)
  9242. +{
  9243. + return test_thread_flag(TIF_NEED_RESCHED);
  9244. +}
  9245. +
  9246. +#endif
  9247. +
  9248. static inline int restart_syscall(void)
  9249. {
  9250. set_tsk_thread_flag(current, TIF_SIGPENDING);
  9251. @@ -2964,6 +3077,51 @@ static inline int signal_pending_state(long state, struct task_struct *p)
  9252. return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
  9253. }
  9254. +static inline bool __task_is_stopped_or_traced(struct task_struct *task)
  9255. +{
  9256. + if (task->state & (__TASK_STOPPED | __TASK_TRACED))
  9257. + return true;
  9258. +#ifdef CONFIG_PREEMPT_RT_FULL
  9259. + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
  9260. + return true;
  9261. +#endif
  9262. + return false;
  9263. +}
  9264. +
  9265. +static inline bool task_is_stopped_or_traced(struct task_struct *task)
  9266. +{
  9267. + bool traced_stopped;
  9268. +
  9269. +#ifdef CONFIG_PREEMPT_RT_FULL
  9270. + unsigned long flags;
  9271. +
  9272. + raw_spin_lock_irqsave(&task->pi_lock, flags);
  9273. + traced_stopped = __task_is_stopped_or_traced(task);
  9274. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  9275. +#else
  9276. + traced_stopped = __task_is_stopped_or_traced(task);
  9277. +#endif
  9278. + return traced_stopped;
  9279. +}
  9280. +
  9281. +static inline bool task_is_traced(struct task_struct *task)
  9282. +{
  9283. + bool traced = false;
  9284. +
  9285. + if (task->state & __TASK_TRACED)
  9286. + return true;
  9287. +#ifdef CONFIG_PREEMPT_RT_FULL
  9288. + /* in case the task is sleeping on tasklist_lock */
  9289. + raw_spin_lock_irq(&task->pi_lock);
  9290. + if (task->state & __TASK_TRACED)
  9291. + traced = true;
  9292. + else if (task->saved_state & __TASK_TRACED)
  9293. + traced = true;
  9294. + raw_spin_unlock_irq(&task->pi_lock);
  9295. +#endif
  9296. + return traced;
  9297. +}
  9298. +
  9299. /*
  9300. * cond_resched() and cond_resched_lock(): latency reduction via
  9301. * explicit rescheduling in places that are safe. The return
  9302. @@ -2985,12 +3143,16 @@ extern int __cond_resched_lock(spinlock_t *lock);
  9303. __cond_resched_lock(lock); \
  9304. })
  9305. +#ifndef CONFIG_PREEMPT_RT_FULL
  9306. extern int __cond_resched_softirq(void);
  9307. #define cond_resched_softirq() ({ \
  9308. ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
  9309. __cond_resched_softirq(); \
  9310. })
  9311. +#else
  9312. +# define cond_resched_softirq() cond_resched()
  9313. +#endif
  9314. static inline void cond_resched_rcu(void)
  9315. {
  9316. @@ -3152,6 +3314,31 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
  9317. #endif /* CONFIG_SMP */
  9318. +static inline int __migrate_disabled(struct task_struct *p)
  9319. +{
  9320. +#ifdef CONFIG_PREEMPT_RT_FULL
  9321. + return p->migrate_disable;
  9322. +#else
  9323. + return 0;
  9324. +#endif
  9325. +}
  9326. +
  9327. +/* Future-safe accessor for struct task_struct's cpus_allowed. */
  9328. +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
  9329. +{
  9330. + if (__migrate_disabled(p))
  9331. + return cpumask_of(task_cpu(p));
  9332. +
  9333. + return &p->cpus_allowed;
  9334. +}
  9335. +
  9336. +static inline int tsk_nr_cpus_allowed(struct task_struct *p)
  9337. +{
  9338. + if (__migrate_disabled(p))
  9339. + return 1;
  9340. + return p->nr_cpus_allowed;
  9341. +}
  9342. +
  9343. extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
  9344. extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
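
tsk_cpus_allowed() changes from a plain macro into an accessor that reports only the task's current CPU while the task sits in a migrate_disable() section, and tsk_nr_cpus_allowed() follows the same rule. An illustrative (hypothetical) dump helper:

#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/printk.h>

static void my_dump_affinity(struct task_struct *p)
{
	const struct cpumask *mask = tsk_cpus_allowed(p);

	pr_info("%s/%d: %d usable CPU(s), first=%u\n",
		p->comm, p->pid, tsk_nr_cpus_allowed(p),
		cpumask_first(mask));
}
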
  9345. diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
  9346. index e0582106ef4f..b14f4d2368aa 100644
  9347. --- a/include/linux/seqlock.h
  9348. +++ b/include/linux/seqlock.h
  9349. @@ -220,20 +220,30 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
  9350. return __read_seqcount_retry(s, start);
  9351. }
  9352. -
  9353. -
  9354. -static inline void raw_write_seqcount_begin(seqcount_t *s)
  9355. +static inline void __raw_write_seqcount_begin(seqcount_t *s)
  9356. {
  9357. s->sequence++;
  9358. smp_wmb();
  9359. }
  9360. -static inline void raw_write_seqcount_end(seqcount_t *s)
  9361. +static inline void raw_write_seqcount_begin(seqcount_t *s)
  9362. +{
  9363. + preempt_disable_rt();
  9364. + __raw_write_seqcount_begin(s);
  9365. +}
  9366. +
  9367. +static inline void __raw_write_seqcount_end(seqcount_t *s)
  9368. {
  9369. smp_wmb();
  9370. s->sequence++;
  9371. }
  9372. +static inline void raw_write_seqcount_end(seqcount_t *s)
  9373. +{
  9374. + __raw_write_seqcount_end(s);
  9375. + preempt_enable_rt();
  9376. +}
  9377. +
  9378. /**
  9379. * raw_write_seqcount_barrier - do a seq write barrier
  9380. * @s: pointer to seqcount_t
  9381. @@ -425,10 +435,32 @@ typedef struct {
  9382. /*
  9383. * Read side functions for starting and finalizing a read side section.
  9384. */
  9385. +#ifndef CONFIG_PREEMPT_RT_FULL
  9386. static inline unsigned read_seqbegin(const seqlock_t *sl)
  9387. {
  9388. return read_seqcount_begin(&sl->seqcount);
  9389. }
  9390. +#else
  9391. +/*
  9392. + * Starvation safe read side for RT
  9393. + */
  9394. +static inline unsigned read_seqbegin(seqlock_t *sl)
  9395. +{
  9396. + unsigned ret;
  9397. +
  9398. +repeat:
  9399. + ret = ACCESS_ONCE(sl->seqcount.sequence);
  9400. + if (unlikely(ret & 1)) {
  9401. + /*
  9402. + * Take the lock and let the writer proceed (i.e. evtl
  9403. + * boost it), otherwise we could loop here forever.
  9404. + */
  9405. + spin_unlock_wait(&sl->lock);
  9406. + goto repeat;
  9407. + }
  9408. + return ret;
  9409. +}
  9410. +#endif
  9411. static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
  9412. {
  9413. @@ -443,36 +475,36 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
  9414. static inline void write_seqlock(seqlock_t *sl)
  9415. {
  9416. spin_lock(&sl->lock);
  9417. - write_seqcount_begin(&sl->seqcount);
  9418. + __raw_write_seqcount_begin(&sl->seqcount);
  9419. }
  9420. static inline void write_sequnlock(seqlock_t *sl)
  9421. {
  9422. - write_seqcount_end(&sl->seqcount);
  9423. + __raw_write_seqcount_end(&sl->seqcount);
  9424. spin_unlock(&sl->lock);
  9425. }
  9426. static inline void write_seqlock_bh(seqlock_t *sl)
  9427. {
  9428. spin_lock_bh(&sl->lock);
  9429. - write_seqcount_begin(&sl->seqcount);
  9430. + __raw_write_seqcount_begin(&sl->seqcount);
  9431. }
  9432. static inline void write_sequnlock_bh(seqlock_t *sl)
  9433. {
  9434. - write_seqcount_end(&sl->seqcount);
  9435. + __raw_write_seqcount_end(&sl->seqcount);
  9436. spin_unlock_bh(&sl->lock);
  9437. }
  9438. static inline void write_seqlock_irq(seqlock_t *sl)
  9439. {
  9440. spin_lock_irq(&sl->lock);
  9441. - write_seqcount_begin(&sl->seqcount);
  9442. + __raw_write_seqcount_begin(&sl->seqcount);
  9443. }
  9444. static inline void write_sequnlock_irq(seqlock_t *sl)
  9445. {
  9446. - write_seqcount_end(&sl->seqcount);
  9447. + __raw_write_seqcount_end(&sl->seqcount);
  9448. spin_unlock_irq(&sl->lock);
  9449. }
  9450. @@ -481,7 +513,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
  9451. unsigned long flags;
  9452. spin_lock_irqsave(&sl->lock, flags);
  9453. - write_seqcount_begin(&sl->seqcount);
  9454. + __raw_write_seqcount_begin(&sl->seqcount);
  9455. return flags;
  9456. }
  9457. @@ -491,7 +523,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
  9458. static inline void
  9459. write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
  9460. {
  9461. - write_seqcount_end(&sl->seqcount);
  9462. + __raw_write_seqcount_end(&sl->seqcount);
  9463. spin_unlock_irqrestore(&sl->lock, flags);
  9464. }
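
Readers and writers keep the standard seqlock pattern; the difference on RT is that read_seqbegin() waits on the seqlock's internal (now sleeping, PI-aware) lock when it observes an odd sequence, instead of spinning against a writer it may have preempted. A sketch with hypothetical data:

#include <linux/seqlock.h>

static DEFINE_SEQLOCK(my_seqlock);	/* hypothetical */
static u64 my_a, my_b;

static u64 my_read_pair(void)
{
	unsigned int seq;
	u64 a, b;

	do {
		seq = read_seqbegin(&my_seqlock);
		a = my_a;
		b = my_b;
	} while (read_seqretry(&my_seqlock, seq));

	return a + b;
}

static void my_write_pair(u64 a, u64 b)
{
	write_seqlock(&my_seqlock);	/* spin_lock() + __raw_write_seqcount_begin() */
	my_a = a;
	my_b = b;
	write_sequnlock(&my_seqlock);
}
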
  9465. diff --git a/include/linux/signal.h b/include/linux/signal.h
  9466. index d80259afb9e5..ddd1e6866a54 100644
  9467. --- a/include/linux/signal.h
  9468. +++ b/include/linux/signal.h
  9469. @@ -233,6 +233,7 @@ static inline void init_sigpending(struct sigpending *sig)
  9470. }
  9471. extern void flush_sigqueue(struct sigpending *queue);
  9472. +extern void flush_task_sigqueue(struct task_struct *tsk);
  9473. /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
  9474. static inline int valid_signal(unsigned long sig)
  9475. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
  9476. index 794b924e9669..db2af7c88a6a 100644
  9477. --- a/include/linux/skbuff.h
  9478. +++ b/include/linux/skbuff.h
  9479. @@ -283,6 +283,7 @@ struct sk_buff_head {
  9480. __u32 qlen;
  9481. spinlock_t lock;
  9482. + raw_spinlock_t raw_lock;
  9483. };
  9484. struct sk_buff;
  9485. @@ -1538,6 +1539,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list)
  9486. __skb_queue_head_init(list);
  9487. }
  9488. +static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
  9489. +{
  9490. + raw_spin_lock_init(&list->raw_lock);
  9491. + __skb_queue_head_init(list);
  9492. +}
  9493. +
  9494. static inline void skb_queue_head_init_class(struct sk_buff_head *list,
  9495. struct lock_class_key *class)
  9496. {
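
skb_queue_head_init_raw() initializes the new raw_lock member so a queue can be used from contexts where the normal spinlock (sleeping on RT) is not usable; the assumption below is that such a queue is then driven with the lockless __skb_queue_*() helpers under raw_lock, as the softnet tofree_queue added earlier is. Hypothetical names throughout:

#include <linux/skbuff.h>

static struct sk_buff_head my_queue;	/* hypothetical queue */

static void my_queue_setup(void)
{
	skb_queue_head_init_raw(&my_queue);
}

static void my_queue_skb(struct sk_buff *skb)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&my_queue.raw_lock, flags);
	__skb_queue_tail(&my_queue, skb);
	raw_spin_unlock_irqrestore(&my_queue.raw_lock, flags);
}
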
  9497. diff --git a/include/linux/smp.h b/include/linux/smp.h
  9498. index c4414074bd88..e6ab36aeaaab 100644
  9499. --- a/include/linux/smp.h
  9500. +++ b/include/linux/smp.h
  9501. @@ -185,6 +185,9 @@ static inline void smp_init(void) { }
  9502. #define get_cpu() ({ preempt_disable(); smp_processor_id(); })
  9503. #define put_cpu() preempt_enable()
  9504. +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
  9505. +#define put_cpu_light() migrate_enable()
  9506. +
  9507. /*
  9508. * Callback to arch code if there's nosmp or maxcpus=0 on the
  9509. * boot command line:
  9510. diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
  9511. index 47dd0cebd204..b241cc044bd3 100644
  9512. --- a/include/linux/spinlock.h
  9513. +++ b/include/linux/spinlock.h
  9514. @@ -271,7 +271,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
  9515. #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock))
  9516. /* Include rwlock functions */
  9517. -#include <linux/rwlock.h>
  9518. +#ifdef CONFIG_PREEMPT_RT_FULL
  9519. +# include <linux/rwlock_rt.h>
  9520. +#else
  9521. +# include <linux/rwlock.h>
  9522. +#endif
  9523. /*
  9524. * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
  9525. @@ -282,6 +286,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
  9526. # include <linux/spinlock_api_up.h>
  9527. #endif
  9528. +#ifdef CONFIG_PREEMPT_RT_FULL
  9529. +# include <linux/spinlock_rt.h>
  9530. +#else /* PREEMPT_RT_FULL */
  9531. +
  9532. /*
  9533. * Map the spin_lock functions to the raw variants for PREEMPT_RT=n
  9534. */
  9535. @@ -416,4 +424,6 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
  9536. #define atomic_dec_and_lock(atomic, lock) \
  9537. __cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
  9538. +#endif /* !PREEMPT_RT_FULL */
  9539. +
  9540. #endif /* __LINUX_SPINLOCK_H */
  9541. diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
  9542. index 5344268e6e62..043263f30e81 100644
  9543. --- a/include/linux/spinlock_api_smp.h
  9544. +++ b/include/linux/spinlock_api_smp.h
  9545. @@ -189,6 +189,8 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock)
  9546. return 0;
  9547. }
  9548. -#include <linux/rwlock_api_smp.h>
  9549. +#ifndef CONFIG_PREEMPT_RT_FULL
  9550. +# include <linux/rwlock_api_smp.h>
  9551. +#endif
  9552. #endif /* __LINUX_SPINLOCK_API_SMP_H */
  9553. diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
  9554. new file mode 100644
  9555. index 000000000000..3b2825537531
  9556. --- /dev/null
  9557. +++ b/include/linux/spinlock_rt.h
  9558. @@ -0,0 +1,163 @@
  9559. +#ifndef __LINUX_SPINLOCK_RT_H
  9560. +#define __LINUX_SPINLOCK_RT_H
  9561. +
  9562. +#ifndef __LINUX_SPINLOCK_H
  9563. +#error Do not include directly. Use spinlock.h
  9564. +#endif
  9565. +
  9566. +#include <linux/bug.h>
  9567. +
  9568. +extern void
  9569. +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key);
  9570. +
  9571. +#define spin_lock_init(slock) \
  9572. +do { \
  9573. + static struct lock_class_key __key; \
  9574. + \
  9575. + rt_mutex_init(&(slock)->lock); \
  9576. + __rt_spin_lock_init(slock, #slock, &__key); \
  9577. +} while (0)
  9578. +
  9579. +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock);
  9580. +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock);
  9581. +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock);
  9582. +
  9583. +extern void __lockfunc rt_spin_lock(spinlock_t *lock);
  9584. +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
  9585. +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
  9586. +extern void __lockfunc rt_spin_unlock(spinlock_t *lock);
  9587. +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock);
  9588. +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags);
  9589. +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock);
  9590. +extern int __lockfunc rt_spin_trylock(spinlock_t *lock);
  9591. +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock);
  9592. +
  9593. +/*
  9594. + * lockdep-less calls, for derived types like rwlock:
9595. + * (for trylock they can use rt_mutex_trylock() directly).
  9596. + */
  9597. +extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock);
  9598. +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
  9599. +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
  9600. +extern int __lockfunc __rt_spin_trylock(struct rt_mutex *lock);
  9601. +
  9602. +#define spin_lock(lock) rt_spin_lock(lock)
  9603. +
  9604. +#define spin_lock_bh(lock) \
  9605. + do { \
  9606. + local_bh_disable(); \
  9607. + rt_spin_lock(lock); \
  9608. + } while (0)
  9609. +
  9610. +#define spin_lock_irq(lock) spin_lock(lock)
  9611. +
  9612. +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock))
  9613. +
  9614. +#define spin_trylock(lock) \
  9615. +({ \
  9616. + int __locked; \
  9617. + __locked = spin_do_trylock(lock); \
  9618. + __locked; \
  9619. +})
  9620. +
  9621. +#ifdef CONFIG_LOCKDEP
  9622. +# define spin_lock_nested(lock, subclass) \
  9623. + do { \
  9624. + rt_spin_lock_nested(lock, subclass); \
  9625. + } while (0)
  9626. +
  9627. +#define spin_lock_bh_nested(lock, subclass) \
  9628. + do { \
  9629. + local_bh_disable(); \
  9630. + rt_spin_lock_nested(lock, subclass); \
  9631. + } while (0)
  9632. +
  9633. +# define spin_lock_irqsave_nested(lock, flags, subclass) \
  9634. + do { \
  9635. + typecheck(unsigned long, flags); \
  9636. + flags = 0; \
  9637. + rt_spin_lock_nested(lock, subclass); \
  9638. + } while (0)
  9639. +#else
  9640. +# define spin_lock_nested(lock, subclass) spin_lock(lock)
  9641. +# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock)
  9642. +
  9643. +# define spin_lock_irqsave_nested(lock, flags, subclass) \
  9644. + do { \
  9645. + typecheck(unsigned long, flags); \
  9646. + flags = 0; \
  9647. + spin_lock(lock); \
  9648. + } while (0)
  9649. +#endif
  9650. +
  9651. +#define spin_lock_irqsave(lock, flags) \
  9652. + do { \
  9653. + typecheck(unsigned long, flags); \
  9654. + flags = 0; \
  9655. + spin_lock(lock); \
  9656. + } while (0)
  9657. +
  9658. +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock)
  9659. +{
  9660. + unsigned long flags = 0;
  9661. +#ifdef CONFIG_TRACE_IRQFLAGS
  9662. + flags = rt_spin_lock_trace_flags(lock);
  9663. +#else
  9664. + spin_lock(lock); /* lock_local */
  9665. +#endif
  9666. + return flags;
  9667. +}
  9668. +
  9669. +/* FIXME: we need rt_spin_lock_nest_lock */
  9670. +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0)
  9671. +
  9672. +#define spin_unlock(lock) rt_spin_unlock(lock)
  9673. +
  9674. +#define spin_unlock_bh(lock) \
  9675. + do { \
  9676. + rt_spin_unlock(lock); \
  9677. + local_bh_enable(); \
  9678. + } while (0)
  9679. +
  9680. +#define spin_unlock_irq(lock) spin_unlock(lock)
  9681. +
  9682. +#define spin_unlock_irqrestore(lock, flags) \
  9683. + do { \
  9684. + typecheck(unsigned long, flags); \
  9685. + (void) flags; \
  9686. + spin_unlock(lock); \
  9687. + } while (0)
  9688. +
  9689. +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock))
  9690. +#define spin_trylock_irq(lock) spin_trylock(lock)
  9691. +
  9692. +#define spin_trylock_irqsave(lock, flags) \
  9693. + rt_spin_trylock_irqsave(lock, &(flags))
  9694. +
  9695. +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock)
  9696. +
  9697. +#ifdef CONFIG_GENERIC_LOCKBREAK
  9698. +# define spin_is_contended(lock) ((lock)->break_lock)
  9699. +#else
  9700. +# define spin_is_contended(lock) (((void)(lock), 0))
  9701. +#endif
  9702. +
  9703. +static inline int spin_can_lock(spinlock_t *lock)
  9704. +{
  9705. + return !rt_mutex_is_locked(&lock->lock);
  9706. +}
  9707. +
  9708. +static inline int spin_is_locked(spinlock_t *lock)
  9709. +{
  9710. + return rt_mutex_is_locked(&lock->lock);
  9711. +}
  9712. +
  9713. +static inline void assert_spin_locked(spinlock_t *lock)
  9714. +{
  9715. + BUG_ON(!spin_is_locked(lock));
  9716. +}
  9717. +
  9718. +#define atomic_dec_and_lock(atomic, lock) \
  9719. + atomic_dec_and_spin_lock(atomic, lock)
  9720. +
  9721. +#endif
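
On PREEMPT_RT_FULL the macros above turn spinlock_t into a sleeping rt_mutex: the _irq/_irqsave variants no longer disable interrupts, and the flags argument is only type-checked and set to zero. Existing callers keep compiling unchanged. A hedged illustration of what that means for a typical critical section (demo_lock and demo_count are made up for this sketch):

	static DEFINE_SPINLOCK(demo_lock);
	static unsigned long demo_count;

	static void demo_update(void)
	{
		unsigned long flags;

		/*
		 * On !RT this disables interrupts and spins; on RT it maps to
		 * rt_spin_lock(), flags stays 0 and the task may sleep while
		 * waiting for the lock.
		 */
		spin_lock_irqsave(&demo_lock, flags);
		demo_count++;
		spin_unlock_irqrestore(&demo_lock, flags);
	}
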
  9722. diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
  9723. index 73548eb13a5d..10bac715ea96 100644
  9724. --- a/include/linux/spinlock_types.h
  9725. +++ b/include/linux/spinlock_types.h
  9726. @@ -9,80 +9,15 @@
  9727. * Released under the General Public License (GPL).
  9728. */
  9729. -#if defined(CONFIG_SMP)
  9730. -# include <asm/spinlock_types.h>
  9731. +#include <linux/spinlock_types_raw.h>
  9732. +
  9733. +#ifndef CONFIG_PREEMPT_RT_FULL
  9734. +# include <linux/spinlock_types_nort.h>
  9735. +# include <linux/rwlock_types.h>
  9736. #else
  9737. -# include <linux/spinlock_types_up.h>
  9738. +# include <linux/rtmutex.h>
  9739. +# include <linux/spinlock_types_rt.h>
  9740. +# include <linux/rwlock_types_rt.h>
  9741. #endif
  9742. -#include <linux/lockdep.h>
  9743. -
  9744. -typedef struct raw_spinlock {
  9745. - arch_spinlock_t raw_lock;
  9746. -#ifdef CONFIG_GENERIC_LOCKBREAK
  9747. - unsigned int break_lock;
  9748. -#endif
  9749. -#ifdef CONFIG_DEBUG_SPINLOCK
  9750. - unsigned int magic, owner_cpu;
  9751. - void *owner;
  9752. -#endif
  9753. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9754. - struct lockdep_map dep_map;
  9755. -#endif
  9756. -} raw_spinlock_t;
  9757. -
  9758. -#define SPINLOCK_MAGIC 0xdead4ead
  9759. -
  9760. -#define SPINLOCK_OWNER_INIT ((void *)-1L)
  9761. -
  9762. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9763. -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  9764. -#else
  9765. -# define SPIN_DEP_MAP_INIT(lockname)
  9766. -#endif
  9767. -
  9768. -#ifdef CONFIG_DEBUG_SPINLOCK
  9769. -# define SPIN_DEBUG_INIT(lockname) \
  9770. - .magic = SPINLOCK_MAGIC, \
  9771. - .owner_cpu = -1, \
  9772. - .owner = SPINLOCK_OWNER_INIT,
  9773. -#else
  9774. -# define SPIN_DEBUG_INIT(lockname)
  9775. -#endif
  9776. -
  9777. -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
  9778. - { \
  9779. - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
  9780. - SPIN_DEBUG_INIT(lockname) \
  9781. - SPIN_DEP_MAP_INIT(lockname) }
  9782. -
  9783. -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
  9784. - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
  9785. -
  9786. -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
  9787. -
  9788. -typedef struct spinlock {
  9789. - union {
  9790. - struct raw_spinlock rlock;
  9791. -
  9792. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9793. -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
  9794. - struct {
  9795. - u8 __padding[LOCK_PADSIZE];
  9796. - struct lockdep_map dep_map;
  9797. - };
  9798. -#endif
  9799. - };
  9800. -} spinlock_t;
  9801. -
  9802. -#define __SPIN_LOCK_INITIALIZER(lockname) \
  9803. - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
  9804. -
  9805. -#define __SPIN_LOCK_UNLOCKED(lockname) \
  9806. - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
  9807. -
  9808. -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
  9809. -
  9810. -#include <linux/rwlock_types.h>
  9811. -
  9812. #endif /* __LINUX_SPINLOCK_TYPES_H */
  9813. diff --git a/include/linux/spinlock_types_nort.h b/include/linux/spinlock_types_nort.h
  9814. new file mode 100644
  9815. index 000000000000..f1dac1fb1d6a
  9816. --- /dev/null
  9817. +++ b/include/linux/spinlock_types_nort.h
  9818. @@ -0,0 +1,33 @@
  9819. +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H
  9820. +#define __LINUX_SPINLOCK_TYPES_NORT_H
  9821. +
  9822. +#ifndef __LINUX_SPINLOCK_TYPES_H
  9823. +#error "Do not include directly. Include spinlock_types.h instead"
  9824. +#endif
  9825. +
  9826. +/*
  9827. + * The non RT version maps spinlocks to raw_spinlocks
  9828. + */
  9829. +typedef struct spinlock {
  9830. + union {
  9831. + struct raw_spinlock rlock;
  9832. +
  9833. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9834. +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
  9835. + struct {
  9836. + u8 __padding[LOCK_PADSIZE];
  9837. + struct lockdep_map dep_map;
  9838. + };
  9839. +#endif
  9840. + };
  9841. +} spinlock_t;
  9842. +
  9843. +#define __SPIN_LOCK_INITIALIZER(lockname) \
  9844. + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
  9845. +
  9846. +#define __SPIN_LOCK_UNLOCKED(lockname) \
  9847. + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
  9848. +
  9849. +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
  9850. +
  9851. +#endif
  9852. diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h
  9853. new file mode 100644
  9854. index 000000000000..edffc4d53fc9
  9855. --- /dev/null
  9856. +++ b/include/linux/spinlock_types_raw.h
  9857. @@ -0,0 +1,56 @@
  9858. +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  9859. +#define __LINUX_SPINLOCK_TYPES_RAW_H
  9860. +
  9861. +#if defined(CONFIG_SMP)
  9862. +# include <asm/spinlock_types.h>
  9863. +#else
  9864. +# include <linux/spinlock_types_up.h>
  9865. +#endif
  9866. +
  9867. +#include <linux/lockdep.h>
  9868. +
  9869. +typedef struct raw_spinlock {
  9870. + arch_spinlock_t raw_lock;
  9871. +#ifdef CONFIG_GENERIC_LOCKBREAK
  9872. + unsigned int break_lock;
  9873. +#endif
  9874. +#ifdef CONFIG_DEBUG_SPINLOCK
  9875. + unsigned int magic, owner_cpu;
  9876. + void *owner;
  9877. +#endif
  9878. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9879. + struct lockdep_map dep_map;
  9880. +#endif
  9881. +} raw_spinlock_t;
  9882. +
  9883. +#define SPINLOCK_MAGIC 0xdead4ead
  9884. +
  9885. +#define SPINLOCK_OWNER_INIT ((void *)-1L)
  9886. +
  9887. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9888. +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  9889. +#else
  9890. +# define SPIN_DEP_MAP_INIT(lockname)
  9891. +#endif
  9892. +
  9893. +#ifdef CONFIG_DEBUG_SPINLOCK
  9894. +# define SPIN_DEBUG_INIT(lockname) \
  9895. + .magic = SPINLOCK_MAGIC, \
  9896. + .owner_cpu = -1, \
  9897. + .owner = SPINLOCK_OWNER_INIT,
  9898. +#else
  9899. +# define SPIN_DEBUG_INIT(lockname)
  9900. +#endif
  9901. +
  9902. +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
  9903. + { \
  9904. + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
  9905. + SPIN_DEBUG_INIT(lockname) \
  9906. + SPIN_DEP_MAP_INIT(lockname) }
  9907. +
  9908. +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
  9909. + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
  9910. +
  9911. +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
  9912. +
  9913. +#endif
  9914. diff --git a/include/linux/spinlock_types_rt.h b/include/linux/spinlock_types_rt.h
  9915. new file mode 100644
  9916. index 000000000000..3e3d8c5f7a9a
  9917. --- /dev/null
  9918. +++ b/include/linux/spinlock_types_rt.h
  9919. @@ -0,0 +1,48 @@
  9920. +#ifndef __LINUX_SPINLOCK_TYPES_RT_H
  9921. +#define __LINUX_SPINLOCK_TYPES_RT_H
  9922. +
  9923. +#ifndef __LINUX_SPINLOCK_TYPES_H
  9924. +#error "Do not include directly. Include spinlock_types.h instead"
  9925. +#endif
  9926. +
  9927. +#include <linux/cache.h>
  9928. +
  9929. +/*
  9930. + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field:
  9931. + */
  9932. +typedef struct spinlock {
  9933. + struct rt_mutex lock;
  9934. + unsigned int break_lock;
  9935. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9936. + struct lockdep_map dep_map;
  9937. +#endif
  9938. +} spinlock_t;
  9939. +
  9940. +#ifdef CONFIG_DEBUG_RT_MUTEXES
  9941. +# define __RT_SPIN_INITIALIZER(name) \
  9942. + { \
  9943. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
  9944. + .save_state = 1, \
  9945. + .file = __FILE__, \
  9946. + .line = __LINE__ , \
  9947. + }
  9948. +#else
  9949. +# define __RT_SPIN_INITIALIZER(name) \
  9950. + { \
  9951. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
  9952. + .save_state = 1, \
  9953. + }
  9954. +#endif
  9955. +
  9956. +/*
  9957. +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock)
  9958. +*/
  9959. +
  9960. +#define __SPIN_LOCK_UNLOCKED(name) \
  9961. + { .lock = __RT_SPIN_INITIALIZER(name.lock), \
  9962. + SPIN_DEP_MAP_INIT(name) }
  9963. +
  9964. +#define DEFINE_SPINLOCK(name) \
  9965. + spinlock_t name = __SPIN_LOCK_UNLOCKED(name)
  9966. +
  9967. +#endif
  9968. diff --git a/include/linux/srcu.h b/include/linux/srcu.h
  9969. index dc8eb63c6568..e793d3a257da 100644
  9970. --- a/include/linux/srcu.h
  9971. +++ b/include/linux/srcu.h
  9972. @@ -84,10 +84,10 @@ int init_srcu_struct(struct srcu_struct *sp);
  9973. void process_srcu(struct work_struct *work);
  9974. -#define __SRCU_STRUCT_INIT(name) \
  9975. +#define __SRCU_STRUCT_INIT(name, pcpu_name) \
  9976. { \
  9977. .completed = -300, \
  9978. - .per_cpu_ref = &name##_srcu_array, \
  9979. + .per_cpu_ref = &pcpu_name, \
  9980. .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \
  9981. .running = false, \
  9982. .batch_queue = RCU_BATCH_INIT(name.batch_queue), \
  9983. @@ -119,7 +119,7 @@ void process_srcu(struct work_struct *work);
  9984. */
  9985. #define __DEFINE_SRCU(name, is_static) \
  9986. static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
  9987. - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
  9988. + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array)
  9989. #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
  9990. #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
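
The extra pcpu_name parameter only lets the per-CPU array be named explicitly when the struct is initialized; users of DEFINE_SRCU()/DEFINE_STATIC_SRCU() are unaffected. A hedged usage reminder, with demo_srcu as an illustrative name:

	DEFINE_STATIC_SRCU(demo_srcu);

	static void demo_read_side(void)
	{
		int idx;

		idx = srcu_read_lock(&demo_srcu);
		/* ... read-side critical section ... */
		srcu_read_unlock(&demo_srcu, idx);
	}
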
  9991. diff --git a/include/linux/suspend.h b/include/linux/suspend.h
  9992. index 8b6ec7ef0854..9b77d4cc929f 100644
  9993. --- a/include/linux/suspend.h
  9994. +++ b/include/linux/suspend.h
  9995. @@ -194,6 +194,12 @@ struct platform_freeze_ops {
  9996. void (*end)(void);
  9997. };
  9998. +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
  9999. +extern bool pm_in_action;
  10000. +#else
  10001. +# define pm_in_action false
  10002. +#endif
  10003. +
  10004. #ifdef CONFIG_SUSPEND
  10005. /**
  10006. * suspend_set_ops - set platform dependent suspend operations
  10007. diff --git a/include/linux/swait.h b/include/linux/swait.h
  10008. index c1f9c62a8a50..83f004a72320 100644
  10009. --- a/include/linux/swait.h
  10010. +++ b/include/linux/swait.h
  10011. @@ -87,6 +87,7 @@ static inline int swait_active(struct swait_queue_head *q)
  10012. extern void swake_up(struct swait_queue_head *q);
  10013. extern void swake_up_all(struct swait_queue_head *q);
  10014. extern void swake_up_locked(struct swait_queue_head *q);
  10015. +extern void swake_up_all_locked(struct swait_queue_head *q);
  10016. extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
  10017. extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
  10018. diff --git a/include/linux/swap.h b/include/linux/swap.h
  10019. index ad220359f1b0..2b7721949dd5 100644
  10020. --- a/include/linux/swap.h
  10021. +++ b/include/linux/swap.h
  10022. @@ -11,6 +11,7 @@
  10023. #include <linux/fs.h>
  10024. #include <linux/atomic.h>
  10025. #include <linux/page-flags.h>
  10026. +#include <linux/locallock.h>
  10027. #include <asm/page.h>
  10028. struct notifier_block;
  10029. @@ -252,7 +253,8 @@ struct swap_info_struct {
  10030. void *workingset_eviction(struct address_space *mapping, struct page *page);
  10031. bool workingset_refault(void *shadow);
  10032. void workingset_activation(struct page *page);
  10033. -extern struct list_lru workingset_shadow_nodes;
  10034. +extern struct list_lru __workingset_shadow_nodes;
  10035. +DECLARE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
  10036. static inline unsigned int workingset_node_pages(struct radix_tree_node *node)
  10037. {
  10038. @@ -295,6 +297,7 @@ extern unsigned long nr_free_pagecache_pages(void);
  10039. /* linux/mm/swap.c */
  10040. +DECLARE_LOCAL_IRQ_LOCK(swapvec_lock);
  10041. extern void lru_cache_add(struct page *);
  10042. extern void lru_cache_add_anon(struct page *page);
  10043. extern void lru_cache_add_file(struct page *page);
  10044. diff --git a/include/linux/swork.h b/include/linux/swork.h
  10045. new file mode 100644
  10046. index 000000000000..f175fa9a6016
  10047. --- /dev/null
  10048. +++ b/include/linux/swork.h
  10049. @@ -0,0 +1,24 @@
  10050. +#ifndef _LINUX_SWORK_H
  10051. +#define _LINUX_SWORK_H
  10052. +
  10053. +#include <linux/list.h>
  10054. +
  10055. +struct swork_event {
  10056. + struct list_head item;
  10057. + unsigned long flags;
  10058. + void (*func)(struct swork_event *);
  10059. +};
  10060. +
  10061. +static inline void INIT_SWORK(struct swork_event *event,
  10062. + void (*func)(struct swork_event *))
  10063. +{
  10064. + event->flags = 0;
  10065. + event->func = func;
  10066. +}
  10067. +
  10068. +bool swork_queue(struct swork_event *sev);
  10069. +
  10070. +int swork_get(void);
  10071. +void swork_put(void);
  10072. +
  10073. +#endif /* _LINUX_SWORK_H */
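
The simple-work (swork) interface defers a callback to a dedicated kthread, so it can be queued from contexts that must not block; later hunks in this patch (for example the cgroup css release path) convert users to it. A minimal, hedged sketch against the API above — demo_event, demo_func and the setup/teardown functions are placeholders:

	#include <linux/swork.h>

	static struct swork_event demo_event;

	static void demo_func(struct swork_event *sev)
	{
		/* Runs later in the swork kthread, fully preemptible. */
	}

	static int demo_setup(void)
	{
		int err = swork_get();	/* bring up the worker (refcounted) */

		if (err)
			return err;

		INIT_SWORK(&demo_event, demo_func);
		swork_queue(&demo_event);	/* usable where blocking is not allowed */
		return 0;
	}

	static void demo_teardown(void)
	{
		swork_put();		/* drop the reference from swork_get() */
	}
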
  10074. diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
  10075. index b4c2a485b28a..5580c2c8410d 100644
  10076. --- a/include/linux/thread_info.h
  10077. +++ b/include/linux/thread_info.h
  10078. @@ -103,7 +103,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
  10079. #define test_thread_flag(flag) \
  10080. test_ti_thread_flag(current_thread_info(), flag)
  10081. -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
  10082. +#ifdef CONFIG_PREEMPT_LAZY
  10083. +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \
  10084. + test_thread_flag(TIF_NEED_RESCHED_LAZY))
  10085. +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED))
10086. +#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY)
  10087. +
  10088. +#else
  10089. +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
  10090. +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED)
  10091. +#define tif_need_resched_lazy() 0
  10092. +#endif
  10093. #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
  10094. /*
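
With PREEMPT_LAZY the kernel distinguishes a hard reschedule request (TIF_NEED_RESCHED) from a lazy one (TIF_NEED_RESCHED_LAZY) that only has to be honoured at points such as the return to user space. A hedged sketch of how a caller might branch on the split helpers; this is illustrative, not code from the patch:

	static void demo_resched_check(void)
	{
		if (tif_need_resched_now())
			schedule();	/* hard request: act immediately */
		else if (tif_need_resched_lazy())
			schedule();	/* lazy request: acted on at suitable points */
	}
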
  10095. diff --git a/include/linux/timer.h b/include/linux/timer.h
  10096. index 61aa61dc410c..a915acbd0072 100644
  10097. --- a/include/linux/timer.h
  10098. +++ b/include/linux/timer.h
  10099. @@ -19,7 +19,6 @@ struct timer_list {
  10100. void (*function)(unsigned long);
  10101. unsigned long data;
  10102. u32 flags;
  10103. - int slack;
  10104. #ifdef CONFIG_TIMER_STATS
  10105. int start_pid;
  10106. @@ -58,11 +57,14 @@ struct timer_list {
  10107. * workqueue locking issues. It's not meant for executing random crap
  10108. * with interrupts disabled. Abuse is monitored!
  10109. */
  10110. -#define TIMER_CPUMASK 0x0007FFFF
  10111. -#define TIMER_MIGRATING 0x00080000
  10112. +#define TIMER_CPUMASK 0x0003FFFF
  10113. +#define TIMER_MIGRATING 0x00040000
  10114. #define TIMER_BASEMASK (TIMER_CPUMASK | TIMER_MIGRATING)
  10115. -#define TIMER_DEFERRABLE 0x00100000
  10116. +#define TIMER_DEFERRABLE 0x00080000
  10117. +#define TIMER_PINNED 0x00100000
  10118. #define TIMER_IRQSAFE 0x00200000
  10119. +#define TIMER_ARRAYSHIFT 22
  10120. +#define TIMER_ARRAYMASK 0xFFC00000
  10121. #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \
  10122. .entry = { .next = TIMER_ENTRY_STATIC }, \
  10123. @@ -70,7 +72,6 @@ struct timer_list {
  10124. .expires = (_expires), \
  10125. .data = (_data), \
  10126. .flags = (_flags), \
  10127. - .slack = -1, \
  10128. __TIMER_LOCKDEP_MAP_INITIALIZER( \
  10129. __FILE__ ":" __stringify(__LINE__)) \
  10130. }
  10131. @@ -78,9 +79,15 @@ struct timer_list {
  10132. #define TIMER_INITIALIZER(_function, _expires, _data) \
  10133. __TIMER_INITIALIZER((_function), (_expires), (_data), 0)
  10134. +#define TIMER_PINNED_INITIALIZER(_function, _expires, _data) \
  10135. + __TIMER_INITIALIZER((_function), (_expires), (_data), TIMER_PINNED)
  10136. +
  10137. #define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) \
  10138. __TIMER_INITIALIZER((_function), (_expires), (_data), TIMER_DEFERRABLE)
  10139. +#define TIMER_PINNED_DEFERRED_INITIALIZER(_function, _expires, _data) \
  10140. + __TIMER_INITIALIZER((_function), (_expires), (_data), TIMER_DEFERRABLE | TIMER_PINNED)
  10141. +
  10142. #define DEFINE_TIMER(_name, _function, _expires, _data) \
  10143. struct timer_list _name = \
  10144. TIMER_INITIALIZER(_function, _expires, _data)
  10145. @@ -124,8 +131,12 @@ static inline void init_timer_on_stack_key(struct timer_list *timer,
  10146. #define init_timer(timer) \
  10147. __init_timer((timer), 0)
  10148. +#define init_timer_pinned(timer) \
  10149. + __init_timer((timer), TIMER_PINNED)
  10150. #define init_timer_deferrable(timer) \
  10151. __init_timer((timer), TIMER_DEFERRABLE)
  10152. +#define init_timer_pinned_deferrable(timer) \
  10153. + __init_timer((timer), TIMER_DEFERRABLE | TIMER_PINNED)
  10154. #define init_timer_on_stack(timer) \
  10155. __init_timer_on_stack((timer), 0)
  10156. @@ -145,10 +156,20 @@ static inline void init_timer_on_stack_key(struct timer_list *timer,
  10157. #define setup_timer(timer, fn, data) \
  10158. __setup_timer((timer), (fn), (data), 0)
  10159. +#define setup_pinned_timer(timer, fn, data) \
  10160. + __setup_timer((timer), (fn), (data), TIMER_PINNED)
  10161. +#define setup_deferrable_timer(timer, fn, data) \
  10162. + __setup_timer((timer), (fn), (data), TIMER_DEFERRABLE)
  10163. +#define setup_pinned_deferrable_timer(timer, fn, data) \
  10164. + __setup_timer((timer), (fn), (data), TIMER_DEFERRABLE | TIMER_PINNED)
  10165. #define setup_timer_on_stack(timer, fn, data) \
  10166. __setup_timer_on_stack((timer), (fn), (data), 0)
  10167. +#define setup_pinned_timer_on_stack(timer, fn, data) \
  10168. + __setup_timer_on_stack((timer), (fn), (data), TIMER_PINNED)
  10169. #define setup_deferrable_timer_on_stack(timer, fn, data) \
  10170. __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE)
  10171. +#define setup_pinned_deferrable_timer_on_stack(timer, fn, data) \
  10172. + __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE | TIMER_PINNED)
  10173. /**
  10174. * timer_pending - is a timer pending?
  10175. @@ -169,12 +190,7 @@ extern void add_timer_on(struct timer_list *timer, int cpu);
  10176. extern int del_timer(struct timer_list * timer);
  10177. extern int mod_timer(struct timer_list *timer, unsigned long expires);
  10178. extern int mod_timer_pending(struct timer_list *timer, unsigned long expires);
  10179. -extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires);
  10180. -extern void set_timer_slack(struct timer_list *time, int slack_hz);
  10181. -
  10182. -#define TIMER_NOT_PINNED 0
  10183. -#define TIMER_PINNED 1
  10184. /*
  10185. * The jiffies value which is added to now, when there is no timer
  10186. * in the timer wheel:
  10187. @@ -225,7 +241,7 @@ extern void add_timer(struct timer_list *timer);
  10188. extern int try_to_del_timer_sync(struct timer_list *timer);
  10189. -#ifdef CONFIG_SMP
  10190. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  10191. extern int del_timer_sync(struct timer_list *timer);
  10192. #else
  10193. # define del_timer_sync(t) del_timer(t)
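
Since mod_timer_pinned() and set_timer_slack() are removed, pinning is now expressed once, as a property of the timer, via TIMER_PINNED and the new init/setup helpers. A hedged sketch of the replacement pattern (demo_timer and demo_timer_fn are illustrative):

	static struct timer_list demo_timer;

	static void demo_timer_fn(unsigned long data)
	{
		/* Stays on the CPU it was armed on because the timer is pinned. */
	}

	static void demo_arm(void)
	{
		/* Before: setup_timer() followed by mod_timer_pinned(). */
		setup_pinned_timer(&demo_timer, demo_timer_fn, 0);
		/* Plain mod_timer() now honours the TIMER_PINNED flag. */
		mod_timer(&demo_timer, jiffies + HZ);
	}
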
  10194. diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
  10195. index 0810f81b6db2..682bfdf20f16 100644
  10196. --- a/include/linux/trace_events.h
  10197. +++ b/include/linux/trace_events.h
  10198. @@ -56,6 +56,9 @@ struct trace_entry {
  10199. unsigned char flags;
  10200. unsigned char preempt_count;
  10201. int pid;
  10202. + unsigned short migrate_disable;
  10203. + unsigned short padding;
  10204. + unsigned char preempt_lazy_count;
  10205. };
  10206. #define TRACE_EVENT_TYPE_MAX \
  10207. diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
  10208. index 349557825428..1d65e4f0668e 100644
  10209. --- a/include/linux/uaccess.h
  10210. +++ b/include/linux/uaccess.h
  10211. @@ -24,6 +24,7 @@ static __always_inline void pagefault_disabled_dec(void)
  10212. */
  10213. static inline void pagefault_disable(void)
  10214. {
  10215. + migrate_disable();
  10216. pagefault_disabled_inc();
  10217. /*
  10218. * make sure to have issued the store before a pagefault
  10219. @@ -40,6 +41,7 @@ static inline void pagefault_enable(void)
  10220. */
  10221. barrier();
  10222. pagefault_disabled_dec();
  10223. + migrate_enable();
  10224. }
  10225. /*
  10226. diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
  10227. index 4a29c75b146e..0a294e950df8 100644
  10228. --- a/include/linux/uprobes.h
  10229. +++ b/include/linux/uprobes.h
  10230. @@ -27,6 +27,7 @@
  10231. #include <linux/errno.h>
  10232. #include <linux/rbtree.h>
  10233. #include <linux/types.h>
  10234. +#include <linux/wait.h>
  10235. struct vm_area_struct;
  10236. struct mm_struct;
  10237. diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
  10238. index 73fae8c4a5fb..de8a124fd65d 100644
  10239. --- a/include/linux/vmstat.h
  10240. +++ b/include/linux/vmstat.h
  10241. @@ -33,7 +33,9 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
  10242. */
  10243. static inline void __count_vm_event(enum vm_event_item item)
  10244. {
  10245. + preempt_disable_rt();
  10246. raw_cpu_inc(vm_event_states.event[item]);
  10247. + preempt_enable_rt();
  10248. }
  10249. static inline void count_vm_event(enum vm_event_item item)
  10250. @@ -43,7 +45,9 @@ static inline void count_vm_event(enum vm_event_item item)
  10251. static inline void __count_vm_events(enum vm_event_item item, long delta)
  10252. {
  10253. + preempt_disable_rt();
  10254. raw_cpu_add(vm_event_states.event[item], delta);
  10255. + preempt_enable_rt();
  10256. }
  10257. static inline void count_vm_events(enum vm_event_item item, long delta)
  10258. diff --git a/include/linux/wait.h b/include/linux/wait.h
  10259. index 27d7a0ab5da3..7b3542de6db2 100644
  10260. --- a/include/linux/wait.h
  10261. +++ b/include/linux/wait.h
  10262. @@ -8,6 +8,7 @@
  10263. #include <linux/spinlock.h>
  10264. #include <asm/current.h>
  10265. #include <uapi/linux/wait.h>
  10266. +#include <linux/atomic.h>
  10267. typedef struct __wait_queue wait_queue_t;
  10268. typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
  10269. diff --git a/include/net/dst.h b/include/net/dst.h
  10270. index 5c98443c1c9e..22b81c7cec27 100644
  10271. --- a/include/net/dst.h
  10272. +++ b/include/net/dst.h
  10273. @@ -449,7 +449,7 @@ static inline void dst_confirm(struct dst_entry *dst)
  10274. static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
  10275. struct sk_buff *skb)
  10276. {
  10277. - const struct hh_cache *hh;
  10278. + struct hh_cache *hh;
  10279. if (dst->pending_confirm) {
  10280. unsigned long now = jiffies;
  10281. diff --git a/include/net/neighbour.h b/include/net/neighbour.h
  10282. index 8b683841e574..bf656008f6e7 100644
  10283. --- a/include/net/neighbour.h
  10284. +++ b/include/net/neighbour.h
  10285. @@ -446,7 +446,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
  10286. }
  10287. #endif
  10288. -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
  10289. +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
  10290. {
  10291. unsigned int seq;
  10292. int hh_len;
  10293. @@ -501,7 +501,7 @@ struct neighbour_cb {
  10294. #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
  10295. -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
  10296. +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n,
  10297. const struct net_device *dev)
  10298. {
  10299. unsigned int seq;
  10300. diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
  10301. index a69cde3ce460..bee40aadee1f 100644
  10302. --- a/include/net/netns/ipv4.h
  10303. +++ b/include/net/netns/ipv4.h
  10304. @@ -70,6 +70,7 @@ struct netns_ipv4 {
  10305. int sysctl_icmp_echo_ignore_all;
  10306. int sysctl_icmp_echo_ignore_broadcasts;
  10307. + int sysctl_icmp_echo_sysrq;
  10308. int sysctl_icmp_ignore_bogus_error_responses;
  10309. int sysctl_icmp_ratelimit;
  10310. int sysctl_icmp_ratemask;
  10311. diff --git a/include/trace/events/hist.h b/include/trace/events/hist.h
  10312. new file mode 100644
  10313. index 000000000000..f7710de1b1f3
  10314. --- /dev/null
  10315. +++ b/include/trace/events/hist.h
  10316. @@ -0,0 +1,73 @@
  10317. +#undef TRACE_SYSTEM
  10318. +#define TRACE_SYSTEM hist
  10319. +
  10320. +#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ)
  10321. +#define _TRACE_HIST_H
  10322. +
  10323. +#include "latency_hist.h"
  10324. +#include <linux/tracepoint.h>
  10325. +
  10326. +#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST)
  10327. +#define trace_preemptirqsoff_hist(a, b)
  10328. +#define trace_preemptirqsoff_hist_rcuidle(a, b)
  10329. +#else
  10330. +TRACE_EVENT(preemptirqsoff_hist,
  10331. +
  10332. + TP_PROTO(int reason, int starthist),
  10333. +
  10334. + TP_ARGS(reason, starthist),
  10335. +
  10336. + TP_STRUCT__entry(
  10337. + __field(int, reason)
  10338. + __field(int, starthist)
  10339. + ),
  10340. +
  10341. + TP_fast_assign(
  10342. + __entry->reason = reason;
  10343. + __entry->starthist = starthist;
  10344. + ),
  10345. +
  10346. + TP_printk("reason=%s starthist=%s", getaction(__entry->reason),
  10347. + __entry->starthist ? "start" : "stop")
  10348. +);
  10349. +#endif
  10350. +
  10351. +#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST
  10352. +#define trace_hrtimer_interrupt(a, b, c, d)
  10353. +#else
  10354. +TRACE_EVENT(hrtimer_interrupt,
  10355. +
  10356. + TP_PROTO(int cpu, long long offset, struct task_struct *curr,
  10357. + struct task_struct *task),
  10358. +
  10359. + TP_ARGS(cpu, offset, curr, task),
  10360. +
  10361. + TP_STRUCT__entry(
  10362. + __field(int, cpu)
  10363. + __field(long long, offset)
  10364. + __array(char, ccomm, TASK_COMM_LEN)
  10365. + __field(int, cprio)
  10366. + __array(char, tcomm, TASK_COMM_LEN)
  10367. + __field(int, tprio)
  10368. + ),
  10369. +
  10370. + TP_fast_assign(
  10371. + __entry->cpu = cpu;
  10372. + __entry->offset = offset;
  10373. + memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN);
  10374. + __entry->cprio = curr->prio;
  10375. + memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>",
  10376. + task != NULL ? TASK_COMM_LEN : 7);
  10377. + __entry->tprio = task != NULL ? task->prio : -1;
  10378. + ),
  10379. +
  10380. + TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]",
  10381. + __entry->cpu, __entry->offset, __entry->ccomm,
  10382. + __entry->cprio, __entry->tcomm, __entry->tprio)
  10383. +);
  10384. +#endif
  10385. +
  10386. +#endif /* _TRACE_HIST_H */
  10387. +
  10388. +/* This part must be outside protection */
  10389. +#include <trace/define_trace.h>
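
When the *_OFF_HIST options are enabled, these events are emitted around irqs-off/preempt-off sections to feed the latency histograms; with the options disabled the stub macros compile the calls away. A hedged sketch of the call pattern, using the hist_action values from latency_hist.h (shown in the next hunk):

	#include <trace/events/hist.h>

	static void demo_irqs_off_section(void)
	{
		trace_preemptirqsoff_hist(IRQS_OFF, 1);	/* start measuring */
		/* ... section with interrupts disabled ... */
		trace_preemptirqsoff_hist(IRQS_ON, 0);	/* stop measuring */
	}
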
  10390. diff --git a/include/trace/events/latency_hist.h b/include/trace/events/latency_hist.h
  10391. new file mode 100644
  10392. index 000000000000..d3f2fbd560b1
  10393. --- /dev/null
  10394. +++ b/include/trace/events/latency_hist.h
  10395. @@ -0,0 +1,29 @@
  10396. +#ifndef _LATENCY_HIST_H
  10397. +#define _LATENCY_HIST_H
  10398. +
  10399. +enum hist_action {
  10400. + IRQS_ON,
  10401. + PREEMPT_ON,
  10402. + TRACE_STOP,
  10403. + IRQS_OFF,
  10404. + PREEMPT_OFF,
  10405. + TRACE_START,
  10406. +};
  10407. +
  10408. +static char *actions[] = {
  10409. + "IRQS_ON",
  10410. + "PREEMPT_ON",
  10411. + "TRACE_STOP",
  10412. + "IRQS_OFF",
  10413. + "PREEMPT_OFF",
  10414. + "TRACE_START",
  10415. +};
  10416. +
  10417. +static inline char *getaction(int action)
  10418. +{
10419. + if (action >= 0 && action < sizeof(actions)/sizeof(actions[0]))
  10420. + return actions[action];
  10421. + return "unknown";
  10422. +}
  10423. +
  10424. +#endif /* _LATENCY_HIST_H */
  10425. diff --git a/init/Kconfig b/init/Kconfig
  10426. index 0dfd09d54c65..a74c5a0d8376 100644
  10427. --- a/init/Kconfig
  10428. +++ b/init/Kconfig
  10429. @@ -494,7 +494,7 @@ config TINY_RCU
  10430. config RCU_EXPERT
  10431. bool "Make expert-level adjustments to RCU configuration"
  10432. - default n
  10433. + default y if PREEMPT_RT_FULL
  10434. help
  10435. This option needs to be enabled if you wish to make
  10436. expert-level adjustments to RCU configuration. By default,
  10437. @@ -610,7 +610,7 @@ config RCU_FANOUT_LEAF
  10438. config RCU_FAST_NO_HZ
  10439. bool "Accelerate last non-dyntick-idle CPU's grace periods"
  10440. - depends on NO_HZ_COMMON && SMP && RCU_EXPERT
  10441. + depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL
  10442. default n
  10443. help
  10444. This option permits CPUs to enter dynticks-idle state even if
  10445. @@ -637,7 +637,7 @@ config TREE_RCU_TRACE
  10446. config RCU_BOOST
  10447. bool "Enable RCU priority boosting"
  10448. depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
  10449. - default n
  10450. + default y if PREEMPT_RT_FULL
  10451. help
  10452. This option boosts the priority of preempted RCU readers that
  10453. block the current preemptible RCU grace period for too long.
  10454. @@ -1029,6 +1029,7 @@ config CFS_BANDWIDTH
  10455. config RT_GROUP_SCHED
  10456. bool "Group scheduling for SCHED_RR/FIFO"
  10457. depends on CGROUP_SCHED
  10458. + depends on !PREEMPT_RT_FULL
  10459. default n
  10460. help
  10461. This feature lets you explicitly allocate real CPU bandwidth
  10462. @@ -1717,6 +1718,7 @@ choice
  10463. config SLAB
  10464. bool "SLAB"
  10465. + depends on !PREEMPT_RT_FULL
  10466. help
  10467. The regular slab allocator that is established and known to work
  10468. well in all environments. It organizes cache hot objects in
  10469. @@ -1735,6 +1737,7 @@ config SLUB
  10470. config SLOB
  10471. depends on EXPERT
  10472. bool "SLOB (Simple Allocator)"
  10473. + depends on !PREEMPT_RT_FULL
  10474. help
  10475. SLOB replaces the stock allocator with a drastically simpler
  10476. allocator. SLOB is generally more space efficient but
  10477. @@ -1744,7 +1747,7 @@ endchoice
  10478. config SLUB_CPU_PARTIAL
  10479. default y
  10480. - depends on SLUB && SMP
  10481. + depends on SLUB && SMP && !PREEMPT_RT_FULL
  10482. bool "SLUB per cpu partial cache"
  10483. help
  10484. Per cpu partial caches accellerate objects allocation and freeing
  10485. diff --git a/init/Makefile b/init/Makefile
  10486. index 7bc47ee31c36..88cf473554e0 100644
  10487. --- a/init/Makefile
  10488. +++ b/init/Makefile
  10489. @@ -33,4 +33,4 @@ $(obj)/version.o: include/generated/compile.h
  10490. include/generated/compile.h: FORCE
  10491. @$($(quiet)chk_compile.h)
  10492. $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
  10493. - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
  10494. + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)"
  10495. diff --git a/init/main.c b/init/main.c
  10496. index b3c6e363ae18..920f9ebd3d95 100644
  10497. --- a/init/main.c
  10498. +++ b/init/main.c
  10499. @@ -507,6 +507,7 @@ asmlinkage __visible void __init start_kernel(void)
  10500. setup_command_line(command_line);
  10501. setup_nr_cpu_ids();
  10502. setup_per_cpu_areas();
  10503. + softirq_early_init();
  10504. boot_cpu_state_init();
  10505. smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
  10506. diff --git a/ipc/msg.c b/ipc/msg.c
  10507. index 1471db9a7e61..b8c5e7f2bebc 100644
  10508. --- a/ipc/msg.c
  10509. +++ b/ipc/msg.c
  10510. @@ -183,20 +183,14 @@ static void ss_wakeup(struct list_head *h, int kill)
  10511. }
  10512. }
  10513. -static void expunge_all(struct msg_queue *msq, int res)
  10514. +static void expunge_all(struct msg_queue *msq, int res,
  10515. + struct wake_q_head *wake_q)
  10516. {
  10517. struct msg_receiver *msr, *t;
  10518. list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
  10519. - msr->r_msg = NULL; /* initialize expunge ordering */
  10520. - wake_up_process(msr->r_tsk);
  10521. - /*
  10522. - * Ensure that the wakeup is visible before setting r_msg as
  10523. - * the receiving end depends on it: either spinning on a nil,
  10524. - * or dealing with -EAGAIN cases. See lockless receive part 1
  10525. - * and 2 in do_msgrcv().
  10526. - */
  10527. - smp_wmb(); /* barrier (B) */
  10528. +
  10529. + wake_q_add(wake_q, msr->r_tsk);
  10530. msr->r_msg = ERR_PTR(res);
  10531. }
  10532. }
  10533. @@ -213,11 +207,13 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
  10534. {
  10535. struct msg_msg *msg, *t;
  10536. struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
  10537. + WAKE_Q(wake_q);
  10538. - expunge_all(msq, -EIDRM);
  10539. + expunge_all(msq, -EIDRM, &wake_q);
  10540. ss_wakeup(&msq->q_senders, 1);
  10541. msg_rmid(ns, msq);
  10542. ipc_unlock_object(&msq->q_perm);
  10543. + wake_up_q(&wake_q);
  10544. rcu_read_unlock();
  10545. list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
  10546. @@ -342,6 +338,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
  10547. struct kern_ipc_perm *ipcp;
  10548. struct msqid64_ds uninitialized_var(msqid64);
  10549. struct msg_queue *msq;
  10550. + WAKE_Q(wake_q);
  10551. int err;
  10552. if (cmd == IPC_SET) {
  10553. @@ -389,7 +386,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
  10554. /* sleeping receivers might be excluded by
  10555. * stricter permissions.
  10556. */
  10557. - expunge_all(msq, -EAGAIN);
  10558. + expunge_all(msq, -EAGAIN, &wake_q);
  10559. /* sleeping senders might be able to send
  10560. * due to a larger queue size.
  10561. */
  10562. @@ -402,6 +399,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
  10563. out_unlock0:
  10564. ipc_unlock_object(&msq->q_perm);
  10565. + wake_up_q(&wake_q);
  10566. out_unlock1:
  10567. rcu_read_unlock();
  10568. out_up:
  10569. @@ -566,7 +564,8 @@ static int testmsg(struct msg_msg *msg, long type, int mode)
  10570. return 0;
  10571. }
  10572. -static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
  10573. +static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
  10574. + struct wake_q_head *wake_q)
  10575. {
  10576. struct msg_receiver *msr, *t;
  10577. @@ -577,27 +576,13 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
  10578. list_del(&msr->r_list);
  10579. if (msr->r_maxsize < msg->m_ts) {
  10580. - /* initialize pipelined send ordering */
  10581. - msr->r_msg = NULL;
  10582. - wake_up_process(msr->r_tsk);
  10583. - /* barrier (B) see barrier comment below */
  10584. - smp_wmb();
  10585. + wake_q_add(wake_q, msr->r_tsk);
  10586. msr->r_msg = ERR_PTR(-E2BIG);
  10587. } else {
  10588. - msr->r_msg = NULL;
  10589. msq->q_lrpid = task_pid_vnr(msr->r_tsk);
  10590. msq->q_rtime = get_seconds();
  10591. - wake_up_process(msr->r_tsk);
  10592. - /*
  10593. - * Ensure that the wakeup is visible before
  10594. - * setting r_msg, as the receiving can otherwise
  10595. - * exit - once r_msg is set, the receiver can
  10596. - * continue. See lockless receive part 1 and 2
  10597. - * in do_msgrcv(). Barrier (B).
  10598. - */
  10599. - smp_wmb();
  10600. + wake_q_add(wake_q, msr->r_tsk);
  10601. msr->r_msg = msg;
  10602. -
  10603. return 1;
  10604. }
  10605. }
  10606. @@ -613,6 +598,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
  10607. struct msg_msg *msg;
  10608. int err;
  10609. struct ipc_namespace *ns;
  10610. + WAKE_Q(wake_q);
  10611. ns = current->nsproxy->ipc_ns;
  10612. @@ -698,7 +684,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
  10613. msq->q_lspid = task_tgid_vnr(current);
  10614. msq->q_stime = get_seconds();
  10615. - if (!pipelined_send(msq, msg)) {
  10616. + if (!pipelined_send(msq, msg, &wake_q)) {
  10617. /* no one is waiting for this message, enqueue it */
  10618. list_add_tail(&msg->m_list, &msq->q_messages);
  10619. msq->q_cbytes += msgsz;
  10620. @@ -712,6 +698,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
  10621. out_unlock0:
  10622. ipc_unlock_object(&msq->q_perm);
  10623. + wake_up_q(&wake_q);
  10624. out_unlock1:
  10625. rcu_read_unlock();
  10626. if (msg != NULL)
  10627. @@ -932,57 +919,25 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
  10628. rcu_read_lock();
  10629. /* Lockless receive, part 2:
  10630. - * Wait until pipelined_send or expunge_all are outside of
  10631. - * wake_up_process(). There is a race with exit(), see
  10632. - * ipc/mqueue.c for the details. The correct serialization
  10633. - * ensures that a receiver cannot continue without the wakeup
  10634. - * being visibible _before_ setting r_msg:
  10635. + * The work in pipelined_send() and expunge_all():
  10636. + * - Set pointer to message
  10637. + * - Queue the receiver task for later wakeup
  10638. + * - Wake up the process after the lock is dropped.
  10639. *
  10640. - * CPU 0 CPU 1
  10641. - * <loop receiver>
  10642. - * smp_rmb(); (A) <-- pair -. <waker thread>
  10643. - * <load ->r_msg> | msr->r_msg = NULL;
  10644. - * | wake_up_process();
  10645. - * <continue> `------> smp_wmb(); (B)
  10646. - * msr->r_msg = msg;
  10647. - *
  10648. - * Where (A) orders the message value read and where (B) orders
  10649. - * the write to the r_msg -- done in both pipelined_send and
  10650. - * expunge_all.
  10651. + * Should the process wake up before this wakeup (due to a
  10652. + * signal) it will either see the message and continue …
  10653. */
  10654. - for (;;) {
  10655. - /*
  10656. - * Pairs with writer barrier in pipelined_send
  10657. - * or expunge_all.
  10658. - */
  10659. - smp_rmb(); /* barrier (A) */
  10660. - msg = (struct msg_msg *)msr_d.r_msg;
  10661. - if (msg)
  10662. - break;
  10663. - /*
  10664. - * The cpu_relax() call is a compiler barrier
  10665. - * which forces everything in this loop to be
  10666. - * re-loaded.
  10667. - */
  10668. - cpu_relax();
  10669. - }
  10670. -
  10671. - /* Lockless receive, part 3:
  10672. - * If there is a message or an error then accept it without
  10673. - * locking.
  10674. - */
  10675. + msg = (struct msg_msg *)msr_d.r_msg;
  10676. if (msg != ERR_PTR(-EAGAIN))
  10677. goto out_unlock1;
  10678. - /* Lockless receive, part 3:
  10679. - * Acquire the queue spinlock.
  10680. - */
  10681. + /*
  10682. + * … or see -EAGAIN, acquire the lock to check the message
  10683. + * again.
  10684. + */
  10685. ipc_lock_object(&msq->q_perm);
  10686. - /* Lockless receive, part 4:
  10687. - * Repeat test after acquiring the spinlock.
  10688. - */
  10689. msg = (struct msg_msg *)msr_d.r_msg;
  10690. if (msg != ERR_PTR(-EAGAIN))
  10691. goto out_unlock0;
  10692. diff --git a/ipc/sem.c b/ipc/sem.c
  10693. index b3757ea0694b..981a60ac6b6c 100644
  10694. --- a/ipc/sem.c
  10695. +++ b/ipc/sem.c
  10696. @@ -697,6 +697,13 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
  10697. static void wake_up_sem_queue_prepare(struct list_head *pt,
  10698. struct sem_queue *q, int error)
  10699. {
  10700. +#ifdef CONFIG_PREEMPT_RT_BASE
  10701. + struct task_struct *p = q->sleeper;
  10702. + get_task_struct(p);
  10703. + q->status = error;
  10704. + wake_up_process(p);
  10705. + put_task_struct(p);
  10706. +#else
  10707. if (list_empty(pt)) {
  10708. /*
  10709. * Hold preempt off so that we don't get preempted and have the
  10710. @@ -708,6 +715,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt,
  10711. q->pid = error;
  10712. list_add_tail(&q->list, pt);
  10713. +#endif
  10714. }
  10715. /**
  10716. @@ -721,6 +729,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt,
  10717. */
  10718. static void wake_up_sem_queue_do(struct list_head *pt)
  10719. {
  10720. +#ifndef CONFIG_PREEMPT_RT_BASE
  10721. struct sem_queue *q, *t;
  10722. int did_something;
  10723. @@ -733,6 +742,7 @@ static void wake_up_sem_queue_do(struct list_head *pt)
  10724. }
  10725. if (did_something)
  10726. preempt_enable();
  10727. +#endif
  10728. }
  10729. static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
  10730. diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
  10731. index ebdb0043203a..b9e6aa7e5aa6 100644
  10732. --- a/kernel/Kconfig.locks
  10733. +++ b/kernel/Kconfig.locks
  10734. @@ -225,11 +225,11 @@ config ARCH_SUPPORTS_ATOMIC_RMW
  10735. config MUTEX_SPIN_ON_OWNER
  10736. def_bool y
  10737. - depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
  10738. + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
  10739. config RWSEM_SPIN_ON_OWNER
  10740. def_bool y
  10741. - depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
  10742. + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
  10743. config LOCK_SPIN_ON_OWNER
  10744. def_bool y
  10745. diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
  10746. index 3f9c97419f02..11dbe26a8279 100644
  10747. --- a/kernel/Kconfig.preempt
  10748. +++ b/kernel/Kconfig.preempt
  10749. @@ -1,3 +1,16 @@
  10750. +config PREEMPT
  10751. + bool
  10752. + select PREEMPT_COUNT
  10753. +
  10754. +config PREEMPT_RT_BASE
  10755. + bool
  10756. + select PREEMPT
  10757. +
  10758. +config HAVE_PREEMPT_LAZY
  10759. + bool
  10760. +
  10761. +config PREEMPT_LAZY
  10762. + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL
  10763. choice
  10764. prompt "Preemption Model"
  10765. @@ -33,9 +46,9 @@ config PREEMPT_VOLUNTARY
  10766. Select this if you are building a kernel for a desktop system.
  10767. -config PREEMPT
  10768. +config PREEMPT__LL
  10769. bool "Preemptible Kernel (Low-Latency Desktop)"
  10770. - select PREEMPT_COUNT
  10771. + select PREEMPT
  10772. select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
  10773. help
  10774. This option reduces the latency of the kernel by making
  10775. @@ -52,6 +65,22 @@ config PREEMPT
  10776. embedded system with latency requirements in the milliseconds
  10777. range.
  10778. +config PREEMPT_RTB
  10779. + bool "Preemptible Kernel (Basic RT)"
  10780. + select PREEMPT_RT_BASE
  10781. + help
  10782. + This option is basically the same as (Low-Latency Desktop) but
  10783. + enables changes which are preliminary for the full preemptible
  10784. + RT kernel.
  10785. +
  10786. +config PREEMPT_RT_FULL
  10787. + bool "Fully Preemptible Kernel (RT)"
  10788. + depends on IRQ_FORCED_THREADING
  10789. + select PREEMPT_RT_BASE
  10790. + select PREEMPT_RCU
  10791. + help
  10792. + All and everything
  10793. +
  10794. endchoice
  10795. config PREEMPT_COUNT
  10796. diff --git a/kernel/cgroup.c b/kernel/cgroup.c
  10797. index 86cb5c6e8932..3920f49948f8 100644
  10798. --- a/kernel/cgroup.c
  10799. +++ b/kernel/cgroup.c
  10800. @@ -5005,10 +5005,10 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head)
  10801. queue_work(cgroup_destroy_wq, &css->destroy_work);
  10802. }
  10803. -static void css_release_work_fn(struct work_struct *work)
  10804. +static void css_release_work_fn(struct swork_event *sev)
  10805. {
  10806. struct cgroup_subsys_state *css =
  10807. - container_of(work, struct cgroup_subsys_state, destroy_work);
  10808. + container_of(sev, struct cgroup_subsys_state, destroy_swork);
  10809. struct cgroup_subsys *ss = css->ss;
  10810. struct cgroup *cgrp = css->cgroup;
  10811. @@ -5049,8 +5049,8 @@ static void css_release(struct percpu_ref *ref)
  10812. struct cgroup_subsys_state *css =
  10813. container_of(ref, struct cgroup_subsys_state, refcnt);
  10814. - INIT_WORK(&css->destroy_work, css_release_work_fn);
  10815. - queue_work(cgroup_destroy_wq, &css->destroy_work);
  10816. + INIT_SWORK(&css->destroy_swork, css_release_work_fn);
  10817. + swork_queue(&css->destroy_swork);
  10818. }
  10819. static void init_and_link_css(struct cgroup_subsys_state *css,
  10820. @@ -5694,6 +5694,7 @@ static int __init cgroup_wq_init(void)
  10821. */
  10822. cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
  10823. BUG_ON(!cgroup_destroy_wq);
  10824. + BUG_ON(swork_get());
  10825. /*
  10826. * Used to destroy pidlists and separate to serve as flush domain.
  10827. diff --git a/kernel/cpu.c b/kernel/cpu.c
  10828. index 3e3f6e49eabb..e4d7491c1c12 100644
  10829. --- a/kernel/cpu.c
  10830. +++ b/kernel/cpu.c
  10831. @@ -152,8 +152,8 @@ static struct {
  10832. #endif
  10833. } cpu_hotplug = {
  10834. .active_writer = NULL,
  10835. - .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
  10836. .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
  10837. + .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
  10838. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  10839. .dep_map = {.name = "cpu_hotplug.lock" },
  10840. #endif
  10841. @@ -166,6 +166,289 @@ static struct {
  10842. #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
  10843. #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
  10844. +/**
  10845. + * hotplug_pcp - per cpu hotplug descriptor
  10846. + * @unplug: set when pin_current_cpu() needs to sync tasks
  10847. + * @sync_tsk: the task that waits for tasks to finish pinned sections
  10848. + * @refcount: counter of tasks in pinned sections
  10849. + * @grab_lock: set when the tasks entering pinned sections should wait
  10850. + * @synced: notifier for @sync_tsk to tell cpu_down it's finished
  10851. + * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
  10852. + * @mutex_init: zero if the mutex hasn't been initialized yet.
  10853. + *
  10854. + * Although @unplug and @sync_tsk may point to the same task, the @unplug
  10855. + * is used as a flag and still exists after @sync_tsk has exited and
  10856. + * @sync_tsk set to NULL.
  10857. + */
  10858. +struct hotplug_pcp {
  10859. + struct task_struct *unplug;
  10860. + struct task_struct *sync_tsk;
  10861. + int refcount;
  10862. + int grab_lock;
  10863. + struct completion synced;
  10864. + struct completion unplug_wait;
  10865. +#ifdef CONFIG_PREEMPT_RT_FULL
  10866. + /*
  10867. + * Note, on PREEMPT_RT, the hotplug lock must save the state of
  10868. + * the task, otherwise the mutex will cause the task to fail
  10869. + * to sleep when required. (Because it's called from migrate_disable())
  10870. + *
  10871. + * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
  10872. + * state.
  10873. + */
  10874. + spinlock_t lock;
  10875. +#else
  10876. + struct mutex mutex;
  10877. +#endif
  10878. + int mutex_init;
  10879. +};
  10880. +
  10881. +#ifdef CONFIG_PREEMPT_RT_FULL
  10882. +# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock)
  10883. +# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock)
  10884. +#else
  10885. +# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
  10886. +# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
  10887. +#endif
  10888. +
  10889. +static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
  10890. +
  10891. +/**
  10892. + * pin_current_cpu - Prevent the current cpu from being unplugged
  10893. + *
  10894. + * Lightweight version of get_online_cpus() to prevent cpu from being
  10895. + * unplugged when code runs in a migration disabled region.
  10896. + *
  10897. + * Must be called with preemption disabled (preempt_count = 1)!
  10898. + */
  10899. +void pin_current_cpu(void)
  10900. +{
  10901. + struct hotplug_pcp *hp;
  10902. + int force = 0;
  10903. +
  10904. +retry:
  10905. + hp = this_cpu_ptr(&hotplug_pcp);
  10906. +
  10907. + if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
  10908. + hp->unplug == current) {
  10909. + hp->refcount++;
  10910. + return;
  10911. + }
  10912. + if (hp->grab_lock) {
  10913. + preempt_enable();
  10914. + hotplug_lock(hp);
  10915. + hotplug_unlock(hp);
  10916. + } else {
  10917. + preempt_enable();
  10918. + /*
  10919. + * Try to push this task off of this CPU.
  10920. + */
  10921. + if (!migrate_me()) {
  10922. + preempt_disable();
  10923. + hp = this_cpu_ptr(&hotplug_pcp);
  10924. + if (!hp->grab_lock) {
  10925. + /*
10926. + * Just let it continue, it's already pinned
10927. + * or about to sleep.
  10928. + */
  10929. + force = 1;
  10930. + goto retry;
  10931. + }
  10932. + preempt_enable();
  10933. + }
  10934. + }
  10935. + preempt_disable();
  10936. + goto retry;
  10937. +}
  10938. +
  10939. +/**
  10940. + * unpin_current_cpu - Allow unplug of current cpu
  10941. + *
  10942. + * Must be called with preemption or interrupts disabled!
  10943. + */
  10944. +void unpin_current_cpu(void)
  10945. +{
  10946. + struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
  10947. +
  10948. + WARN_ON(hp->refcount <= 0);
  10949. +
  10950. + /* This is safe. sync_unplug_thread is pinned to this cpu */
  10951. + if (!--hp->refcount && hp->unplug && hp->unplug != current)
  10952. + wake_up_process(hp->unplug);
  10953. +}
  10954. +
  10955. +static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
  10956. +{
  10957. + set_current_state(TASK_UNINTERRUPTIBLE);
  10958. + while (hp->refcount) {
  10959. + schedule_preempt_disabled();
  10960. + set_current_state(TASK_UNINTERRUPTIBLE);
  10961. + }
  10962. +}
  10963. +
  10964. +static int sync_unplug_thread(void *data)
  10965. +{
  10966. + struct hotplug_pcp *hp = data;
  10967. +
  10968. + wait_for_completion(&hp->unplug_wait);
  10969. + preempt_disable();
  10970. + hp->unplug = current;
  10971. + wait_for_pinned_cpus(hp);
  10972. +
  10973. + /*
  10974. + * This thread will synchronize the cpu_down() with threads
  10975. + * that have pinned the CPU. When the pinned CPU count reaches
  10976. + * zero, we inform the cpu_down code to continue to the next step.
  10977. + */
  10978. + set_current_state(TASK_UNINTERRUPTIBLE);
  10979. + preempt_enable();
  10980. + complete(&hp->synced);
  10981. +
  10982. + /*
  10983. + * If all goes well, the next step needs tasks to wait until the
  10984. + * CPU is offline before continuing. To do this, grab_lock is set
  10985. + * and tasks entering pin_current_cpu() will block on the mutex.
  10986. + * But we still need to wait for those that are already in pinned
  10987. + * CPU sections. If cpu_down() fails, kthread_should_stop() will
  10988. + * kick this thread out.
  10989. + */
  10990. + while (!hp->grab_lock && !kthread_should_stop()) {
  10991. + schedule();
  10992. + set_current_state(TASK_UNINTERRUPTIBLE);
  10993. + }
  10994. +
  10995. + /* Make sure grab_lock is seen before we see a stale completion */
  10996. + smp_mb();
  10997. +
  10998. + /*
  10999. + * Now just before cpu_down() enters stop machine, we need to make
  11000. + * sure all tasks that are in pinned CPU sections are out, and new
  11001. + * tasks will now grab the lock, keeping them from entering pinned
  11002. + * CPU sections.
  11003. + */
  11004. + if (!kthread_should_stop()) {
  11005. + preempt_disable();
  11006. + wait_for_pinned_cpus(hp);
  11007. + preempt_enable();
  11008. + complete(&hp->synced);
  11009. + }
  11010. +
  11011. + set_current_state(TASK_UNINTERRUPTIBLE);
  11012. + while (!kthread_should_stop()) {
  11013. + schedule();
  11014. + set_current_state(TASK_UNINTERRUPTIBLE);
  11015. + }
  11016. + set_current_state(TASK_RUNNING);
  11017. +
  11018. + /*
  11019. + * Force this thread off this CPU as it's going down and
  11020. + * we don't want any more work on this CPU.
  11021. + */
  11022. + current->flags &= ~PF_NO_SETAFFINITY;
  11023. + set_cpus_allowed_ptr(current, cpu_present_mask);
  11024. + migrate_me();
  11025. + return 0;
  11026. +}
  11027. +
  11028. +static void __cpu_unplug_sync(struct hotplug_pcp *hp)
  11029. +{
  11030. + wake_up_process(hp->sync_tsk);
  11031. + wait_for_completion(&hp->synced);
  11032. +}
  11033. +
  11034. +static void __cpu_unplug_wait(unsigned int cpu)
  11035. +{
  11036. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  11037. +
  11038. + complete(&hp->unplug_wait);
  11039. + wait_for_completion(&hp->synced);
  11040. +}
  11041. +
  11042. +/*
  11043. + * Start the sync_unplug_thread on the target cpu and wait for it to
  11044. + * complete.
  11045. + */
  11046. +static int cpu_unplug_begin(unsigned int cpu)
  11047. +{
  11048. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  11049. + int err;
  11050. +
  11051. + /* Protected by cpu_hotplug.lock */
  11052. + if (!hp->mutex_init) {
  11053. +#ifdef CONFIG_PREEMPT_RT_FULL
  11054. + spin_lock_init(&hp->lock);
  11055. +#else
  11056. + mutex_init(&hp->mutex);
  11057. +#endif
  11058. + hp->mutex_init = 1;
  11059. + }
  11060. +
  11061. + /* Inform the scheduler to migrate tasks off this CPU */
  11062. + tell_sched_cpu_down_begin(cpu);
  11063. +
  11064. + init_completion(&hp->synced);
  11065. + init_completion(&hp->unplug_wait);
  11066. +
  11067. + hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
  11068. + if (IS_ERR(hp->sync_tsk)) {
  11069. + err = PTR_ERR(hp->sync_tsk);
  11070. + hp->sync_tsk = NULL;
  11071. + return err;
  11072. + }
  11073. + kthread_bind(hp->sync_tsk, cpu);
  11074. +
  11075. + /*
  11076. + * Wait for tasks to get out of the pinned sections;
  11077. + * it's still OK if new tasks enter. Some CPU notifiers will
  11078. + * wait for tasks that are going to enter these sections and
  11079. + * we must not have them block.
  11080. + */
  11081. + wake_up_process(hp->sync_tsk);
  11082. + return 0;
  11083. +}
  11084. +
  11085. +static void cpu_unplug_sync(unsigned int cpu)
  11086. +{
  11087. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  11088. +
  11089. + init_completion(&hp->synced);
  11090. + /* The completion needs to be initialized before setting grab_lock */
  11091. + smp_wmb();
  11092. +
  11093. + /* Grab the mutex before setting grab_lock */
  11094. + hotplug_lock(hp);
  11095. + hp->grab_lock = 1;
  11096. +
  11097. + /*
  11098. + * The CPU notifiers have been completed.
  11099. + * Wait for tasks to get out of pinned CPU sections and have new
  11100. + * tasks block until the CPU is completely down.
  11101. + */
  11102. + __cpu_unplug_sync(hp);
  11103. +
  11104. + /* All done with the sync thread */
  11105. + kthread_stop(hp->sync_tsk);
  11106. + hp->sync_tsk = NULL;
  11107. +}
  11108. +
  11109. +static void cpu_unplug_done(unsigned int cpu)
  11110. +{
  11111. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  11112. +
  11113. + hp->unplug = NULL;
  11114. + /* Let all tasks know cpu unplug is finished before cleaning up */
  11115. + smp_wmb();
  11116. +
  11117. + if (hp->sync_tsk)
  11118. + kthread_stop(hp->sync_tsk);
  11119. +
  11120. + if (hp->grab_lock) {
  11121. + hotplug_unlock(hp);
  11122. + /* protected by cpu_hotplug.lock */
  11123. + hp->grab_lock = 0;
  11124. + }
  11125. + tell_sched_cpu_down_done(cpu);
  11126. +}
  11127. void get_online_cpus(void)
  11128. {
  11129. @@ -718,10 +1001,14 @@ static int takedown_cpu(unsigned int cpu)
  11130. else
  11131. synchronize_rcu();
  11132. + __cpu_unplug_wait(cpu);
  11133. /* Park the smpboot threads */
  11134. kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
  11135. smpboot_park_threads(cpu);
  11136. + /* Notifiers are done. Don't let any more tasks pin this CPU. */
  11137. + cpu_unplug_sync(cpu);
  11138. +
  11139. /*
  11140. * Prevent irq alloc/free while the dying cpu reorganizes the
  11141. * interrupt affinities.
  11142. @@ -807,6 +1094,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
  11143. struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
  11144. int prev_state, ret = 0;
  11145. bool hasdied = false;
  11146. + int mycpu;
  11147. + cpumask_var_t cpumask;
  11148. + cpumask_var_t cpumask_org;
  11149. if (num_online_cpus() == 1)
  11150. return -EBUSY;
  11151. @@ -814,7 +1104,34 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
  11152. if (!cpu_present(cpu))
  11153. return -EINVAL;
  11154. + /* Move the downtaker off the unplug cpu */
  11155. + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
  11156. + return -ENOMEM;
  11157. + if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) {
  11158. + free_cpumask_var(cpumask);
  11159. + return -ENOMEM;
  11160. + }
  11161. +
  11162. + cpumask_copy(cpumask_org, tsk_cpus_allowed(current));
  11163. + cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
  11164. + set_cpus_allowed_ptr(current, cpumask);
  11165. + free_cpumask_var(cpumask);
  11166. + migrate_disable();
  11167. + mycpu = smp_processor_id();
  11168. + if (mycpu == cpu) {
  11169. + printk(KERN_ERR "Yuck! Still on unplug CPU!\n");
  11170. + migrate_enable();
  11171. + ret = -EBUSY;
  11172. + goto restore_cpus;
  11173. + }
  11174. +
  11175. + migrate_enable();
  11176. cpu_hotplug_begin();
  11177. + ret = cpu_unplug_begin(cpu);
  11178. + if (ret) {
  11179. + printk("cpu_unplug_begin(%d) failed\n", cpu);
  11180. + goto out_cancel;
  11181. + }
  11182. cpuhp_tasks_frozen = tasks_frozen;
  11183. @@ -853,10 +1170,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
  11184. hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
  11185. out:
  11186. + cpu_unplug_done(cpu);
  11187. +out_cancel:
  11188. cpu_hotplug_done();
  11189. /* This post dead nonsense must die */
  11190. if (!ret && hasdied)
  11191. cpu_notify_nofail(CPU_POST_DEAD, cpu);
  11192. +restore_cpus:
  11193. + set_cpus_allowed_ptr(current, cpumask_org);
  11194. + free_cpumask_var(cpumask_org);
  11195. return ret;
  11196. }
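
The per-CPU pinning added above is not normally called directly; as the struct hotplug_pcp comment notes, migrate_disable() invokes pin_current_cpu() on an RT tree. A minimal kernel-style sketch of the intended usage, assuming an RT tree where migrate_disable()/migrate_enable() are available (the per-CPU variable and function below are hypothetical, for illustration only):

#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(unsigned long, demo_hits);

static void demo_account_hit(void)
{
	migrate_disable();		/* calls pin_current_cpu(): this CPU cannot be unplugged */
	this_cpu_inc(demo_hits);	/* we stay on this, still-online, CPU */
	migrate_enable();		/* calls unpin_current_cpu(); a pending cpu_down() may proceed */
}

While the refcount held via pin_current_cpu() is non-zero, the sync_unplug thread above sits in wait_for_pinned_cpus(), so cpu_down() cannot reach stop_machine until the section ends.
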
  11197. diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
  11198. index fc1ef736253c..83c666537a7a 100644
  11199. --- a/kernel/debug/kdb/kdb_io.c
  11200. +++ b/kernel/debug/kdb/kdb_io.c
  11201. @@ -554,7 +554,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
  11202. int linecount;
  11203. int colcount;
  11204. int logging, saved_loglevel = 0;
  11205. - int saved_trap_printk;
  11206. int got_printf_lock = 0;
  11207. int retlen = 0;
  11208. int fnd, len;
  11209. @@ -565,8 +564,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
  11210. unsigned long uninitialized_var(flags);
  11211. preempt_disable();
  11212. - saved_trap_printk = kdb_trap_printk;
  11213. - kdb_trap_printk = 0;
  11214. /* Serialize kdb_printf if multiple cpus try to write at once.
  11215. * But if any cpu goes recursive in kdb, just print the output,
  11216. @@ -855,7 +852,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
  11217. } else {
  11218. __release(kdb_printf_lock);
  11219. }
  11220. - kdb_trap_printk = saved_trap_printk;
  11221. preempt_enable();
  11222. return retlen;
  11223. }
  11224. @@ -865,9 +861,11 @@ int kdb_printf(const char *fmt, ...)
  11225. va_list ap;
  11226. int r;
  11227. + kdb_trap_printk++;
  11228. va_start(ap, fmt);
  11229. r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap);
  11230. va_end(ap);
  11231. + kdb_trap_printk--;
  11232. return r;
  11233. }
  11234. diff --git a/kernel/events/core.c b/kernel/events/core.c
  11235. index a69c90cea05d..dc7b7c094abb 100644
  11236. --- a/kernel/events/core.c
  11237. +++ b/kernel/events/core.c
  11238. @@ -963,6 +963,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
  11239. raw_spin_lock_init(&cpuctx->hrtimer_lock);
  11240. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
  11241. timer->function = perf_mux_hrtimer_handler;
  11242. + timer->irqsafe = 1;
  11243. }
  11244. static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
  11245. @@ -7261,6 +7262,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
  11246. hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  11247. hwc->hrtimer.function = perf_swevent_hrtimer;
  11248. + hwc->hrtimer.irqsafe = 1;
  11249. /*
  11250. * Since hrtimers have a fixed rate, we can do a static freq->period
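
The timer->irqsafe = 1 assignments rely on the hrtimer.irqsafe field that this patch set adds to struct hrtimer: such timers keep expiring from hard interrupt context on RT instead of being deferred to softirq-based hrtimer handling. A hedged sketch of the same pattern for a timer whose callback is short and never sleeps (all names below are hypothetical):

#include <linux/hrtimer.h>

static struct hrtimer demo_timer;

static enum hrtimer_restart demo_timer_fn(struct hrtimer *t)
{
	/* short, non-sleeping work only; raw locks at most */
	return HRTIMER_NORESTART;
}

static void demo_timer_setup(void)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	demo_timer.function = demo_timer_fn;
	demo_timer.irqsafe = 1;		/* RT-only field added by this patch set */
}
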
  11251. diff --git a/kernel/exit.c b/kernel/exit.c
  11252. index 79c7e38a203b..12da2d0ba120 100644
  11253. --- a/kernel/exit.c
  11254. +++ b/kernel/exit.c
  11255. @@ -143,7 +143,7 @@ static void __exit_signal(struct task_struct *tsk)
  11256. * Do this under ->siglock, we can race with another thread
  11257. * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
  11258. */
  11259. - flush_sigqueue(&tsk->pending);
  11260. + flush_task_sigqueue(tsk);
  11261. tsk->sighand = NULL;
  11262. spin_unlock(&sighand->siglock);
  11263. diff --git a/kernel/fork.c b/kernel/fork.c
  11264. index d277e83ed3e0..18cc63048297 100644
  11265. --- a/kernel/fork.c
  11266. +++ b/kernel/fork.c
  11267. @@ -253,7 +253,9 @@ static inline void put_signal_struct(struct signal_struct *sig)
  11268. if (atomic_dec_and_test(&sig->sigcnt))
  11269. free_signal_struct(sig);
  11270. }
  11271. -
  11272. +#ifdef CONFIG_PREEMPT_RT_BASE
  11273. +static
  11274. +#endif
  11275. void __put_task_struct(struct task_struct *tsk)
  11276. {
  11277. WARN_ON(!tsk->exit_state);
  11278. @@ -270,7 +272,18 @@ void __put_task_struct(struct task_struct *tsk)
  11279. if (!profile_handoff_task(tsk))
  11280. free_task(tsk);
  11281. }
  11282. +#ifndef CONFIG_PREEMPT_RT_BASE
  11283. EXPORT_SYMBOL_GPL(__put_task_struct);
  11284. +#else
  11285. +void __put_task_struct_cb(struct rcu_head *rhp)
  11286. +{
  11287. + struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu);
  11288. +
  11289. + __put_task_struct(tsk);
  11290. +
  11291. +}
  11292. +EXPORT_SYMBOL_GPL(__put_task_struct_cb);
  11293. +#endif
  11294. void __init __weak arch_task_cache_init(void) { }
  11295. @@ -699,6 +712,19 @@ void __mmdrop(struct mm_struct *mm)
  11296. }
  11297. EXPORT_SYMBOL_GPL(__mmdrop);
  11298. +#ifdef CONFIG_PREEMPT_RT_BASE
  11299. +/*
  11300. + * RCU callback for delayed mm drop. Not strictly an RCU use, but we
  11301. + * don't want to add another facility just for this.
  11302. + */
  11303. +void __mmdrop_delayed(struct rcu_head *rhp)
  11304. +{
  11305. + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
  11306. +
  11307. + __mmdrop(mm);
  11308. +}
  11309. +#endif
  11310. +
  11311. /*
  11312. * Decrement the use count and release all resources for an mm.
  11313. */
  11314. @@ -1228,6 +1254,9 @@ static void rt_mutex_init_task(struct task_struct *p)
  11315. */
  11316. static void posix_cpu_timers_init(struct task_struct *tsk)
  11317. {
  11318. +#ifdef CONFIG_PREEMPT_RT_BASE
  11319. + tsk->posix_timer_list = NULL;
  11320. +#endif
  11321. tsk->cputime_expires.prof_exp = 0;
  11322. tsk->cputime_expires.virt_exp = 0;
  11323. tsk->cputime_expires.sched_exp = 0;
  11324. @@ -1352,6 +1381,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
  11325. spin_lock_init(&p->alloc_lock);
  11326. init_sigpending(&p->pending);
  11327. + p->sigqueue_cache = NULL;
  11328. p->utime = p->stime = p->gtime = 0;
  11329. p->utimescaled = p->stimescaled = 0;
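
Both __put_task_struct_cb() and __mmdrop_delayed() above follow the same pattern: teardown that is too heavy, or not allowed to sleep, in the RT caller's context is pushed into an RCU callback. A minimal sketch of that pattern for a hypothetical object (the struct and names are invented for illustration):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_obj {
	struct rcu_head rcu;
	void *payload;
};

static void demo_obj_free_rcu(struct rcu_head *rhp)
{
	struct demo_obj *obj = container_of(rhp, struct demo_obj, rcu);

	kfree(obj->payload);
	kfree(obj);
}

static void demo_obj_put(struct demo_obj *obj)
{
	/* defer the real free, as __mmdrop_delayed() does for mm_struct */
	call_rcu(&obj->rcu, demo_obj_free_rcu);
}
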
  11330. diff --git a/kernel/futex.c b/kernel/futex.c
  11331. index 6555d5459e98..2d572edcac53 100644
  11332. --- a/kernel/futex.c
  11333. +++ b/kernel/futex.c
  11334. @@ -874,7 +874,9 @@ void exit_pi_state_list(struct task_struct *curr)
  11335. * task still owns the PI-state:
  11336. */
  11337. if (head->next != next) {
  11338. + raw_spin_unlock_irq(&curr->pi_lock);
  11339. spin_unlock(&hb->lock);
  11340. + raw_spin_lock_irq(&curr->pi_lock);
  11341. continue;
  11342. }
  11343. @@ -1269,6 +1271,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
  11344. struct futex_pi_state *pi_state = this->pi_state;
  11345. u32 uninitialized_var(curval), newval;
  11346. WAKE_Q(wake_q);
  11347. + WAKE_Q(wake_sleeper_q);
  11348. bool deboost;
  11349. int ret = 0;
  11350. @@ -1335,7 +1338,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
  11351. raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
  11352. - deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
  11353. + deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
  11354. + &wake_sleeper_q);
  11355. /*
  11356. * First unlock HB so the waiter does not spin on it once he got woken
  11357. @@ -1345,6 +1349,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
  11358. */
  11359. spin_unlock(&hb->lock);
  11360. wake_up_q(&wake_q);
  11361. + wake_up_q_sleeper(&wake_sleeper_q);
  11362. if (deboost)
  11363. rt_mutex_adjust_prio(current);
  11364. @@ -1894,6 +1899,16 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
  11365. requeue_pi_wake_futex(this, &key2, hb2);
  11366. drop_count++;
  11367. continue;
  11368. + } else if (ret == -EAGAIN) {
  11369. + /*
  11370. + * Waiter was woken by timeout or
  11371. + * signal and has set pi_blocked_on to
  11372. + * PI_WAKEUP_INPROGRESS before we
  11373. + * tried to enqueue it on the rtmutex.
  11374. + */
  11375. + this->pi_state = NULL;
  11376. + put_pi_state(pi_state);
  11377. + continue;
  11378. } else if (ret) {
  11379. /*
  11380. * rt_mutex_start_proxy_lock() detected a
  11381. @@ -2784,7 +2799,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  11382. struct hrtimer_sleeper timeout, *to = NULL;
  11383. struct rt_mutex_waiter rt_waiter;
  11384. struct rt_mutex *pi_mutex = NULL;
  11385. - struct futex_hash_bucket *hb;
  11386. + struct futex_hash_bucket *hb, *hb2;
  11387. union futex_key key2 = FUTEX_KEY_INIT;
  11388. struct futex_q q = futex_q_init;
  11389. int res, ret;
  11390. @@ -2809,10 +2824,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  11391. * The waiter is allocated on our stack, manipulated by the requeue
  11392. * code while we sleep on uaddr.
  11393. */
  11394. - debug_rt_mutex_init_waiter(&rt_waiter);
  11395. - RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
  11396. - RB_CLEAR_NODE(&rt_waiter.tree_entry);
  11397. - rt_waiter.task = NULL;
  11398. + rt_mutex_init_waiter(&rt_waiter, false);
  11399. ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
  11400. if (unlikely(ret != 0))
  11401. @@ -2843,20 +2855,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  11402. /* Queue the futex_q, drop the hb lock, wait for wakeup. */
  11403. futex_wait_queue_me(hb, &q, to);
  11404. - spin_lock(&hb->lock);
  11405. - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
  11406. - spin_unlock(&hb->lock);
  11407. - if (ret)
  11408. - goto out_put_keys;
  11409. + /*
  11410. + * On RT we must avoid races with requeue and trying to block
  11411. + * on two mutexes (hb->lock and uaddr2's rtmutex) by
  11412. + * serializing access to pi_blocked_on with pi_lock.
  11413. + */
  11414. + raw_spin_lock_irq(&current->pi_lock);
  11415. + if (current->pi_blocked_on) {
  11416. + /*
  11417. + * We have been requeued or are in the process of
  11418. + * being requeued.
  11419. + */
  11420. + raw_spin_unlock_irq(&current->pi_lock);
  11421. + } else {
  11422. + /*
  11423. + * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS
  11424. + * prevents a concurrent requeue from moving us to the
  11425. + * uaddr2 rtmutex. After that we can safely acquire
  11426. + * (and possibly block on) hb->lock.
  11427. + */
  11428. + current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
  11429. + raw_spin_unlock_irq(&current->pi_lock);
  11430. +
  11431. + spin_lock(&hb->lock);
  11432. +
  11433. + /*
  11434. + * Clean up pi_blocked_on. We might leak it otherwise
  11435. + * when we succeeded with the hb->lock in the fast
  11436. + * path.
  11437. + */
  11438. + raw_spin_lock_irq(&current->pi_lock);
  11439. + current->pi_blocked_on = NULL;
  11440. + raw_spin_unlock_irq(&current->pi_lock);
  11441. +
  11442. + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
  11443. + spin_unlock(&hb->lock);
  11444. + if (ret)
  11445. + goto out_put_keys;
  11446. + }
  11447. /*
  11448. - * In order for us to be here, we know our q.key == key2, and since
  11449. - * we took the hb->lock above, we also know that futex_requeue() has
  11450. - * completed and we no longer have to concern ourselves with a wakeup
  11451. - * race with the atomic proxy lock acquisition by the requeue code. The
  11452. - * futex_requeue dropped our key1 reference and incremented our key2
  11453. - * reference count.
  11454. + * In order to be here, we have either been requeued, are in
  11455. + * the process of being requeued, or requeue successfully
  11456. + * acquired uaddr2 on our behalf. If pi_blocked_on was
  11457. + * non-null above, we may be racing with a requeue. Do not
  11458. + * rely on q->lock_ptr to be hb2->lock until after blocking on
  11459. + * hb->lock or hb2->lock. The futex_requeue dropped our key1
  11460. + * reference and incremented our key2 reference count.
  11461. */
  11462. + hb2 = hash_futex(&key2);
  11463. /* Check if the requeue code acquired the second futex for us. */
  11464. if (!q.rt_waiter) {
  11465. @@ -2865,14 +2912,15 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  11466. * did a lock-steal - fix up the PI-state in that case.
  11467. */
  11468. if (q.pi_state && (q.pi_state->owner != current)) {
  11469. - spin_lock(q.lock_ptr);
  11470. + spin_lock(&hb2->lock);
  11471. + BUG_ON(&hb2->lock != q.lock_ptr);
  11472. ret = fixup_pi_state_owner(uaddr2, &q, current);
  11473. /*
  11474. * Drop the reference to the pi state which
  11475. * the requeue_pi() code acquired for us.
  11476. */
  11477. put_pi_state(q.pi_state);
  11478. - spin_unlock(q.lock_ptr);
  11479. + spin_unlock(&hb2->lock);
  11480. }
  11481. } else {
  11482. /*
  11483. @@ -2885,7 +2933,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  11484. ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
  11485. debug_rt_mutex_free_waiter(&rt_waiter);
  11486. - spin_lock(q.lock_ptr);
  11487. + spin_lock(&hb2->lock);
  11488. + BUG_ON(&hb2->lock != q.lock_ptr);
  11489. /*
  11490. * Fixup the pi_state owner and possibly acquire the lock if we
  11491. * haven't already.
  11492. diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
  11493. index a15b5485b446..28c39c16f989 100644
  11494. --- a/kernel/irq/handle.c
  11495. +++ b/kernel/irq/handle.c
  11496. @@ -134,6 +134,8 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
  11497. irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
  11498. {
  11499. + struct pt_regs *regs = get_irq_regs();
  11500. + u64 ip = regs ? instruction_pointer(regs) : 0;
  11501. irqreturn_t retval = IRQ_NONE;
  11502. unsigned int flags = 0, irq = desc->irq_data.irq;
  11503. struct irqaction *action;
  11504. @@ -174,7 +176,11 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
  11505. retval |= res;
  11506. }
  11507. - add_interrupt_randomness(irq, flags);
  11508. +#ifdef CONFIG_PREEMPT_RT_FULL
  11509. + desc->random_ip = ip;
  11510. +#else
  11511. + add_interrupt_randomness(irq, flags, ip);
  11512. +#endif
  11513. if (!noirqdebug)
  11514. note_interrupt(desc, retval);
  11515. diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
  11516. index cc1cc641d653..f5e9fd5408d6 100644
  11517. --- a/kernel/irq/manage.c
  11518. +++ b/kernel/irq/manage.c
  11519. @@ -22,6 +22,7 @@
  11520. #include "internals.h"
  11521. #ifdef CONFIG_IRQ_FORCED_THREADING
  11522. +# ifndef CONFIG_PREEMPT_RT_BASE
  11523. __read_mostly bool force_irqthreads;
  11524. static int __init setup_forced_irqthreads(char *arg)
  11525. @@ -30,6 +31,7 @@ static int __init setup_forced_irqthreads(char *arg)
  11526. return 0;
  11527. }
  11528. early_param("threadirqs", setup_forced_irqthreads);
  11529. +# endif
  11530. #endif
  11531. static void __synchronize_hardirq(struct irq_desc *desc)
  11532. @@ -179,6 +181,62 @@ static inline void
  11533. irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
  11534. #endif
  11535. +#ifdef CONFIG_PREEMPT_RT_FULL
  11536. +static void _irq_affinity_notify(struct irq_affinity_notify *notify);
  11537. +static struct task_struct *set_affinity_helper;
  11538. +static LIST_HEAD(affinity_list);
  11539. +static DEFINE_RAW_SPINLOCK(affinity_list_lock);
  11540. +
  11541. +static int set_affinity_thread(void *unused)
  11542. +{
  11543. + while (1) {
  11544. + struct irq_affinity_notify *notify;
  11545. + int empty;
  11546. +
  11547. + set_current_state(TASK_INTERRUPTIBLE);
  11548. +
  11549. + raw_spin_lock_irq(&affinity_list_lock);
  11550. + empty = list_empty(&affinity_list);
  11551. + raw_spin_unlock_irq(&affinity_list_lock);
  11552. +
  11553. + if (empty)
  11554. + schedule();
  11555. + if (kthread_should_stop())
  11556. + break;
  11557. + set_current_state(TASK_RUNNING);
  11558. +try_next:
  11559. + notify = NULL;
  11560. +
  11561. + raw_spin_lock_irq(&affinity_list_lock);
  11562. + if (!list_empty(&affinity_list)) {
  11563. + notify = list_first_entry(&affinity_list,
  11564. + struct irq_affinity_notify, list);
  11565. + list_del_init(&notify->list);
  11566. + }
  11567. + raw_spin_unlock_irq(&affinity_list_lock);
  11568. +
  11569. + if (!notify)
  11570. + continue;
  11571. + _irq_affinity_notify(notify);
  11572. + goto try_next;
  11573. + }
  11574. + return 0;
  11575. +}
  11576. +
  11577. +static void init_helper_thread(void)
  11578. +{
  11579. + if (set_affinity_helper)
  11580. + return;
  11581. + set_affinity_helper = kthread_run(set_affinity_thread, NULL,
  11582. + "affinity-cb");
  11583. + WARN_ON(IS_ERR(set_affinity_helper));
  11584. +}
  11585. +#else
  11586. +
  11587. +static inline void init_helper_thread(void) { }
  11588. +
  11589. +#endif
  11590. +
  11591. int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
  11592. bool force)
  11593. {
  11594. @@ -218,7 +276,17 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
  11595. if (desc->affinity_notify) {
  11596. kref_get(&desc->affinity_notify->kref);
  11597. +
  11598. +#ifdef CONFIG_PREEMPT_RT_FULL
  11599. + raw_spin_lock(&affinity_list_lock);
  11600. + if (list_empty(&desc->affinity_notify->list))
  11601. + list_add_tail(&affinity_list,
  11602. + &desc->affinity_notify->list);
  11603. + raw_spin_unlock(&affinity_list_lock);
  11604. + wake_up_process(set_affinity_helper);
  11605. +#else
  11606. schedule_work(&desc->affinity_notify->work);
  11607. +#endif
  11608. }
  11609. irqd_set(data, IRQD_AFFINITY_SET);
  11610. @@ -256,10 +324,8 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
  11611. }
  11612. EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
  11613. -static void irq_affinity_notify(struct work_struct *work)
  11614. +static void _irq_affinity_notify(struct irq_affinity_notify *notify)
  11615. {
  11616. - struct irq_affinity_notify *notify =
  11617. - container_of(work, struct irq_affinity_notify, work);
  11618. struct irq_desc *desc = irq_to_desc(notify->irq);
  11619. cpumask_var_t cpumask;
  11620. unsigned long flags;
  11621. @@ -281,6 +347,13 @@ static void irq_affinity_notify(struct work_struct *work)
  11622. kref_put(&notify->kref, notify->release);
  11623. }
  11624. +static void irq_affinity_notify(struct work_struct *work)
  11625. +{
  11626. + struct irq_affinity_notify *notify =
  11627. + container_of(work, struct irq_affinity_notify, work);
  11628. + _irq_affinity_notify(notify);
  11629. +}
  11630. +
  11631. /**
  11632. * irq_set_affinity_notifier - control notification of IRQ affinity changes
  11633. * @irq: Interrupt for which to enable/disable notification
  11634. @@ -310,6 +383,8 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
  11635. notify->irq = irq;
  11636. kref_init(&notify->kref);
  11637. INIT_WORK(&notify->work, irq_affinity_notify);
  11638. + INIT_LIST_HEAD(&notify->list);
  11639. + init_helper_thread();
  11640. }
  11641. raw_spin_lock_irqsave(&desc->lock, flags);
  11642. @@ -863,7 +938,15 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
  11643. local_bh_disable();
  11644. ret = action->thread_fn(action->irq, action->dev_id);
  11645. irq_finalize_oneshot(desc, action);
  11646. - local_bh_enable();
  11647. + /*
  11648. + * Interrupts which have real time requirements can be set up
  11649. + * to avoid softirq processing in the thread handler. This is
  11650. + * safe as these interrupts do not raise soft interrupts.
  11651. + */
  11652. + if (irq_settings_no_softirq_call(desc))
  11653. + _local_bh_enable();
  11654. + else
  11655. + local_bh_enable();
  11656. return ret;
  11657. }
  11658. @@ -960,6 +1043,12 @@ static int irq_thread(void *data)
  11659. if (action_ret == IRQ_WAKE_THREAD)
  11660. irq_wake_secondary(desc, action);
  11661. +#ifdef CONFIG_PREEMPT_RT_FULL
  11662. + migrate_disable();
  11663. + add_interrupt_randomness(action->irq, 0,
  11664. + desc->random_ip ^ (unsigned long) action);
  11665. + migrate_enable();
  11666. +#endif
  11667. wake_threads_waitq(desc);
  11668. }
  11669. @@ -1313,6 +1402,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
  11670. irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
  11671. }
  11672. + if (new->flags & IRQF_NO_SOFTIRQ_CALL)
  11673. + irq_settings_set_no_softirq_call(desc);
  11674. +
  11675. /* Set default affinity mask once everything is setup */
  11676. setup_affinity(desc, mask);
  11677. @@ -1998,7 +2090,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state);
  11678. * This call sets the internal irqchip state of an interrupt,
  11679. * depending on the value of @which.
  11680. *
  11681. - * This function should be called with preemption disabled if the
  11682. + * This function should be called with migration disabled if the
  11683. * interrupt controller has per-cpu registers.
  11684. */
  11685. int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
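
The IRQF_NO_SOFTIRQ_CALL plumbing introduced here (see also the kernel/irq/settings.h hunk below) lets a forced-threaded handler return via _local_bh_enable() and skip softirq processing. A hedged driver-side sketch, assuming the RT-only IRQF_NO_SOFTIRQ_CALL flag and a thread handler that never raises softirqs itself (device name and handlers are hypothetical):

#include <linux/interrupt.h>

static irqreturn_t demo_hardirq(int irq, void *dev_id)
{
	return IRQ_WAKE_THREAD;
}

static irqreturn_t demo_thread_fn(int irq, void *dev_id)
{
	/* latency-critical work; must not raise softirqs itself */
	return IRQ_HANDLED;
}

static int demo_setup_irq(unsigned int irq, void *dev)
{
	return request_threaded_irq(irq, demo_hardirq, demo_thread_fn,
				    IRQF_ONESHOT | IRQF_NO_SOFTIRQ_CALL,
				    "demo-rt", dev);
}
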
  11686. diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
  11687. index 320579d89091..2df2d4445b1e 100644
  11688. --- a/kernel/irq/settings.h
  11689. +++ b/kernel/irq/settings.h
  11690. @@ -16,6 +16,7 @@ enum {
  11691. _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID,
  11692. _IRQ_IS_POLLED = IRQ_IS_POLLED,
  11693. _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY,
  11694. + _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL,
  11695. _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
  11696. };
  11697. @@ -30,6 +31,7 @@ enum {
  11698. #define IRQ_PER_CPU_DEVID GOT_YOU_MORON
  11699. #define IRQ_IS_POLLED GOT_YOU_MORON
  11700. #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON
  11701. +#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON
  11702. #undef IRQF_MODIFY_MASK
  11703. #define IRQF_MODIFY_MASK GOT_YOU_MORON
  11704. @@ -40,6 +42,16 @@ irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set)
  11705. desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK);
  11706. }
  11707. +static inline bool irq_settings_no_softirq_call(struct irq_desc *desc)
  11708. +{
  11709. + return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL;
  11710. +}
  11711. +
  11712. +static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc)
  11713. +{
  11714. + desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL;
  11715. +}
  11716. +
  11717. static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
  11718. {
  11719. return desc->status_use_accessors & _IRQ_PER_CPU;
  11720. diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
  11721. index 5707f97a3e6a..73f38dc7a7fb 100644
  11722. --- a/kernel/irq/spurious.c
  11723. +++ b/kernel/irq/spurious.c
  11724. @@ -442,6 +442,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
  11725. static int __init irqfixup_setup(char *str)
  11726. {
  11727. +#ifdef CONFIG_PREEMPT_RT_BASE
  11728. + pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
  11729. + return 1;
  11730. +#endif
  11731. irqfixup = 1;
  11732. printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
  11733. printk(KERN_WARNING "This may impact system performance.\n");
  11734. @@ -454,6 +458,10 @@ module_param(irqfixup, int, 0644);
  11735. static int __init irqpoll_setup(char *str)
  11736. {
  11737. +#ifdef CONFIG_PREEMPT_RT_BASE
  11738. + pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
  11739. + return 1;
  11740. +#endif
  11741. irqfixup = 2;
  11742. printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
  11743. "enabled\n");
  11744. diff --git a/kernel/irq_work.c b/kernel/irq_work.c
  11745. index bcf107ce0854..2899ba0d23d1 100644
  11746. --- a/kernel/irq_work.c
  11747. +++ b/kernel/irq_work.c
  11748. @@ -17,6 +17,7 @@
  11749. #include <linux/cpu.h>
  11750. #include <linux/notifier.h>
  11751. #include <linux/smp.h>
  11752. +#include <linux/interrupt.h>
  11753. #include <asm/processor.h>
  11754. @@ -65,6 +66,8 @@ void __weak arch_irq_work_raise(void)
  11755. */
  11756. bool irq_work_queue_on(struct irq_work *work, int cpu)
  11757. {
  11758. + struct llist_head *list;
  11759. +
  11760. /* All work should have been flushed before going offline */
  11761. WARN_ON_ONCE(cpu_is_offline(cpu));
  11762. @@ -75,7 +78,12 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
  11763. if (!irq_work_claim(work))
  11764. return false;
  11765. - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
  11766. + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ))
  11767. + list = &per_cpu(lazy_list, cpu);
  11768. + else
  11769. + list = &per_cpu(raised_list, cpu);
  11770. +
  11771. + if (llist_add(&work->llnode, list))
  11772. arch_send_call_function_single_ipi(cpu);
  11773. return true;
  11774. @@ -86,6 +94,9 @@ EXPORT_SYMBOL_GPL(irq_work_queue_on);
  11775. /* Enqueue the irq work @work on the current CPU */
  11776. bool irq_work_queue(struct irq_work *work)
  11777. {
  11778. + struct llist_head *list;
  11779. + bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
  11780. +
  11781. /* Only queue if not already pending */
  11782. if (!irq_work_claim(work))
  11783. return false;
  11784. @@ -93,13 +104,15 @@ bool irq_work_queue(struct irq_work *work)
  11785. /* Queue the entry and raise the IPI if needed. */
  11786. preempt_disable();
  11787. - /* If the work is "lazy", handle it from next tick if any */
  11788. - if (work->flags & IRQ_WORK_LAZY) {
  11789. - if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
  11790. - tick_nohz_tick_stopped())
  11791. - arch_irq_work_raise();
  11792. - } else {
  11793. - if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
  11794. + lazy_work = work->flags & IRQ_WORK_LAZY;
  11795. +
  11796. + if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ)))
  11797. + list = this_cpu_ptr(&lazy_list);
  11798. + else
  11799. + list = this_cpu_ptr(&raised_list);
  11800. +
  11801. + if (llist_add(&work->llnode, list)) {
  11802. + if (!lazy_work || tick_nohz_tick_stopped())
  11803. arch_irq_work_raise();
  11804. }
  11805. @@ -116,9 +129,8 @@ bool irq_work_needs_cpu(void)
  11806. raised = this_cpu_ptr(&raised_list);
  11807. lazy = this_cpu_ptr(&lazy_list);
  11808. - if (llist_empty(raised) || arch_irq_work_has_interrupt())
  11809. - if (llist_empty(lazy))
  11810. - return false;
  11811. + if (llist_empty(raised) && llist_empty(lazy))
  11812. + return false;
  11813. /* All work should have been flushed before going offline */
  11814. WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
  11815. @@ -132,7 +144,7 @@ static void irq_work_run_list(struct llist_head *list)
  11816. struct irq_work *work;
  11817. struct llist_node *llnode;
  11818. - BUG_ON(!irqs_disabled());
  11819. + BUG_ON_NONRT(!irqs_disabled());
  11820. if (llist_empty(list))
  11821. return;
  11822. @@ -169,7 +181,16 @@ static void irq_work_run_list(struct llist_head *list)
  11823. void irq_work_run(void)
  11824. {
  11825. irq_work_run_list(this_cpu_ptr(&raised_list));
  11826. - irq_work_run_list(this_cpu_ptr(&lazy_list));
  11827. + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) {
  11828. + /*
  11829. + * NOTE: we raise softirq via IPI for safety,
  11830. + * and execute in irq_work_tick() to move the
  11831. + * overhead from hard to soft irq context.
  11832. + */
  11833. + if (!llist_empty(this_cpu_ptr(&lazy_list)))
  11834. + raise_softirq(TIMER_SOFTIRQ);
  11835. + } else
  11836. + irq_work_run_list(this_cpu_ptr(&lazy_list));
  11837. }
  11838. EXPORT_SYMBOL_GPL(irq_work_run);
  11839. @@ -179,8 +200,17 @@ void irq_work_tick(void)
  11840. if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
  11841. irq_work_run_list(raised);
  11842. +
  11843. + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
  11844. + irq_work_run_list(this_cpu_ptr(&lazy_list));
  11845. +}
  11846. +
  11847. +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
  11848. +void irq_work_tick_soft(void)
  11849. +{
  11850. irq_work_run_list(this_cpu_ptr(&lazy_list));
  11851. }
  11852. +#endif
  11853. /*
  11854. * Synchronize against the irq_work @entry, ensures the entry is not
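
On PREEMPT_RT_FULL the queueing rules above reduce to: work flagged IRQ_WORK_HARD_IRQ stays on raised_list and still runs from the IPI in hard interrupt context, while everything else is diverted to lazy_list and runs from irq_work_tick_soft() out of TIMER_SOFTIRQ. A small sketch of a user that genuinely needs hard-interrupt execution (names are hypothetical; the flag is as defined by this patch set):

#include <linux/irq_work.h>

static void demo_irq_work_fn(struct irq_work *work)
{
	/* runs from hard interrupt context even on PREEMPT_RT_FULL */
}

static struct irq_work demo_work = {
	.flags = IRQ_WORK_HARD_IRQ,
	.func  = demo_irq_work_fn,
};

static void demo_kick(void)
{
	irq_work_queue(&demo_work);	/* goes to raised_list and raises the IPI */
}
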
  11855. diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
  11856. index 152da4a48867..9fe46356ed47 100644
  11857. --- a/kernel/ksysfs.c
  11858. +++ b/kernel/ksysfs.c
  11859. @@ -136,6 +136,15 @@ KERNEL_ATTR_RO(vmcoreinfo);
  11860. #endif /* CONFIG_KEXEC_CORE */
  11861. +#if defined(CONFIG_PREEMPT_RT_FULL)
  11862. +static ssize_t realtime_show(struct kobject *kobj,
  11863. + struct kobj_attribute *attr, char *buf)
  11864. +{
  11865. + return sprintf(buf, "%d\n", 1);
  11866. +}
  11867. +KERNEL_ATTR_RO(realtime);
  11868. +#endif
  11869. +
  11870. /* whether file capabilities are enabled */
  11871. static ssize_t fscaps_show(struct kobject *kobj,
  11872. struct kobj_attribute *attr, char *buf)
  11873. @@ -225,6 +234,9 @@ static struct attribute * kernel_attrs[] = {
  11874. &rcu_expedited_attr.attr,
  11875. &rcu_normal_attr.attr,
  11876. #endif
  11877. +#ifdef CONFIG_PREEMPT_RT_FULL
  11878. + &realtime_attr.attr,
  11879. +#endif
  11880. NULL
  11881. };
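
From userspace the new attribute appears as /sys/kernel/realtime and reads as "1"; on a kernel built without PREEMPT_RT_FULL the file simply does not exist. A small userspace check:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/realtime", "r");
	int rt = 0;

	if (f) {
		if (fscanf(f, "%d", &rt) != 1)
			rt = 0;
		fclose(f);
	}
	printf("PREEMPT_RT_FULL kernel: %s\n", rt ? "yes" : "no");
	return 0;
}
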
  11882. diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
  11883. index 31322a4275cd..c6bba9299d8b 100644
  11884. --- a/kernel/locking/Makefile
  11885. +++ b/kernel/locking/Makefile
  11886. @@ -2,7 +2,7 @@
  11887. # and is generally not a function of system call inputs.
  11888. KCOV_INSTRUMENT := n
  11889. -obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
  11890. +obj-y += semaphore.o percpu-rwsem.o
  11891. ifdef CONFIG_FUNCTION_TRACER
  11892. CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
  11893. @@ -11,7 +11,11 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE)
  11894. CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE)
  11895. endif
  11896. +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
  11897. +obj-y += mutex.o
  11898. obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
  11899. +obj-y += rwsem.o
  11900. +endif
  11901. obj-$(CONFIG_LOCKDEP) += lockdep.o
  11902. ifeq ($(CONFIG_PROC_FS),y)
  11903. obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
  11904. @@ -25,7 +29,10 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
  11905. obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
  11906. obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
  11907. obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
  11908. +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
  11909. obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
  11910. obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
  11911. +endif
  11912. +obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o
  11913. obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
  11914. obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
  11915. diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c
  11916. index 951cfcd10b4a..57e0ea72c28a 100644
  11917. --- a/kernel/locking/lglock.c
  11918. +++ b/kernel/locking/lglock.c
  11919. @@ -4,6 +4,15 @@
  11920. #include <linux/cpu.h>
  11921. #include <linux/string.h>
  11922. +#ifndef CONFIG_PREEMPT_RT_FULL
  11923. +# define lg_lock_ptr arch_spinlock_t
  11924. +# define lg_do_lock(l) arch_spin_lock(l)
  11925. +# define lg_do_unlock(l) arch_spin_unlock(l)
  11926. +#else
  11927. +# define lg_lock_ptr struct rt_mutex
  11928. +# define lg_do_lock(l) __rt_spin_lock__no_mg(l)
  11929. +# define lg_do_unlock(l) __rt_spin_unlock(l)
  11930. +#endif
  11931. /*
  11932. * Note there is no uninit, so lglocks cannot be defined in
  11933. * modules (but it's fine to use them from there)
  11934. @@ -12,51 +21,60 @@
  11935. void lg_lock_init(struct lglock *lg, char *name)
  11936. {
  11937. +#ifdef CONFIG_PREEMPT_RT_FULL
  11938. + int i;
  11939. +
  11940. + for_each_possible_cpu(i) {
  11941. + struct rt_mutex *lock = per_cpu_ptr(lg->lock, i);
  11942. +
  11943. + rt_mutex_init(lock);
  11944. + }
  11945. +#endif
  11946. LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
  11947. }
  11948. EXPORT_SYMBOL(lg_lock_init);
  11949. void lg_local_lock(struct lglock *lg)
  11950. {
  11951. - arch_spinlock_t *lock;
  11952. + lg_lock_ptr *lock;
  11953. - preempt_disable();
  11954. + migrate_disable();
  11955. lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  11956. lock = this_cpu_ptr(lg->lock);
  11957. - arch_spin_lock(lock);
  11958. + lg_do_lock(lock);
  11959. }
  11960. EXPORT_SYMBOL(lg_local_lock);
  11961. void lg_local_unlock(struct lglock *lg)
  11962. {
  11963. - arch_spinlock_t *lock;
  11964. + lg_lock_ptr *lock;
  11965. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  11966. lock = this_cpu_ptr(lg->lock);
  11967. - arch_spin_unlock(lock);
  11968. - preempt_enable();
  11969. + lg_do_unlock(lock);
  11970. + migrate_enable();
  11971. }
  11972. EXPORT_SYMBOL(lg_local_unlock);
  11973. void lg_local_lock_cpu(struct lglock *lg, int cpu)
  11974. {
  11975. - arch_spinlock_t *lock;
  11976. + lg_lock_ptr *lock;
  11977. - preempt_disable();
  11978. + preempt_disable_nort();
  11979. lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  11980. lock = per_cpu_ptr(lg->lock, cpu);
  11981. - arch_spin_lock(lock);
  11982. + lg_do_lock(lock);
  11983. }
  11984. EXPORT_SYMBOL(lg_local_lock_cpu);
  11985. void lg_local_unlock_cpu(struct lglock *lg, int cpu)
  11986. {
  11987. - arch_spinlock_t *lock;
  11988. + lg_lock_ptr *lock;
  11989. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  11990. lock = per_cpu_ptr(lg->lock, cpu);
  11991. - arch_spin_unlock(lock);
  11992. - preempt_enable();
  11993. + lg_do_unlock(lock);
  11994. + preempt_enable_nort();
  11995. }
  11996. EXPORT_SYMBOL(lg_local_unlock_cpu);
  11997. @@ -68,30 +86,30 @@ void lg_double_lock(struct lglock *lg, int cpu1, int cpu2)
  11998. if (cpu2 < cpu1)
  11999. swap(cpu1, cpu2);
  12000. - preempt_disable();
  12001. + preempt_disable_nort();
  12002. lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  12003. - arch_spin_lock(per_cpu_ptr(lg->lock, cpu1));
  12004. - arch_spin_lock(per_cpu_ptr(lg->lock, cpu2));
  12005. + lg_do_lock(per_cpu_ptr(lg->lock, cpu1));
  12006. + lg_do_lock(per_cpu_ptr(lg->lock, cpu2));
  12007. }
  12008. void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2)
  12009. {
  12010. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  12011. - arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1));
  12012. - arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2));
  12013. - preempt_enable();
  12014. + lg_do_unlock(per_cpu_ptr(lg->lock, cpu1));
  12015. + lg_do_unlock(per_cpu_ptr(lg->lock, cpu2));
  12016. + preempt_enable_nort();
  12017. }
  12018. void lg_global_lock(struct lglock *lg)
  12019. {
  12020. int i;
  12021. - preempt_disable();
  12022. + preempt_disable_nort();
  12023. lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  12024. for_each_possible_cpu(i) {
  12025. - arch_spinlock_t *lock;
  12026. + lg_lock_ptr *lock;
  12027. lock = per_cpu_ptr(lg->lock, i);
  12028. - arch_spin_lock(lock);
  12029. + lg_do_lock(lock);
  12030. }
  12031. }
  12032. EXPORT_SYMBOL(lg_global_lock);
  12033. @@ -102,10 +120,35 @@ void lg_global_unlock(struct lglock *lg)
  12034. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  12035. for_each_possible_cpu(i) {
  12036. - arch_spinlock_t *lock;
  12037. + lg_lock_ptr *lock;
  12038. lock = per_cpu_ptr(lg->lock, i);
  12039. - arch_spin_unlock(lock);
  12040. + lg_do_unlock(lock);
  12041. }
  12042. - preempt_enable();
  12043. + preempt_enable_nort();
  12044. }
  12045. EXPORT_SYMBOL(lg_global_unlock);
  12046. +
  12047. +#ifdef CONFIG_PREEMPT_RT_FULL
  12048. +/*
  12049. + * HACK: If you use this, you get to keep the pieces.
  12050. + * Used in queue_stop_cpus_work() when stop machinery
  12051. + * is called from an inactive CPU, so we can't schedule.
  12052. + */
  12053. +# define lg_do_trylock_relax(l) \
  12054. + do { \
  12055. + while (!__rt_spin_trylock(l)) \
  12056. + cpu_relax(); \
  12057. + } while (0)
  12058. +
  12059. +void lg_global_trylock_relax(struct lglock *lg)
  12060. +{
  12061. + int i;
  12062. +
  12063. + lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  12064. + for_each_possible_cpu(i) {
  12065. + lg_lock_ptr *lock;
  12066. + lock = per_cpu_ptr(lg->lock, i);
  12067. + lg_do_trylock_relax(lock);
  12068. + }
  12069. +}
  12070. +#endif
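
Callers of the lglock API are unchanged by the conversion above; only the per-CPU lock type differs between RT and !RT. A minimal usage sketch, assuming the DEFINE_STATIC_LGLOCK() helper from <linux/lglock.h> of this kernel generation (the lock and functions below are hypothetical examples, not taken from the patch):

#include <linux/lglock.h>

DEFINE_STATIC_LGLOCK(demo_lglock);

static int __init demo_lglock_setup(void)
{
	lg_lock_init(&demo_lglock, "demo_lglock");
	return 0;
}

static void demo_fast_path(void)
{
	lg_local_lock(&demo_lglock);	/* this CPU's lock only; migrate_disable() on RT */
	/* touch this CPU's share of the protected data */
	lg_local_unlock(&demo_lglock);
}

static void demo_slow_path(void)
{
	lg_global_lock(&demo_lglock);	/* takes every CPU's lock */
	/* rare global operation over all CPUs' data */
	lg_global_unlock(&demo_lglock);
}
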
  12071. diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
  12072. index 78c1c0ee6dc1..839175acc1c6 100644
  12073. --- a/kernel/locking/lockdep.c
  12074. +++ b/kernel/locking/lockdep.c
  12075. @@ -3648,6 +3648,7 @@ static void check_flags(unsigned long flags)
  12076. }
  12077. }
  12078. +#ifndef CONFIG_PREEMPT_RT_FULL
  12079. /*
  12080. * We dont accurately track softirq state in e.g.
  12081. * hardirq contexts (such as on 4KSTACKS), so only
  12082. @@ -3662,6 +3663,7 @@ static void check_flags(unsigned long flags)
  12083. DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
  12084. }
  12085. }
  12086. +#endif
  12087. if (!debug_locks)
  12088. print_irqtrace_events(current);
  12089. diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
  12090. index 8ef1919d63b2..291fc19e28e0 100644
  12091. --- a/kernel/locking/locktorture.c
  12092. +++ b/kernel/locking/locktorture.c
  12093. @@ -26,7 +26,6 @@
  12094. #include <linux/kthread.h>
  12095. #include <linux/sched/rt.h>
  12096. #include <linux/spinlock.h>
  12097. -#include <linux/rwlock.h>
  12098. #include <linux/mutex.h>
  12099. #include <linux/rwsem.h>
  12100. #include <linux/smp.h>
  12101. diff --git a/kernel/locking/rt.c b/kernel/locking/rt.c
  12102. new file mode 100644
  12103. index 000000000000..d4ab61c1848b
  12104. --- /dev/null
  12105. +++ b/kernel/locking/rt.c
  12106. @@ -0,0 +1,474 @@
  12107. +/*
  12108. + * kernel/rt.c
  12109. + *
  12110. + * Real-Time Preemption Support
  12111. + *
  12112. + * started by Ingo Molnar:
  12113. + *
  12114. + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  12115. + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  12116. + *
  12117. + * historic credit for proving that Linux spinlocks can be implemented via
  12118. + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow
  12119. + * and others) who prototyped it on 2.4 and did lots of comparative
  12120. + * research and analysis; TimeSys, for proving that you can implement a
  12121. + * fully preemptible kernel via the use of IRQ threading and mutexes;
  12122. + * Bill Huey for persuasively arguing on lkml that the mutex model is the
  12123. + * right one; and to MontaVista, who ported pmutexes to 2.6.
  12124. + *
  12125. + * This code is a from-scratch implementation and is not based on pmutexes,
  12126. + * but the idea of converting spinlocks to mutexes is used here too.
  12127. + *
  12128. + * lock debugging, locking tree, deadlock detection:
  12129. + *
  12130. + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
  12131. + * Released under the General Public License (GPL).
  12132. + *
  12133. + * Includes portions of the generic R/W semaphore implementation from:
  12134. + *
  12135. + * Copyright (c) 2001 David Howells (dhowells@redhat.com).
  12136. + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
  12137. + * - Derived also from comments by Linus
  12138. + *
  12139. + * Pending ownership of locks and ownership stealing:
  12140. + *
  12141. + * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt
  12142. + *
  12143. + * (also by Steven Rostedt)
  12144. + * - Converted single pi_lock to individual task locks.
  12145. + *
  12146. + * By Esben Nielsen:
  12147. + * Doing priority inheritance with help of the scheduler.
  12148. + *
  12149. + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  12150. + * - major rework based on Esben Nielsens initial patch
  12151. + * - replaced thread_info references by task_struct refs
  12152. + * - removed task->pending_owner dependency
  12153. + * - BKL drop/reacquire for semaphore style locks to avoid deadlocks
  12154. + * in the scheduler return path as discussed with Steven Rostedt
  12155. + *
  12156. + * Copyright (C) 2006, Kihon Technologies Inc.
  12157. + * Steven Rostedt <rostedt@goodmis.org>
  12158. + * - debugged and patched Thomas Gleixner's rework.
  12159. + * - added back the cmpxchg to the rework.
  12160. + * - turned atomic require back on for SMP.
  12161. + */
  12162. +
  12163. +#include <linux/spinlock.h>
  12164. +#include <linux/rtmutex.h>
  12165. +#include <linux/sched.h>
  12166. +#include <linux/delay.h>
  12167. +#include <linux/module.h>
  12168. +#include <linux/kallsyms.h>
  12169. +#include <linux/syscalls.h>
  12170. +#include <linux/interrupt.h>
  12171. +#include <linux/plist.h>
  12172. +#include <linux/fs.h>
  12173. +#include <linux/futex.h>
  12174. +#include <linux/hrtimer.h>
  12175. +
  12176. +#include "rtmutex_common.h"
  12177. +
  12178. +/*
  12179. + * struct mutex functions
  12180. + */
  12181. +void __mutex_do_init(struct mutex *mutex, const char *name,
  12182. + struct lock_class_key *key)
  12183. +{
  12184. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12185. + /*
  12186. + * Make sure we are not reinitializing a held lock:
  12187. + */
  12188. + debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
  12189. + lockdep_init_map(&mutex->dep_map, name, key, 0);
  12190. +#endif
  12191. + mutex->lock.save_state = 0;
  12192. +}
  12193. +EXPORT_SYMBOL(__mutex_do_init);
  12194. +
  12195. +void __lockfunc _mutex_lock(struct mutex *lock)
  12196. +{
  12197. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  12198. + rt_mutex_lock(&lock->lock);
  12199. +}
  12200. +EXPORT_SYMBOL(_mutex_lock);
  12201. +
  12202. +int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
  12203. +{
  12204. + int ret;
  12205. +
  12206. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  12207. + ret = rt_mutex_lock_interruptible(&lock->lock);
  12208. + if (ret)
  12209. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  12210. + return ret;
  12211. +}
  12212. +EXPORT_SYMBOL(_mutex_lock_interruptible);
  12213. +
  12214. +int __lockfunc _mutex_lock_killable(struct mutex *lock)
  12215. +{
  12216. + int ret;
  12217. +
  12218. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  12219. + ret = rt_mutex_lock_killable(&lock->lock);
  12220. + if (ret)
  12221. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  12222. + return ret;
  12223. +}
  12224. +EXPORT_SYMBOL(_mutex_lock_killable);
  12225. +
  12226. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12227. +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
  12228. +{
  12229. + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
  12230. + rt_mutex_lock(&lock->lock);
  12231. +}
  12232. +EXPORT_SYMBOL(_mutex_lock_nested);
  12233. +
  12234. +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
  12235. +{
  12236. + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_);
  12237. + rt_mutex_lock(&lock->lock);
  12238. +}
  12239. +EXPORT_SYMBOL(_mutex_lock_nest_lock);
  12240. +
  12241. +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass)
  12242. +{
  12243. + int ret;
  12244. +
  12245. + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
  12246. + ret = rt_mutex_lock_interruptible(&lock->lock);
  12247. + if (ret)
  12248. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  12249. + return ret;
  12250. +}
  12251. +EXPORT_SYMBOL(_mutex_lock_interruptible_nested);
  12252. +
  12253. +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass)
  12254. +{
  12255. + int ret;
  12256. +
  12257. + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
  12258. + ret = rt_mutex_lock_killable(&lock->lock);
  12259. + if (ret)
  12260. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  12261. + return ret;
  12262. +}
  12263. +EXPORT_SYMBOL(_mutex_lock_killable_nested);
  12264. +#endif
  12265. +
  12266. +int __lockfunc _mutex_trylock(struct mutex *lock)
  12267. +{
  12268. + int ret = rt_mutex_trylock(&lock->lock);
  12269. +
  12270. + if (ret)
  12271. + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  12272. +
  12273. + return ret;
  12274. +}
  12275. +EXPORT_SYMBOL(_mutex_trylock);
  12276. +
  12277. +void __lockfunc _mutex_unlock(struct mutex *lock)
  12278. +{
  12279. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  12280. + rt_mutex_unlock(&lock->lock);
  12281. +}
  12282. +EXPORT_SYMBOL(_mutex_unlock);
  12283. +
  12284. +/*
  12285. + * rwlock_t functions
  12286. + */
  12287. +int __lockfunc rt_write_trylock(rwlock_t *rwlock)
  12288. +{
  12289. + int ret;
  12290. +
  12291. + migrate_disable();
  12292. + ret = rt_mutex_trylock(&rwlock->lock);
  12293. + if (ret)
  12294. + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
  12295. + else
  12296. + migrate_enable();
  12297. +
  12298. + return ret;
  12299. +}
  12300. +EXPORT_SYMBOL(rt_write_trylock);
  12301. +
  12302. +int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags)
  12303. +{
  12304. + int ret;
  12305. +
  12306. + *flags = 0;
  12307. + ret = rt_write_trylock(rwlock);
  12308. + return ret;
  12309. +}
  12310. +EXPORT_SYMBOL(rt_write_trylock_irqsave);
  12311. +
  12312. +int __lockfunc rt_read_trylock(rwlock_t *rwlock)
  12313. +{
  12314. + struct rt_mutex *lock = &rwlock->lock;
  12315. + int ret = 1;
  12316. +
  12317. + /*
  12318. + * recursive read locks succeed when current owns the lock,
  12319. + * but not when read_depth == 0 which means that the lock is
  12320. + * write locked.
  12321. + */
  12322. + if (rt_mutex_owner(lock) != current) {
  12323. + migrate_disable();
  12324. + ret = rt_mutex_trylock(lock);
  12325. + if (ret)
  12326. + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
  12327. + else
  12328. + migrate_enable();
  12329. +
  12330. + } else if (!rwlock->read_depth) {
  12331. + ret = 0;
  12332. + }
  12333. +
  12334. + if (ret)
  12335. + rwlock->read_depth++;
  12336. +
  12337. + return ret;
  12338. +}
  12339. +EXPORT_SYMBOL(rt_read_trylock);
  12340. +
  12341. +void __lockfunc rt_write_lock(rwlock_t *rwlock)
  12342. +{
  12343. + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
  12344. + __rt_spin_lock(&rwlock->lock);
  12345. +}
  12346. +EXPORT_SYMBOL(rt_write_lock);
  12347. +
  12348. +void __lockfunc rt_read_lock(rwlock_t *rwlock)
  12349. +{
  12350. + struct rt_mutex *lock = &rwlock->lock;
  12351. +
  12352. +
  12353. + /*
  12354. + * recursive read locks succeed when current owns the lock
  12355. + */
  12356. + if (rt_mutex_owner(lock) != current) {
  12357. + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
  12358. + __rt_spin_lock(lock);
  12359. + }
  12360. + rwlock->read_depth++;
  12361. +}
  12362. +
  12363. +EXPORT_SYMBOL(rt_read_lock);
  12364. +
  12365. +void __lockfunc rt_write_unlock(rwlock_t *rwlock)
  12366. +{
  12367. + /* NOTE: we always pass in '1' for nested, for simplicity */
  12368. + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
  12369. + __rt_spin_unlock(&rwlock->lock);
  12370. + migrate_enable();
  12371. +}
  12372. +EXPORT_SYMBOL(rt_write_unlock);
  12373. +
  12374. +void __lockfunc rt_read_unlock(rwlock_t *rwlock)
  12375. +{
  12376. + /* Release the lock only when read_depth is down to 0 */
  12377. + if (--rwlock->read_depth == 0) {
  12378. + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
  12379. + __rt_spin_unlock(&rwlock->lock);
  12380. + migrate_enable();
  12381. + }
  12382. +}
  12383. +EXPORT_SYMBOL(rt_read_unlock);
  12384. +
  12385. +unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock)
  12386. +{
  12387. + rt_write_lock(rwlock);
  12388. +
  12389. + return 0;
  12390. +}
  12391. +EXPORT_SYMBOL(rt_write_lock_irqsave);
  12392. +
  12393. +unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock)
  12394. +{
  12395. + rt_read_lock(rwlock);
  12396. +
  12397. + return 0;
  12398. +}
  12399. +EXPORT_SYMBOL(rt_read_lock_irqsave);
  12400. +
  12401. +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
  12402. +{
  12403. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12404. + /*
  12405. + * Make sure we are not reinitializing a held lock:
  12406. + */
  12407. + debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
  12408. + lockdep_init_map(&rwlock->dep_map, name, key, 0);
  12409. +#endif
  12410. + rwlock->lock.save_state = 1;
  12411. + rwlock->read_depth = 0;
  12412. +}
  12413. +EXPORT_SYMBOL(__rt_rwlock_init);
  12414. +
  12415. +/*
  12416. + * rw_semaphores
  12417. + */
  12418. +
  12419. +void rt_up_write(struct rw_semaphore *rwsem)
  12420. +{
  12421. + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
  12422. + rt_mutex_unlock(&rwsem->lock);
  12423. +}
  12424. +EXPORT_SYMBOL(rt_up_write);
  12425. +
  12426. +void __rt_up_read(struct rw_semaphore *rwsem)
  12427. +{
  12428. + if (--rwsem->read_depth == 0)
  12429. + rt_mutex_unlock(&rwsem->lock);
  12430. +}
  12431. +
  12432. +void rt_up_read(struct rw_semaphore *rwsem)
  12433. +{
  12434. + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
  12435. + __rt_up_read(rwsem);
  12436. +}
  12437. +EXPORT_SYMBOL(rt_up_read);
  12438. +
  12439. +/*
  12440. + * downgrade a write lock into a read lock
  12441. + * - just wake up any readers at the front of the queue
  12442. + */
  12443. +void rt_downgrade_write(struct rw_semaphore *rwsem)
  12444. +{
  12445. + BUG_ON(rt_mutex_owner(&rwsem->lock) != current);
  12446. + rwsem->read_depth = 1;
  12447. +}
  12448. +EXPORT_SYMBOL(rt_downgrade_write);
  12449. +
  12450. +int rt_down_write_trylock(struct rw_semaphore *rwsem)
  12451. +{
  12452. + int ret = rt_mutex_trylock(&rwsem->lock);
  12453. +
  12454. + if (ret)
  12455. + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
  12456. + return ret;
  12457. +}
  12458. +EXPORT_SYMBOL(rt_down_write_trylock);
  12459. +
  12460. +void rt_down_write(struct rw_semaphore *rwsem)
  12461. +{
  12462. + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_);
  12463. + rt_mutex_lock(&rwsem->lock);
  12464. +}
  12465. +EXPORT_SYMBOL(rt_down_write);
  12466. +
  12467. +void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass)
  12468. +{
  12469. + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
  12470. + rt_mutex_lock(&rwsem->lock);
  12471. +}
  12472. +EXPORT_SYMBOL(rt_down_write_nested);
  12473. +
  12474. +void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
  12475. + struct lockdep_map *nest)
  12476. +{
  12477. + rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_);
  12478. + rt_mutex_lock(&rwsem->lock);
  12479. +}
  12480. +EXPORT_SYMBOL(rt_down_write_nested_lock);
  12481. +
  12482. +int rt__down_read_trylock(struct rw_semaphore *rwsem)
  12483. +{
  12484. + struct rt_mutex *lock = &rwsem->lock;
  12485. + int ret = 1;
  12486. +
  12487. + /*
12488. + * Recursive read locks succeed when current already owns the rwsem,
12489. + * but not when read_depth == 0, which means that the rwsem is
12490. + * write locked.
  12491. + */
  12492. + if (rt_mutex_owner(lock) != current)
  12493. + ret = rt_mutex_trylock(&rwsem->lock);
  12494. + else if (!rwsem->read_depth)
  12495. + ret = 0;
  12496. +
  12497. + if (ret)
  12498. + rwsem->read_depth++;
  12499. + return ret;
  12501. +}
  12502. +
  12503. +int rt_down_read_trylock(struct rw_semaphore *rwsem)
  12504. +{
  12505. + int ret;
  12506. +
  12507. + ret = rt__down_read_trylock(rwsem);
  12508. + if (ret)
  12509. + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
  12510. +
  12511. + return ret;
  12512. +}
  12513. +EXPORT_SYMBOL(rt_down_read_trylock);
  12514. +
  12515. +void rt__down_read(struct rw_semaphore *rwsem)
  12516. +{
  12517. + struct rt_mutex *lock = &rwsem->lock;
  12518. +
  12519. + if (rt_mutex_owner(lock) != current)
  12520. + rt_mutex_lock(&rwsem->lock);
  12521. + rwsem->read_depth++;
  12522. +}
  12523. +EXPORT_SYMBOL(rt__down_read);
  12524. +
  12525. +static void __rt_down_read(struct rw_semaphore *rwsem, int subclass)
  12526. +{
  12527. + rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_);
  12528. + rt__down_read(rwsem);
  12529. +}
  12530. +
  12531. +void rt_down_read(struct rw_semaphore *rwsem)
  12532. +{
  12533. + __rt_down_read(rwsem, 0);
  12534. +}
  12535. +EXPORT_SYMBOL(rt_down_read);
  12536. +
  12537. +void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass)
  12538. +{
  12539. + __rt_down_read(rwsem, subclass);
  12540. +}
  12541. +EXPORT_SYMBOL(rt_down_read_nested);
  12542. +
  12543. +void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
  12544. + struct lock_class_key *key)
  12545. +{
  12546. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12547. + /*
  12548. + * Make sure we are not reinitializing a held lock:
  12549. + */
  12550. + debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem));
  12551. + lockdep_init_map(&rwsem->dep_map, name, key, 0);
  12552. +#endif
  12553. + rwsem->read_depth = 0;
  12554. + rwsem->lock.save_state = 0;
  12555. +}
  12556. +EXPORT_SYMBOL(__rt_rwsem_init);
  12557. +
  12558. +/**
  12559. + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
  12560. + * @cnt: the atomic which we are to dec
  12561. + * @lock: the mutex to return holding if we dec to 0
  12562. + *
  12563. + * return true and hold lock if we dec to 0, return false otherwise
  12564. + */
  12565. +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
  12566. +{
  12567. + /* dec if we can't possibly hit 0 */
  12568. + if (atomic_add_unless(cnt, -1, 1))
  12569. + return 0;
  12570. + /* we might hit 0, so take the lock */
  12571. + mutex_lock(lock);
  12572. + if (!atomic_dec_and_test(cnt)) {
  12573. + /* when we actually did the dec, we didn't hit 0 */
  12574. + mutex_unlock(lock);
  12575. + return 0;
  12576. + }
  12577. + /* we hit 0, and we hold the lock */
  12578. + return 1;
  12579. +}
  12580. +EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
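
atomic_dec_and_mutex_lock() is the usual dec-and-lock pattern: decrement
lock-free while the counter cannot possibly reach zero, and take the mutex only
around the final 1 -> 0 transition.  A hedged userspace sketch of the same idea
with C11 atomics and pthreads (the demo_* names are invented; the kernel helper
itself builds on atomic_add_unless()):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Return true holding @lock only when this decrement took the counter to 0. */
static bool demo_dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
{
        int old = atomic_load(cnt);

        /* Fast path: decrement without the lock while we cannot hit 0. */
        while (old > 1) {
                if (atomic_compare_exchange_weak(cnt, &old, old - 1))
                        return false;
        }

        /* Slow path: we might hit 0, so serialize the last droppers. */
        pthread_mutex_lock(lock);
        if (atomic_fetch_sub(cnt, 1) != 1) {
                pthread_mutex_unlock(lock);
                return false;
        }
        /* Counter reached 0 and the caller still holds the mutex. */
        return true;
}

int main(void)
{
        pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
        atomic_int refs = 2;

        (void)demo_dec_and_lock(&refs, &lock);  /* 2 -> 1, lock never taken */
        if (demo_dec_and_lock(&refs, &lock))    /* 1 -> 0, returns with lock held */
                pthread_mutex_unlock(&lock);
        return 0;
}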
  12581. diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
  12582. index 3e746607abe5..fde5e54f1096 100644
  12583. --- a/kernel/locking/rtmutex.c
  12584. +++ b/kernel/locking/rtmutex.c
  12585. @@ -7,6 +7,11 @@
  12586. * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  12587. * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
  12588. * Copyright (C) 2006 Esben Nielsen
  12589. + * Adaptive Spinlocks:
  12590. + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
  12591. + * and Peter Morreale,
  12592. + * Adaptive Spinlocks simplification:
  12593. + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
  12594. *
  12595. * See Documentation/locking/rt-mutex-design.txt for details.
  12596. */
  12597. @@ -16,6 +21,7 @@
  12598. #include <linux/sched/rt.h>
  12599. #include <linux/sched/deadline.h>
  12600. #include <linux/timer.h>
  12601. +#include <linux/ww_mutex.h>
  12602. #include "rtmutex_common.h"
  12603. @@ -69,6 +75,12 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
  12604. clear_rt_mutex_waiters(lock);
  12605. }
  12606. +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
  12607. +{
  12608. + return waiter && waiter != PI_WAKEUP_INPROGRESS &&
  12609. + waiter != PI_REQUEUE_INPROGRESS;
  12610. +}
  12611. +
  12612. /*
  12613. * We can speed up the acquire/release, if there's no debugging state to be
  12614. * set up.
  12615. @@ -350,6 +362,14 @@ static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
  12616. return debug_rt_mutex_detect_deadlock(waiter, chwalk);
  12617. }
  12618. +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
  12619. +{
  12620. + if (waiter->savestate)
  12621. + wake_up_lock_sleeper(waiter->task);
  12622. + else
  12623. + wake_up_process(waiter->task);
  12624. +}
  12625. +
  12626. /*
  12627. * Max number of times we'll walk the boosting chain:
  12628. */
  12629. @@ -357,7 +377,8 @@ int max_lock_depth = 1024;
  12630. static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
  12631. {
  12632. - return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
  12633. + return rt_mutex_real_waiter(p->pi_blocked_on) ?
  12634. + p->pi_blocked_on->lock : NULL;
  12635. }
  12636. /*
  12637. @@ -493,7 +514,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  12638. * reached or the state of the chain has changed while we
  12639. * dropped the locks.
  12640. */
  12641. - if (!waiter)
  12642. + if (!rt_mutex_real_waiter(waiter))
  12643. goto out_unlock_pi;
  12644. /*
  12645. @@ -655,13 +676,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  12646. * follow here. This is the end of the chain we are walking.
  12647. */
  12648. if (!rt_mutex_owner(lock)) {
  12649. + struct rt_mutex_waiter *lock_top_waiter;
  12650. +
  12651. /*
  12652. * If the requeue [7] above changed the top waiter,
  12653. * then we need to wake the new top waiter up to try
  12654. * to get the lock.
  12655. */
  12656. - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
  12657. - wake_up_process(rt_mutex_top_waiter(lock)->task);
  12658. + lock_top_waiter = rt_mutex_top_waiter(lock);
  12659. + if (prerequeue_top_waiter != lock_top_waiter)
  12660. + rt_mutex_wake_waiter(lock_top_waiter);
  12661. raw_spin_unlock_irq(&lock->wait_lock);
  12662. return 0;
  12663. }
  12664. @@ -754,6 +778,25 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  12665. return ret;
  12666. }
  12667. +
  12668. +#define STEAL_NORMAL 0
  12669. +#define STEAL_LATERAL 1
  12670. +
  12671. +/*
  12672. + * Note that RT tasks are excluded from lateral-steals to prevent the
12673. + * introduction of an unbounded latency.
  12674. + */
  12675. +static inline int lock_is_stealable(struct task_struct *task,
  12676. + struct task_struct *pendowner, int mode)
  12677. +{
  12678. + if (mode == STEAL_NORMAL || rt_task(task)) {
  12679. + if (task->prio >= pendowner->prio)
  12680. + return 0;
  12681. + } else if (task->prio > pendowner->prio)
  12682. + return 0;
  12683. + return 1;
  12684. +}
  12685. +
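
lock_is_stealable() encodes the steal policy: under STEAL_NORMAL, or for any
RT task, the contender needs a strictly better (numerically lower) priority
than the pending owner, while STEAL_LATERAL additionally lets an equal-priority
non-RT task take the lock.  A self-contained restatement of just that
predicate, with rt_task() reduced to a flag for illustration:

#include <assert.h>
#include <stdbool.h>

#define STEAL_NORMAL  0
#define STEAL_LATERAL 1

struct demo_task { int prio; bool is_rt; };  /* lower prio value = higher priority */

static bool demo_lock_is_stealable(const struct demo_task *task,
                                   const struct demo_task *pendowner, int mode)
{
        if (mode == STEAL_NORMAL || task->is_rt) {
                if (task->prio >= pendowner->prio)
                        return false;
        } else if (task->prio > pendowner->prio) {
                return false;
        }
        return true;
}

int main(void)
{
        struct demo_task a  = { .prio = 120, .is_rt = false };
        struct demo_task b  = { .prio = 120, .is_rt = false };
        struct demo_task rt = { .prio = 50,  .is_rt = true };

        assert(!demo_lock_is_stealable(&a, &b, STEAL_NORMAL));   /* equal prio: no   */
        assert(demo_lock_is_stealable(&a, &b, STEAL_LATERAL));   /* lateral:    yes  */
        assert(demo_lock_is_stealable(&rt, &a, STEAL_NORMAL));   /* higher prio: yes */
        assert(!demo_lock_is_stealable(&a, &rt, STEAL_LATERAL)); /* lower prio:  no  */
        return 0;
}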
  12686. /*
  12687. * Try to take an rt-mutex
  12688. *
  12689. @@ -764,8 +807,9 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  12690. * @waiter: The waiter that is queued to the lock's wait tree if the
  12691. * callsite called task_blocked_on_lock(), otherwise NULL
  12692. */
  12693. -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  12694. - struct rt_mutex_waiter *waiter)
  12695. +static int __try_to_take_rt_mutex(struct rt_mutex *lock,
  12696. + struct task_struct *task,
  12697. + struct rt_mutex_waiter *waiter, int mode)
  12698. {
  12699. /*
  12700. * Before testing whether we can acquire @lock, we set the
  12701. @@ -802,8 +846,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  12702. * If waiter is not the highest priority waiter of
  12703. * @lock, give up.
  12704. */
  12705. - if (waiter != rt_mutex_top_waiter(lock))
  12706. + if (waiter != rt_mutex_top_waiter(lock)) {
  12707. + /* XXX lock_is_stealable() ? */
  12708. return 0;
  12709. + }
  12710. /*
  12711. * We can acquire the lock. Remove the waiter from the
  12712. @@ -821,14 +867,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  12713. * not need to be dequeued.
  12714. */
  12715. if (rt_mutex_has_waiters(lock)) {
  12716. - /*
  12717. - * If @task->prio is greater than or equal to
  12718. - * the top waiter priority (kernel view),
  12719. - * @task lost.
  12720. - */
  12721. - if (task->prio >= rt_mutex_top_waiter(lock)->prio)
  12722. - return 0;
  12723. + struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
  12724. + if (task != pown && !lock_is_stealable(task, pown, mode))
  12725. + return 0;
  12726. /*
  12727. * The current top waiter stays enqueued. We
  12728. * don't have to change anything in the lock
  12729. @@ -877,6 +919,399 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  12730. return 1;
  12731. }
  12732. +#ifdef CONFIG_PREEMPT_RT_FULL
  12733. +/*
  12734. + * preemptible spin_lock functions:
  12735. + */
  12736. +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
  12737. + void (*slowfn)(struct rt_mutex *lock,
  12738. + bool mg_off),
  12739. + bool do_mig_dis)
  12740. +{
  12741. + might_sleep_no_state_check();
  12742. +
  12743. + if (do_mig_dis)
  12744. + migrate_disable();
  12745. +
  12746. + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
  12747. + rt_mutex_deadlock_account_lock(lock, current);
  12748. + else
  12749. + slowfn(lock, do_mig_dis);
  12750. +}
  12751. +
  12752. +static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
  12753. + void (*slowfn)(struct rt_mutex *lock))
  12754. +{
  12755. + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
  12756. + rt_mutex_deadlock_account_unlock(current);
  12757. + else
  12758. + slowfn(lock);
  12759. +}
  12760. +#ifdef CONFIG_SMP
  12761. +/*
  12762. + * Note that owner is a speculative pointer and dereferencing relies
  12763. + * on rcu_read_lock() and the check against the lock owner.
  12764. + */
  12765. +static int adaptive_wait(struct rt_mutex *lock,
  12766. + struct task_struct *owner)
  12767. +{
  12768. + int res = 0;
  12769. +
  12770. + rcu_read_lock();
  12771. + for (;;) {
  12772. + if (owner != rt_mutex_owner(lock))
  12773. + break;
  12774. + /*
  12775. + * Ensure that owner->on_cpu is dereferenced _after_
  12776. + * checking the above to be valid.
  12777. + */
  12778. + barrier();
  12779. + if (!owner->on_cpu) {
  12780. + res = 1;
  12781. + break;
  12782. + }
  12783. + cpu_relax();
  12784. + }
  12785. + rcu_read_unlock();
  12786. + return res;
  12787. +}
  12788. +#else
  12789. +static int adaptive_wait(struct rt_mutex *lock,
  12790. + struct task_struct *orig_owner)
  12791. +{
  12792. + return 1;
  12793. +}
  12794. +#endif
  12795. +
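
adaptive_wait() is the adaptive-spinning part of the sleeping spinlocks: keep
spinning only while the task that owns the lock is still running on another
CPU, and fall back to blocking as soon as the owner is scheduled out or the
lock changes hands.  A simplified model of that decision with the owner state
expressed as C11 atomics (illustrative only; the RCU protection of the
speculative owner pointer is omitted and sched_yield() stands in for
cpu_relax()):

#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>

struct demo_owner {
        atomic_int on_cpu;                      /* 1 while the owner is running */
};

struct demo_lock {
        _Atomic(struct demo_owner *) owner;     /* NULL when unlocked */
};

/* true: stop spinning and block (owner was preempted);
 * false: owner changed or released the lock, retry the acquisition instead. */
bool demo_adaptive_wait(struct demo_lock *lock, struct demo_owner *owner)
{
        for (;;) {
                if (atomic_load(&lock->owner) != owner)
                        return false;
                if (!atomic_load(&owner->on_cpu))
                        return true;
                sched_yield();
        }
}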
  12796. +static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  12797. + struct rt_mutex_waiter *waiter,
  12798. + struct task_struct *task,
  12799. + enum rtmutex_chainwalk chwalk);
  12800. +/*
  12801. + * Slow path lock function spin_lock style: this variant is very
  12802. + * careful not to miss any non-lock wakeups.
  12803. + *
  12804. + * We store the current state under p->pi_lock in p->saved_state and
  12805. + * the try_to_wake_up() code handles this accordingly.
  12806. + */
  12807. +static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock,
  12808. + bool mg_off)
  12809. +{
  12810. + struct task_struct *lock_owner, *self = current;
  12811. + struct rt_mutex_waiter waiter, *top_waiter;
  12812. + unsigned long flags;
  12813. + int ret;
  12814. +
  12815. + rt_mutex_init_waiter(&waiter, true);
  12816. +
  12817. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  12818. +
  12819. + if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
  12820. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  12821. + return;
  12822. + }
  12823. +
  12824. + BUG_ON(rt_mutex_owner(lock) == self);
  12825. +
  12826. + /*
  12827. + * We save whatever state the task is in and we'll restore it
  12828. + * after acquiring the lock taking real wakeups into account
  12829. + * as well. We are serialized via pi_lock against wakeups. See
  12830. + * try_to_wake_up().
  12831. + */
  12832. + raw_spin_lock(&self->pi_lock);
  12833. + self->saved_state = self->state;
  12834. + __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
  12835. + raw_spin_unlock(&self->pi_lock);
  12836. +
  12837. + ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK);
  12838. + BUG_ON(ret);
  12839. +
  12840. + for (;;) {
  12841. + /* Try to acquire the lock again. */
  12842. + if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
  12843. + break;
  12844. +
  12845. + top_waiter = rt_mutex_top_waiter(lock);
  12846. + lock_owner = rt_mutex_owner(lock);
  12847. +
  12848. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  12849. +
  12850. + debug_rt_mutex_print_deadlock(&waiter);
  12851. +
  12852. + if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) {
  12853. + if (mg_off)
  12854. + migrate_enable();
  12855. + schedule();
  12856. + if (mg_off)
  12857. + migrate_disable();
  12858. + }
  12859. +
  12860. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  12861. +
  12862. + raw_spin_lock(&self->pi_lock);
  12863. + __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
  12864. + raw_spin_unlock(&self->pi_lock);
  12865. + }
  12866. +
  12867. + /*
  12868. + * Restore the task state to current->saved_state. We set it
  12869. + * to the original state above and the try_to_wake_up() code
  12870. + * has possibly updated it when a real (non-rtmutex) wakeup
  12871. + * happened while we were blocked. Clear saved_state so
12872. + * try_to_wake_up() does not get confused.
  12873. + */
  12874. + raw_spin_lock(&self->pi_lock);
  12875. + __set_current_state_no_track(self->saved_state);
  12876. + self->saved_state = TASK_RUNNING;
  12877. + raw_spin_unlock(&self->pi_lock);
  12878. +
  12879. + /*
  12880. + * try_to_take_rt_mutex() sets the waiter bit
  12881. + * unconditionally. We might have to fix that up:
  12882. + */
  12883. + fixup_rt_mutex_waiters(lock);
  12884. +
  12885. + BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
  12886. + BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry));
  12887. +
  12888. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  12889. +
  12890. + debug_rt_mutex_free_waiter(&waiter);
  12891. +}
  12892. +
  12893. +static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
  12894. + struct wake_q_head *wake_sleeper_q,
  12895. + struct rt_mutex *lock);
  12896. +/*
  12897. + * Slow path to release a rt_mutex spin_lock style
  12898. + */
  12899. +static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
  12900. +{
  12901. + unsigned long flags;
  12902. + WAKE_Q(wake_q);
  12903. + WAKE_Q(wake_sleeper_q);
  12904. +
  12905. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  12906. +
  12907. + debug_rt_mutex_unlock(lock);
  12908. +
  12909. + rt_mutex_deadlock_account_unlock(current);
  12910. +
  12911. + if (!rt_mutex_has_waiters(lock)) {
  12912. + lock->owner = NULL;
  12913. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  12914. + return;
  12915. + }
  12916. +
  12917. + mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
  12918. +
  12919. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  12920. + wake_up_q(&wake_q);
  12921. + wake_up_q_sleeper(&wake_sleeper_q);
  12922. +
12923. + /* Undo pi boosting when necessary */
  12924. + rt_mutex_adjust_prio(current);
  12925. +}
  12926. +
  12927. +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
  12928. +{
  12929. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false);
  12930. + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  12931. +}
  12932. +EXPORT_SYMBOL(rt_spin_lock__no_mg);
  12933. +
  12934. +void __lockfunc rt_spin_lock(spinlock_t *lock)
  12935. +{
  12936. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
  12937. + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  12938. +}
  12939. +EXPORT_SYMBOL(rt_spin_lock);
  12940. +
  12941. +void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
  12942. +{
  12943. + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true);
  12944. +}
  12945. +EXPORT_SYMBOL(__rt_spin_lock);
  12946. +
  12947. +void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock)
  12948. +{
  12949. + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false);
  12950. +}
  12951. +EXPORT_SYMBOL(__rt_spin_lock__no_mg);
  12952. +
  12953. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12954. +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
  12955. +{
  12956. + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
  12957. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
  12958. +}
  12959. +EXPORT_SYMBOL(rt_spin_lock_nested);
  12960. +#endif
  12961. +
  12962. +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock)
  12963. +{
  12964. + /* NOTE: we always pass in '1' for nested, for simplicity */
  12965. + spin_release(&lock->dep_map, 1, _RET_IP_);
  12966. + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
  12967. +}
  12968. +EXPORT_SYMBOL(rt_spin_unlock__no_mg);
  12969. +
  12970. +void __lockfunc rt_spin_unlock(spinlock_t *lock)
  12971. +{
  12972. + /* NOTE: we always pass in '1' for nested, for simplicity */
  12973. + spin_release(&lock->dep_map, 1, _RET_IP_);
  12974. + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
  12975. + migrate_enable();
  12976. +}
  12977. +EXPORT_SYMBOL(rt_spin_unlock);
  12978. +
  12979. +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
  12980. +{
  12981. + rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
  12982. +}
  12983. +EXPORT_SYMBOL(__rt_spin_unlock);
  12984. +
  12985. +/*
  12986. + * Wait for the lock to get unlocked: instead of polling for an unlock
  12987. + * (like raw spinlocks do), we lock and unlock, to force the kernel to
  12988. + * schedule if there's contention:
  12989. + */
  12990. +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
  12991. +{
  12992. + spin_lock(lock);
  12993. + spin_unlock(lock);
  12994. +}
  12995. +EXPORT_SYMBOL(rt_spin_unlock_wait);
  12996. +
  12997. +int __lockfunc __rt_spin_trylock(struct rt_mutex *lock)
  12998. +{
  12999. + return rt_mutex_trylock(lock);
  13000. +}
  13001. +
  13002. +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock)
  13003. +{
  13004. + int ret;
  13005. +
  13006. + ret = rt_mutex_trylock(&lock->lock);
  13007. + if (ret)
  13008. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  13009. + return ret;
  13010. +}
  13011. +EXPORT_SYMBOL(rt_spin_trylock__no_mg);
  13012. +
  13013. +int __lockfunc rt_spin_trylock(spinlock_t *lock)
  13014. +{
  13015. + int ret;
  13016. +
  13017. + migrate_disable();
  13018. + ret = rt_mutex_trylock(&lock->lock);
  13019. + if (ret)
  13020. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  13021. + else
  13022. + migrate_enable();
  13023. + return ret;
  13024. +}
  13025. +EXPORT_SYMBOL(rt_spin_trylock);
  13026. +
  13027. +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
  13028. +{
  13029. + int ret;
  13030. +
  13031. + local_bh_disable();
  13032. + ret = rt_mutex_trylock(&lock->lock);
  13033. + if (ret) {
  13034. + migrate_disable();
  13035. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  13036. + } else
  13037. + local_bh_enable();
  13038. + return ret;
  13039. +}
  13040. +EXPORT_SYMBOL(rt_spin_trylock_bh);
  13041. +
  13042. +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
  13043. +{
  13044. + int ret;
  13045. +
  13046. + *flags = 0;
  13047. + ret = rt_mutex_trylock(&lock->lock);
  13048. + if (ret) {
  13049. + migrate_disable();
  13050. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  13051. + }
  13052. + return ret;
  13053. +}
  13054. +EXPORT_SYMBOL(rt_spin_trylock_irqsave);
  13055. +
  13056. +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
  13057. +{
  13058. + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
  13059. + if (atomic_add_unless(atomic, -1, 1))
  13060. + return 0;
  13061. + rt_spin_lock(lock);
  13062. + if (atomic_dec_and_test(atomic))
  13063. + return 1;
  13064. + rt_spin_unlock(lock);
  13065. + return 0;
  13066. +}
  13067. +EXPORT_SYMBOL(atomic_dec_and_spin_lock);
  13068. +
  13069. + void
  13070. +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
  13071. +{
  13072. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13073. + /*
  13074. + * Make sure we are not reinitializing a held lock:
  13075. + */
  13076. + debug_check_no_locks_freed((void *)lock, sizeof(*lock));
  13077. + lockdep_init_map(&lock->dep_map, name, key, 0);
  13078. +#endif
  13079. +}
  13080. +EXPORT_SYMBOL(__rt_spin_lock_init);
  13081. +
  13082. +#endif /* PREEMPT_RT_FULL */
  13083. +
  13084. +#ifdef CONFIG_PREEMPT_RT_FULL
  13085. + static inline int __sched
  13086. +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
  13087. +{
  13088. + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
  13089. + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
  13090. +
  13091. + if (!hold_ctx)
  13092. + return 0;
  13093. +
  13094. + if (unlikely(ctx == hold_ctx))
  13095. + return -EALREADY;
  13096. +
  13097. + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
  13098. + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
  13099. +#ifdef CONFIG_DEBUG_MUTEXES
  13100. + DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
  13101. + ctx->contending_lock = ww;
  13102. +#endif
  13103. + return -EDEADLK;
  13104. + }
  13105. +
  13106. + return 0;
  13107. +}
  13108. +#else
  13109. + static inline int __sched
  13110. +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
  13111. +{
  13112. + BUG();
  13113. + return 0;
  13114. +}
  13115. +
  13116. +#endif
  13117. +
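
__mutex_lock_check_stamp() is the wait/wound back-off test: the same context
asking again gets -EALREADY, a context whose stamp is newer than the holder's
(compared wraparound-safely) must back off with -EDEADLK, and an older context
is allowed to keep waiting.  A compact restatement of that comparison with
unsigned tickets (illustrative only; the pointer tie-break for equal stamps is
dropped):

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdbool.h>

/* Wraparound-safe "a is not older than b", matching the
 * ctx->stamp - hold_ctx->stamp <= LONG_MAX test above. */
static bool ticket_after_eq(unsigned long a, unsigned long b)
{
        return a - b <= LONG_MAX;
}

static int demo_check_stamp(unsigned long ctx_stamp, unsigned long hold_stamp,
                            bool same_ctx)
{
        if (same_ctx)
                return -EALREADY;
        if (ticket_after_eq(ctx_stamp, hold_stamp) && ctx_stamp != hold_stamp)
                return -EDEADLK;        /* younger context backs off */
        return 0;                       /* older context keeps waiting */
}

int main(void)
{
        assert(demo_check_stamp(7, 3, false) == -EDEADLK);
        assert(demo_check_stamp(3, 7, false) == 0);
        assert(demo_check_stamp(5, 5, true)  == -EALREADY);
        return 0;
}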
  13118. +static inline int
  13119. +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  13120. + struct rt_mutex_waiter *waiter)
  13121. +{
  13122. + return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
  13123. +}
  13124. +
  13125. /*
  13126. * Task blocks on lock.
  13127. *
  13128. @@ -907,6 +1342,23 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  13129. return -EDEADLK;
  13130. raw_spin_lock(&task->pi_lock);
  13131. +
  13132. + /*
  13133. + * In the case of futex requeue PI, this will be a proxy
13134. + * lock. The task will wake unaware that it is enqueued on
  13135. + * this lock. Avoid blocking on two locks and corrupting
  13136. + * pi_blocked_on via the PI_WAKEUP_INPROGRESS
  13137. + * flag. futex_wait_requeue_pi() sets this when it wakes up
  13138. + * before requeue (due to a signal or timeout). Do not enqueue
  13139. + * the task if PI_WAKEUP_INPROGRESS is set.
  13140. + */
  13141. + if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
  13142. + raw_spin_unlock(&task->pi_lock);
  13143. + return -EAGAIN;
  13144. + }
  13145. +
  13146. + BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
  13147. +
  13148. __rt_mutex_adjust_prio(task);
  13149. waiter->task = task;
  13150. waiter->lock = lock;
  13151. @@ -930,7 +1382,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  13152. rt_mutex_enqueue_pi(owner, waiter);
  13153. __rt_mutex_adjust_prio(owner);
  13154. - if (owner->pi_blocked_on)
  13155. + if (rt_mutex_real_waiter(owner->pi_blocked_on))
  13156. chain_walk = 1;
  13157. } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
  13158. chain_walk = 1;
  13159. @@ -972,6 +1424,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  13160. * Called with lock->wait_lock held and interrupts disabled.
  13161. */
  13162. static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
  13163. + struct wake_q_head *wake_sleeper_q,
  13164. struct rt_mutex *lock)
  13165. {
  13166. struct rt_mutex_waiter *waiter;
  13167. @@ -1000,7 +1453,10 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
  13168. raw_spin_unlock(&current->pi_lock);
  13169. - wake_q_add(wake_q, waiter->task);
  13170. + if (waiter->savestate)
  13171. + wake_q_add(wake_sleeper_q, waiter->task);
  13172. + else
  13173. + wake_q_add(wake_q, waiter->task);
  13174. }
  13175. /*
  13176. @@ -1014,7 +1470,7 @@ static void remove_waiter(struct rt_mutex *lock,
  13177. {
  13178. bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
  13179. struct task_struct *owner = rt_mutex_owner(lock);
  13180. - struct rt_mutex *next_lock;
  13181. + struct rt_mutex *next_lock = NULL;
  13182. raw_spin_lock(&current->pi_lock);
  13183. rt_mutex_dequeue(lock, waiter);
  13184. @@ -1038,7 +1494,8 @@ static void remove_waiter(struct rt_mutex *lock,
  13185. __rt_mutex_adjust_prio(owner);
  13186. /* Store the lock on which owner is blocked or NULL */
  13187. - next_lock = task_blocked_on_lock(owner);
  13188. + if (rt_mutex_real_waiter(owner->pi_blocked_on))
  13189. + next_lock = task_blocked_on_lock(owner);
  13190. raw_spin_unlock(&owner->pi_lock);
  13191. @@ -1074,17 +1531,17 @@ void rt_mutex_adjust_pi(struct task_struct *task)
  13192. raw_spin_lock_irqsave(&task->pi_lock, flags);
  13193. waiter = task->pi_blocked_on;
  13194. - if (!waiter || (waiter->prio == task->prio &&
  13195. + if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio &&
  13196. !dl_prio(task->prio))) {
  13197. raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  13198. return;
  13199. }
  13200. next_lock = waiter->lock;
  13201. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  13202. /* gets dropped in rt_mutex_adjust_prio_chain()! */
  13203. get_task_struct(task);
  13204. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  13205. rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
  13206. next_lock, NULL, task);
  13207. }
  13208. @@ -1102,7 +1559,8 @@ void rt_mutex_adjust_pi(struct task_struct *task)
  13209. static int __sched
  13210. __rt_mutex_slowlock(struct rt_mutex *lock, int state,
  13211. struct hrtimer_sleeper *timeout,
  13212. - struct rt_mutex_waiter *waiter)
  13213. + struct rt_mutex_waiter *waiter,
  13214. + struct ww_acquire_ctx *ww_ctx)
  13215. {
  13216. int ret = 0;
  13217. @@ -1125,6 +1583,12 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
  13218. break;
  13219. }
  13220. + if (ww_ctx && ww_ctx->acquired > 0) {
  13221. + ret = __mutex_lock_check_stamp(lock, ww_ctx);
  13222. + if (ret)
  13223. + break;
  13224. + }
  13225. +
  13226. raw_spin_unlock_irq(&lock->wait_lock);
  13227. debug_rt_mutex_print_deadlock(waiter);
  13228. @@ -1159,21 +1623,96 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
  13229. }
  13230. }
  13231. +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
  13232. + struct ww_acquire_ctx *ww_ctx)
  13233. +{
  13234. +#ifdef CONFIG_DEBUG_MUTEXES
  13235. + /*
  13236. + * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
  13237. + * but released with a normal mutex_unlock in this call.
  13238. + *
  13239. + * This should never happen, always use ww_mutex_unlock.
  13240. + */
  13241. + DEBUG_LOCKS_WARN_ON(ww->ctx);
  13242. +
  13243. + /*
  13244. + * Not quite done after calling ww_acquire_done() ?
  13245. + */
  13246. + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
  13247. +
  13248. + if (ww_ctx->contending_lock) {
  13249. + /*
  13250. + * After -EDEADLK you tried to
  13251. + * acquire a different ww_mutex? Bad!
  13252. + */
  13253. + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
  13254. +
  13255. + /*
  13256. + * You called ww_mutex_lock after receiving -EDEADLK,
  13257. + * but 'forgot' to unlock everything else first?
  13258. + */
  13259. + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
  13260. + ww_ctx->contending_lock = NULL;
  13261. + }
  13262. +
  13263. + /*
  13264. + * Naughty, using a different class will lead to undefined behavior!
  13265. + */
  13266. + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
  13267. +#endif
  13268. + ww_ctx->acquired++;
  13269. +}
  13270. +
  13271. +#ifdef CONFIG_PREEMPT_RT_FULL
  13272. +static void ww_mutex_account_lock(struct rt_mutex *lock,
  13273. + struct ww_acquire_ctx *ww_ctx)
  13274. +{
  13275. + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
  13276. + struct rt_mutex_waiter *waiter, *n;
  13277. +
  13278. + /*
  13279. + * This branch gets optimized out for the common case,
  13280. + * and is only important for ww_mutex_lock.
  13281. + */
  13282. + ww_mutex_lock_acquired(ww, ww_ctx);
  13283. + ww->ctx = ww_ctx;
  13284. +
  13285. + /*
  13286. + * Give any possible sleeping processes the chance to wake up,
  13287. + * so they can recheck if they have to back off.
  13288. + */
  13289. + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters,
  13290. + tree_entry) {
  13291. + /* XXX debug rt mutex waiter wakeup */
  13292. +
  13293. + BUG_ON(waiter->lock != lock);
  13294. + rt_mutex_wake_waiter(waiter);
  13295. + }
  13296. +}
  13297. +
  13298. +#else
  13299. +
  13300. +static void ww_mutex_account_lock(struct rt_mutex *lock,
  13301. + struct ww_acquire_ctx *ww_ctx)
  13302. +{
  13303. + BUG();
  13304. +}
  13305. +#endif
  13306. +
  13307. /*
  13308. * Slow path lock function:
  13309. */
  13310. static int __sched
  13311. rt_mutex_slowlock(struct rt_mutex *lock, int state,
  13312. struct hrtimer_sleeper *timeout,
  13313. - enum rtmutex_chainwalk chwalk)
  13314. + enum rtmutex_chainwalk chwalk,
  13315. + struct ww_acquire_ctx *ww_ctx)
  13316. {
  13317. struct rt_mutex_waiter waiter;
  13318. unsigned long flags;
  13319. int ret = 0;
  13320. - debug_rt_mutex_init_waiter(&waiter);
  13321. - RB_CLEAR_NODE(&waiter.pi_tree_entry);
  13322. - RB_CLEAR_NODE(&waiter.tree_entry);
  13323. + rt_mutex_init_waiter(&waiter, false);
  13324. /*
  13325. * Technically we could use raw_spin_[un]lock_irq() here, but this can
  13326. @@ -1187,6 +1726,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
  13327. /* Try to acquire the lock again: */
  13328. if (try_to_take_rt_mutex(lock, current, NULL)) {
  13329. + if (ww_ctx)
  13330. + ww_mutex_account_lock(lock, ww_ctx);
  13331. raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  13332. return 0;
  13333. }
  13334. @@ -1201,13 +1742,23 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
  13335. if (likely(!ret))
  13336. /* sleep on the mutex */
  13337. - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
  13338. + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
  13339. + ww_ctx);
  13340. + else if (ww_ctx) {
  13341. + /* ww_mutex received EDEADLK, let it become EALREADY */
  13342. + ret = __mutex_lock_check_stamp(lock, ww_ctx);
  13343. + BUG_ON(!ret);
  13344. + }
  13345. if (unlikely(ret)) {
  13346. __set_current_state(TASK_RUNNING);
  13347. if (rt_mutex_has_waiters(lock))
  13348. remove_waiter(lock, &waiter);
  13349. - rt_mutex_handle_deadlock(ret, chwalk, &waiter);
13350. + /* ww_mutex wants to report EDEADLK/EALREADY, let it */
  13351. + if (!ww_ctx)
  13352. + rt_mutex_handle_deadlock(ret, chwalk, &waiter);
  13353. + } else if (ww_ctx) {
  13354. + ww_mutex_account_lock(lock, ww_ctx);
  13355. }
  13356. /*
  13357. @@ -1267,7 +1818,8 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
  13358. * Return whether the current task needs to undo a potential priority boosting.
  13359. */
  13360. static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
  13361. - struct wake_q_head *wake_q)
  13362. + struct wake_q_head *wake_q,
  13363. + struct wake_q_head *wake_sleeper_q)
  13364. {
  13365. unsigned long flags;
  13366. @@ -1323,7 +1875,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
  13367. *
  13368. * Queue the next waiter for wakeup once we release the wait_lock.
  13369. */
  13370. - mark_wakeup_next_waiter(wake_q, lock);
  13371. + mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock);
  13372. raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  13373. @@ -1339,31 +1891,36 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
  13374. */
  13375. static inline int
  13376. rt_mutex_fastlock(struct rt_mutex *lock, int state,
  13377. + struct ww_acquire_ctx *ww_ctx,
  13378. int (*slowfn)(struct rt_mutex *lock, int state,
  13379. struct hrtimer_sleeper *timeout,
  13380. - enum rtmutex_chainwalk chwalk))
  13381. + enum rtmutex_chainwalk chwalk,
  13382. + struct ww_acquire_ctx *ww_ctx))
  13383. {
  13384. if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
  13385. rt_mutex_deadlock_account_lock(lock, current);
  13386. return 0;
  13387. } else
  13388. - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
  13389. + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK,
  13390. + ww_ctx);
  13391. }
  13392. static inline int
  13393. rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
  13394. struct hrtimer_sleeper *timeout,
  13395. enum rtmutex_chainwalk chwalk,
  13396. + struct ww_acquire_ctx *ww_ctx,
  13397. int (*slowfn)(struct rt_mutex *lock, int state,
  13398. struct hrtimer_sleeper *timeout,
  13399. - enum rtmutex_chainwalk chwalk))
  13400. + enum rtmutex_chainwalk chwalk,
  13401. + struct ww_acquire_ctx *ww_ctx))
  13402. {
  13403. if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
  13404. likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
  13405. rt_mutex_deadlock_account_lock(lock, current);
  13406. return 0;
  13407. } else
  13408. - return slowfn(lock, state, timeout, chwalk);
  13409. + return slowfn(lock, state, timeout, chwalk, ww_ctx);
  13410. }
  13411. static inline int
  13412. @@ -1380,17 +1937,20 @@ rt_mutex_fasttrylock(struct rt_mutex *lock,
  13413. static inline void
  13414. rt_mutex_fastunlock(struct rt_mutex *lock,
  13415. bool (*slowfn)(struct rt_mutex *lock,
  13416. - struct wake_q_head *wqh))
  13417. + struct wake_q_head *wqh,
  13418. + struct wake_q_head *wq_sleeper))
  13419. {
  13420. WAKE_Q(wake_q);
  13421. + WAKE_Q(wake_sleeper_q);
  13422. if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
  13423. rt_mutex_deadlock_account_unlock(current);
  13424. } else {
  13425. - bool deboost = slowfn(lock, &wake_q);
  13426. + bool deboost = slowfn(lock, &wake_q, &wake_sleeper_q);
  13427. wake_up_q(&wake_q);
  13428. + wake_up_q_sleeper(&wake_sleeper_q);
  13429. /* Undo pi boosting if necessary: */
  13430. if (deboost)
  13431. @@ -1407,7 +1967,7 @@ void __sched rt_mutex_lock(struct rt_mutex *lock)
  13432. {
  13433. might_sleep();
  13434. - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
  13435. + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock);
  13436. }
  13437. EXPORT_SYMBOL_GPL(rt_mutex_lock);
  13438. @@ -1424,7 +1984,7 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
  13439. {
  13440. might_sleep();
  13441. - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
  13442. + return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock);
  13443. }
  13444. EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
  13445. @@ -1437,11 +1997,30 @@ int rt_mutex_timed_futex_lock(struct rt_mutex *lock,
  13446. might_sleep();
  13447. return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
  13448. - RT_MUTEX_FULL_CHAINWALK,
  13449. + RT_MUTEX_FULL_CHAINWALK, NULL,
  13450. rt_mutex_slowlock);
  13451. }
  13452. /**
  13453. + * rt_mutex_lock_killable - lock a rt_mutex killable
  13454. + *
  13455. + * @lock: the rt_mutex to be locked
  13456. + * @detect_deadlock: deadlock detection on/off
  13457. + *
  13458. + * Returns:
  13459. + * 0 on success
  13460. + * -EINTR when interrupted by a signal
  13461. + * -EDEADLK when the lock would deadlock (when deadlock detection is on)
  13462. + */
  13463. +int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
  13464. +{
  13465. + might_sleep();
  13466. +
  13467. + return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock);
  13468. +}
  13469. +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
  13470. +
  13471. +/**
  13472. * rt_mutex_timed_lock - lock a rt_mutex interruptible
  13473. * the timeout structure is provided
  13474. * by the caller
  13475. @@ -1461,6 +2040,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
  13476. return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
  13477. RT_MUTEX_MIN_CHAINWALK,
  13478. + NULL,
  13479. rt_mutex_slowlock);
  13480. }
  13481. EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
  13482. @@ -1478,7 +2058,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
  13483. */
  13484. int __sched rt_mutex_trylock(struct rt_mutex *lock)
  13485. {
  13486. +#ifdef CONFIG_PREEMPT_RT_FULL
  13487. + if (WARN_ON_ONCE(in_irq() || in_nmi()))
  13488. +#else
  13489. if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq()))
  13490. +#endif
  13491. return 0;
  13492. return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
  13493. @@ -1504,13 +2088,14 @@ EXPORT_SYMBOL_GPL(rt_mutex_unlock);
  13494. * required or not.
  13495. */
  13496. bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
  13497. - struct wake_q_head *wqh)
  13498. + struct wake_q_head *wqh,
  13499. + struct wake_q_head *wq_sleeper)
  13500. {
  13501. if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
  13502. rt_mutex_deadlock_account_unlock(current);
  13503. return false;
  13504. }
  13505. - return rt_mutex_slowunlock(lock, wqh);
  13506. + return rt_mutex_slowunlock(lock, wqh, wq_sleeper);
  13507. }
  13508. /**
  13509. @@ -1543,13 +2128,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
  13510. void __rt_mutex_init(struct rt_mutex *lock, const char *name)
  13511. {
  13512. lock->owner = NULL;
  13513. - raw_spin_lock_init(&lock->wait_lock);
  13514. lock->waiters = RB_ROOT;
  13515. lock->waiters_leftmost = NULL;
  13516. debug_rt_mutex_init(lock, name);
  13517. }
  13518. -EXPORT_SYMBOL_GPL(__rt_mutex_init);
  13519. +EXPORT_SYMBOL(__rt_mutex_init);
  13520. /**
  13521. * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
  13522. @@ -1564,7 +2148,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
  13523. void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  13524. struct task_struct *proxy_owner)
  13525. {
  13526. - __rt_mutex_init(lock, NULL);
  13527. + rt_mutex_init(lock);
  13528. debug_rt_mutex_proxy_lock(lock, proxy_owner);
  13529. rt_mutex_set_owner(lock, proxy_owner);
  13530. rt_mutex_deadlock_account_lock(lock, proxy_owner);
  13531. @@ -1612,6 +2196,35 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
  13532. return 1;
  13533. }
  13534. +#ifdef CONFIG_PREEMPT_RT_FULL
  13535. + /*
  13536. + * In PREEMPT_RT there's an added race.
13537. + * If the task that we are about to requeue times out,
13538. + * it can set PI_WAKEUP_INPROGRESS. This tells the requeue
  13539. + * to skip this task. But right after the task sets
  13540. + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
  13541. + * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
  13542. + * This will replace the PI_WAKEUP_INPROGRESS with the actual
  13543. + * lock that it blocks on. We *must not* place this task
  13544. + * on this proxy lock in that case.
  13545. + *
  13546. + * To prevent this race, we first take the task's pi_lock
  13547. + * and check if it has updated its pi_blocked_on. If it has,
  13548. + * we assume that it woke up and we return -EAGAIN.
  13549. + * Otherwise, we set the task's pi_blocked_on to
  13550. + * PI_REQUEUE_INPROGRESS, so that if the task is waking up
  13551. + * it will know that we are in the process of requeuing it.
  13552. + */
  13553. + raw_spin_lock(&task->pi_lock);
  13554. + if (task->pi_blocked_on) {
  13555. + raw_spin_unlock(&task->pi_lock);
  13556. + raw_spin_unlock_irq(&lock->wait_lock);
  13557. + return -EAGAIN;
  13558. + }
  13559. + task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
  13560. + raw_spin_unlock(&task->pi_lock);
  13561. +#endif
  13562. +
  13563. /* We enforce deadlock detection for futexes */
  13564. ret = task_blocks_on_rt_mutex(lock, waiter, task,
  13565. RT_MUTEX_FULL_CHAINWALK);
  13566. @@ -1626,7 +2239,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
  13567. ret = 0;
  13568. }
  13569. - if (unlikely(ret))
  13570. + if (ret && rt_mutex_has_waiters(lock))
  13571. remove_waiter(lock, waiter);
  13572. raw_spin_unlock_irq(&lock->wait_lock);
  13573. @@ -1682,7 +2295,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
  13574. set_current_state(TASK_INTERRUPTIBLE);
  13575. /* sleep on the mutex */
  13576. - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
  13577. + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
  13578. if (unlikely(ret))
  13579. remove_waiter(lock, waiter);
  13580. @@ -1697,3 +2310,89 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
  13581. return ret;
  13582. }
  13583. +
  13584. +static inline int
  13585. +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
  13586. +{
  13587. +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
  13588. + unsigned tmp;
  13589. +
  13590. + if (ctx->deadlock_inject_countdown-- == 0) {
  13591. + tmp = ctx->deadlock_inject_interval;
  13592. + if (tmp > UINT_MAX/4)
  13593. + tmp = UINT_MAX;
  13594. + else
  13595. + tmp = tmp*2 + tmp + tmp/2;
  13596. +
  13597. + ctx->deadlock_inject_interval = tmp;
  13598. + ctx->deadlock_inject_countdown = tmp;
  13599. + ctx->contending_lock = lock;
  13600. +
  13601. + ww_mutex_unlock(lock);
  13602. +
  13603. + return -EDEADLK;
  13604. + }
  13605. +#endif
  13606. +
  13607. + return 0;
  13608. +}
  13609. +
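
ww_mutex_deadlock_injection() (under CONFIG_DEBUG_WW_MUTEX_SLOWPATH) fakes an
-EDEADLK once per countdown and then stretches the interval by roughly 3.5x
(tmp*2 + tmp + tmp/2), saturating at UINT_MAX, so the artificial back-offs get
rarer as the workload keeps running.  The interval growth in isolation
(hypothetical helper mirroring only the arithmetic):

#include <limits.h>
#include <stdio.h>

static unsigned int next_interval(unsigned int tmp)
{
        if (tmp > UINT_MAX / 4)
                return UINT_MAX;        /* saturate instead of overflowing */
        return tmp * 2 + tmp + tmp / 2;
}

int main(void)
{
        unsigned int n = 8;

        for (int i = 0; i < 6; i++) {
                printf("%u\n", n);      /* 8, 28, 98, 343, 1200, 4200 */
                n = next_interval(n);
        }
        return 0;
}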
  13610. +#ifdef CONFIG_PREEMPT_RT_FULL
  13611. +int __sched
  13612. +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
  13613. +{
  13614. + int ret;
  13615. +
  13616. + might_sleep();
  13617. +
  13618. + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
  13619. + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx);
  13620. + if (ret)
  13621. + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
  13622. + else if (!ret && ww_ctx->acquired > 1)
  13623. + return ww_mutex_deadlock_injection(lock, ww_ctx);
  13624. +
  13625. + return ret;
  13626. +}
  13627. +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
  13628. +
  13629. +int __sched
  13630. +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
  13631. +{
  13632. + int ret;
  13633. +
  13634. + might_sleep();
  13635. +
  13636. + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
  13637. + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx);
  13638. + if (ret)
  13639. + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
  13640. + else if (!ret && ww_ctx->acquired > 1)
  13641. + return ww_mutex_deadlock_injection(lock, ww_ctx);
  13642. +
  13643. + return ret;
  13644. +}
  13645. +EXPORT_SYMBOL_GPL(__ww_mutex_lock);
  13646. +
  13647. +void __sched ww_mutex_unlock(struct ww_mutex *lock)
  13648. +{
  13649. + int nest = !!lock->ctx;
  13650. +
  13651. + /*
  13652. + * The unlocking fastpath is the 0->1 transition from 'locked'
  13653. + * into 'unlocked' state:
  13654. + */
  13655. + if (nest) {
  13656. +#ifdef CONFIG_DEBUG_MUTEXES
  13657. + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
  13658. +#endif
  13659. + if (lock->ctx->acquired > 0)
  13660. + lock->ctx->acquired--;
  13661. + lock->ctx = NULL;
  13662. + }
  13663. +
  13664. + mutex_release(&lock->base.dep_map, nest, _RET_IP_);
  13665. + rt_mutex_unlock(&lock->base.lock);
  13666. +}
  13667. +EXPORT_SYMBOL(ww_mutex_unlock);
  13668. +#endif
  13669. diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
  13670. index 4f5f83c7d2d3..289f062f26cd 100644
  13671. --- a/kernel/locking/rtmutex_common.h
  13672. +++ b/kernel/locking/rtmutex_common.h
  13673. @@ -27,6 +27,7 @@ struct rt_mutex_waiter {
  13674. struct rb_node pi_tree_entry;
  13675. struct task_struct *task;
  13676. struct rt_mutex *lock;
  13677. + bool savestate;
  13678. #ifdef CONFIG_DEBUG_RT_MUTEXES
  13679. unsigned long ip;
  13680. struct pid *deadlock_task_pid;
  13681. @@ -97,6 +98,9 @@ enum rtmutex_chainwalk {
  13682. /*
  13683. * PI-futex support (proxy locking functions, etc.):
  13684. */
  13685. +#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
  13686. +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)
  13687. +
  13688. extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
  13689. extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  13690. struct task_struct *proxy_owner);
  13691. @@ -110,7 +114,8 @@ extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
  13692. struct rt_mutex_waiter *waiter);
  13693. extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
  13694. extern bool rt_mutex_futex_unlock(struct rt_mutex *lock,
  13695. - struct wake_q_head *wqh);
  13696. + struct wake_q_head *wqh,
  13697. + struct wake_q_head *wq_sleeper);
  13698. extern void rt_mutex_adjust_prio(struct task_struct *task);
  13699. #ifdef CONFIG_DEBUG_RT_MUTEXES
  13700. @@ -119,4 +124,14 @@ extern void rt_mutex_adjust_prio(struct task_struct *task);
  13701. # include "rtmutex.h"
  13702. #endif
  13703. +static inline void
  13704. +rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
  13705. +{
  13706. + debug_rt_mutex_init_waiter(waiter);
  13707. + waiter->task = NULL;
  13708. + waiter->savestate = savestate;
  13709. + RB_CLEAR_NODE(&waiter->pi_tree_entry);
  13710. + RB_CLEAR_NODE(&waiter->tree_entry);
  13711. +}
  13712. +
  13713. #endif
  13714. diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
  13715. index db3ccb1dd614..909779647bd1 100644
  13716. --- a/kernel/locking/spinlock.c
  13717. +++ b/kernel/locking/spinlock.c
  13718. @@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
  13719. * __[spin|read|write]_lock_bh()
  13720. */
  13721. BUILD_LOCK_OPS(spin, raw_spinlock);
  13722. +
  13723. +#ifndef CONFIG_PREEMPT_RT_FULL
  13724. BUILD_LOCK_OPS(read, rwlock);
  13725. BUILD_LOCK_OPS(write, rwlock);
  13726. +#endif
  13727. #endif
  13728. @@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
  13729. EXPORT_SYMBOL(_raw_spin_unlock_bh);
  13730. #endif
  13731. +#ifndef CONFIG_PREEMPT_RT_FULL
  13732. +
  13733. #ifndef CONFIG_INLINE_READ_TRYLOCK
  13734. int __lockfunc _raw_read_trylock(rwlock_t *lock)
  13735. {
  13736. @@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
  13737. EXPORT_SYMBOL(_raw_write_unlock_bh);
  13738. #endif
  13739. +#endif /* !PREEMPT_RT_FULL */
  13740. +
  13741. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  13742. void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
  13743. diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
  13744. index 0374a596cffa..94970338d518 100644
  13745. --- a/kernel/locking/spinlock_debug.c
  13746. +++ b/kernel/locking/spinlock_debug.c
  13747. @@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
  13748. EXPORT_SYMBOL(__raw_spin_lock_init);
  13749. +#ifndef CONFIG_PREEMPT_RT_FULL
  13750. void __rwlock_init(rwlock_t *lock, const char *name,
  13751. struct lock_class_key *key)
  13752. {
  13753. @@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name,
  13754. }
  13755. EXPORT_SYMBOL(__rwlock_init);
  13756. +#endif
  13757. static void spin_dump(raw_spinlock_t *lock, const char *msg)
  13758. {
  13759. @@ -159,6 +161,7 @@ void do_raw_spin_unlock(raw_spinlock_t *lock)
  13760. arch_spin_unlock(&lock->raw_lock);
  13761. }
  13762. +#ifndef CONFIG_PREEMPT_RT_FULL
  13763. static void rwlock_bug(rwlock_t *lock, const char *msg)
  13764. {
  13765. if (!debug_locks_off())
  13766. @@ -300,3 +303,5 @@ void do_raw_write_unlock(rwlock_t *lock)
  13767. debug_write_unlock(lock);
  13768. arch_write_unlock(&lock->raw_lock);
  13769. }
  13770. +
  13771. +#endif
  13772. diff --git a/kernel/panic.c b/kernel/panic.c
  13773. index 535c96510a44..3373a70ac3f0 100644
  13774. --- a/kernel/panic.c
  13775. +++ b/kernel/panic.c
  13776. @@ -444,9 +444,11 @@ static u64 oops_id;
  13777. static int init_oops_id(void)
  13778. {
  13779. +#ifndef CONFIG_PREEMPT_RT_FULL
  13780. if (!oops_id)
  13781. get_random_bytes(&oops_id, sizeof(oops_id));
  13782. else
  13783. +#endif
  13784. oops_id++;
  13785. return 0;
  13786. diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
  13787. index fca9254280ee..4e4063e7b5cc 100644
  13788. --- a/kernel/power/hibernate.c
  13789. +++ b/kernel/power/hibernate.c
  13790. @@ -285,6 +285,8 @@ static int create_image(int platform_mode)
  13791. local_irq_disable();
  13792. + system_state = SYSTEM_SUSPEND;
  13793. +
  13794. error = syscore_suspend();
  13795. if (error) {
  13796. printk(KERN_ERR "PM: Some system devices failed to power down, "
  13797. @@ -314,6 +316,7 @@ static int create_image(int platform_mode)
  13798. syscore_resume();
  13799. Enable_irqs:
  13800. + system_state = SYSTEM_RUNNING;
  13801. local_irq_enable();
  13802. Enable_cpus:
  13803. @@ -438,6 +441,7 @@ static int resume_target_kernel(bool platform_mode)
  13804. goto Enable_cpus;
  13805. local_irq_disable();
  13806. + system_state = SYSTEM_SUSPEND;
  13807. error = syscore_suspend();
  13808. if (error)
  13809. @@ -471,6 +475,7 @@ static int resume_target_kernel(bool platform_mode)
  13810. syscore_resume();
  13811. Enable_irqs:
  13812. + system_state = SYSTEM_RUNNING;
  13813. local_irq_enable();
  13814. Enable_cpus:
  13815. @@ -556,6 +561,7 @@ int hibernation_platform_enter(void)
  13816. goto Enable_cpus;
  13817. local_irq_disable();
  13818. + system_state = SYSTEM_SUSPEND;
  13819. syscore_suspend();
  13820. if (pm_wakeup_pending()) {
  13821. error = -EAGAIN;
  13822. @@ -568,6 +574,7 @@ int hibernation_platform_enter(void)
  13823. Power_up:
  13824. syscore_resume();
  13825. + system_state = SYSTEM_RUNNING;
  13826. local_irq_enable();
  13827. Enable_cpus:
  13828. @@ -642,6 +649,10 @@ static void power_down(void)
  13829. cpu_relax();
  13830. }
  13831. +#ifndef CONFIG_SUSPEND
  13832. +bool pm_in_action;
  13833. +#endif
  13834. +
  13835. /**
  13836. * hibernate - Carry out system hibernation, including saving the image.
  13837. */
  13838. @@ -654,6 +665,8 @@ int hibernate(void)
  13839. return -EPERM;
  13840. }
  13841. + pm_in_action = true;
  13842. +
  13843. lock_system_sleep();
  13844. /* The snapshot device should not be opened while we're running */
  13845. if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
  13846. @@ -719,6 +732,7 @@ int hibernate(void)
  13847. atomic_inc(&snapshot_device_available);
  13848. Unlock:
  13849. unlock_system_sleep();
  13850. + pm_in_action = false;
  13851. return error;
  13852. }
  13853. diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
  13854. index 5b70d64b871e..e25382cfb249 100644
  13855. --- a/kernel/power/suspend.c
  13856. +++ b/kernel/power/suspend.c
  13857. @@ -359,6 +359,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
  13858. arch_suspend_disable_irqs();
  13859. BUG_ON(!irqs_disabled());
  13860. + system_state = SYSTEM_SUSPEND;
  13861. +
  13862. error = syscore_suspend();
  13863. if (!error) {
  13864. *wakeup = pm_wakeup_pending();
  13865. @@ -375,6 +377,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
  13866. syscore_resume();
  13867. }
  13868. + system_state = SYSTEM_RUNNING;
  13869. +
  13870. arch_suspend_enable_irqs();
  13871. BUG_ON(irqs_disabled());
  13872. @@ -517,6 +521,8 @@ static int enter_state(suspend_state_t state)
  13873. return error;
  13874. }
  13875. +bool pm_in_action;
  13876. +
  13877. /**
  13878. * pm_suspend - Externally visible function for suspending the system.
  13879. * @state: System sleep state to enter.
  13880. @@ -531,6 +537,8 @@ int pm_suspend(suspend_state_t state)
  13881. if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
  13882. return -EINVAL;
  13883. + pm_in_action = true;
  13884. +
  13885. error = enter_state(state);
  13886. if (error) {
  13887. suspend_stats.fail++;
  13888. @@ -538,6 +546,7 @@ int pm_suspend(suspend_state_t state)
  13889. } else {
  13890. suspend_stats.success++;
  13891. }
  13892. + pm_in_action = false;
  13893. return error;
  13894. }
  13895. EXPORT_SYMBOL(pm_suspend);
  13896. diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
  13897. index bfbf284e4218..ba5e3381a8cc 100644
  13898. --- a/kernel/printk/printk.c
  13899. +++ b/kernel/printk/printk.c
  13900. @@ -246,6 +246,65 @@ __packed __aligned(4)
  13901. */
  13902. static DEFINE_RAW_SPINLOCK(logbuf_lock);
  13903. +#ifdef CONFIG_EARLY_PRINTK
  13904. +struct console *early_console;
  13905. +
  13906. +static void early_vprintk(const char *fmt, va_list ap)
  13907. +{
  13908. + if (early_console) {
  13909. + char buf[512];
  13910. + int n = vscnprintf(buf, sizeof(buf), fmt, ap);
  13911. +
  13912. + early_console->write(early_console, buf, n);
  13913. + }
  13914. +}
  13915. +
  13916. +asmlinkage void early_printk(const char *fmt, ...)
  13917. +{
  13918. + va_list ap;
  13919. +
  13920. + va_start(ap, fmt);
  13921. + early_vprintk(fmt, ap);
  13922. + va_end(ap);
  13923. +}
  13924. +
  13925. +/*
  13926. + * This is independent of any log levels - a global
  13927. + * kill switch that turns off all of printk.
  13928. + *
  13929. + * Used by the NMI watchdog if early-printk is enabled.
  13930. + */
  13931. +static bool __read_mostly printk_killswitch;
  13932. +
  13933. +static int __init force_early_printk_setup(char *str)
  13934. +{
  13935. + printk_killswitch = true;
  13936. + return 0;
  13937. +}
  13938. +early_param("force_early_printk", force_early_printk_setup);
  13939. +
  13940. +void printk_kill(void)
  13941. +{
  13942. + printk_killswitch = true;
  13943. +}
  13944. +
  13945. +#ifdef CONFIG_PRINTK
  13946. +static int forced_early_printk(const char *fmt, va_list ap)
  13947. +{
  13948. + if (!printk_killswitch)
  13949. + return 0;
  13950. + early_vprintk(fmt, ap);
  13951. + return 1;
  13952. +}
  13953. +#endif
  13954. +
  13955. +#else
  13956. +static inline int forced_early_printk(const char *fmt, va_list ap)
  13957. +{
  13958. + return 0;
  13959. +}
  13960. +#endif
  13961. +
  13962. #ifdef CONFIG_PRINTK
  13963. DECLARE_WAIT_QUEUE_HEAD(log_wait);
  13964. /* the next printk record to read by syslog(READ) or /proc/kmsg */
  13965. @@ -1209,6 +1268,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
  13966. {
  13967. char *text;
  13968. int len = 0;
  13969. + int attempts = 0;
  13970. text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
  13971. if (!text)
  13972. @@ -1220,6 +1280,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
  13973. u64 seq;
  13974. u32 idx;
  13975. enum log_flags prev;
  13976. + int num_msg;
  13977. +try_again:
  13978. + attempts++;
  13979. + if (attempts > 10) {
  13980. + len = -EBUSY;
  13981. + goto out;
  13982. + }
  13983. + num_msg = 0;
  13984. /*
  13985. * Find first record that fits, including all following records,
  13986. @@ -1235,6 +1303,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
  13987. prev = msg->flags;
  13988. idx = log_next(idx);
  13989. seq++;
  13990. + num_msg++;
  13991. + if (num_msg > 5) {
  13992. + num_msg = 0;
  13993. + raw_spin_unlock_irq(&logbuf_lock);
  13994. + raw_spin_lock_irq(&logbuf_lock);
  13995. + if (clear_seq < log_first_seq)
  13996. + goto try_again;
  13997. + }
  13998. }
  13999. /* move first record forward until length fits into the buffer */
  14000. @@ -1248,6 +1324,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
  14001. prev = msg->flags;
  14002. idx = log_next(idx);
  14003. seq++;
  14004. + num_msg++;
  14005. + if (num_msg > 5) {
  14006. + num_msg = 0;
  14007. + raw_spin_unlock_irq(&logbuf_lock);
  14008. + raw_spin_lock_irq(&logbuf_lock);
  14009. + if (clear_seq < log_first_seq)
  14010. + goto try_again;
  14011. + }
  14012. }
  14013. /* last message fitting into this dump */
  14014. @@ -1288,6 +1372,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
  14015. clear_seq = log_next_seq;
  14016. clear_idx = log_next_idx;
  14017. }
  14018. +out:
  14019. raw_spin_unlock_irq(&logbuf_lock);
  14020. kfree(text);
  14021. @@ -1443,6 +1528,12 @@ static void call_console_drivers(int level,
  14022. if (!console_drivers)
  14023. return;
  14024. + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
  14025. + if (in_irq() || in_nmi())
  14026. + return;
  14027. + }
  14028. +
  14029. + migrate_disable();
  14030. for_each_console(con) {
  14031. if (exclusive_console && con != exclusive_console)
  14032. continue;
  14033. @@ -1458,6 +1549,7 @@ static void call_console_drivers(int level,
  14034. else
  14035. con->write(con, text, len);
  14036. }
  14037. + migrate_enable();
  14038. }
  14039. /*
  14040. @@ -1620,6 +1712,13 @@ asmlinkage int vprintk_emit(int facility, int level,
  14041. /* cpu currently holding logbuf_lock in this function */
  14042. static unsigned int logbuf_cpu = UINT_MAX;
  14043. + /*
  14044. + * Fall back to early_printk if a debugging subsystem has
  14045. + * killed printk output
  14046. + */
  14047. + if (unlikely(forced_early_printk(fmt, args)))
  14048. + return 1;
  14049. +
  14050. if (level == LOGLEVEL_SCHED) {
  14051. level = LOGLEVEL_DEFAULT;
  14052. in_sched = true;
  14053. @@ -1755,13 +1854,23 @@ asmlinkage int vprintk_emit(int facility, int level,
  14054. /* If called from the scheduler, we can not call up(). */
  14055. if (!in_sched) {
  14056. + int may_trylock = 1;
  14057. +
  14058. lockdep_off();
  14059. +#ifdef CONFIG_PREEMPT_RT_FULL
  14060. + /*
14061. + * we can't take a sleeping lock with IRQs or preemption disabled
  14062. + * so we can't print in these contexts
  14063. + */
  14064. + if (!(preempt_count() == 0 && !irqs_disabled()))
  14065. + may_trylock = 0;
  14066. +#endif
  14067. /*
  14068. * Try to acquire and then immediately release the console
  14069. * semaphore. The release will print out buffers and wake up
  14070. * /dev/kmsg and syslog() users.
  14071. */
  14072. - if (console_trylock())
  14073. + if (may_trylock && console_trylock())
  14074. console_unlock();
  14075. lockdep_on();
  14076. }
  14077. @@ -1901,26 +2010,6 @@ DEFINE_PER_CPU(printk_func_t, printk_func);
  14078. #endif /* CONFIG_PRINTK */
  14079. -#ifdef CONFIG_EARLY_PRINTK
  14080. -struct console *early_console;
  14081. -
  14082. -asmlinkage __visible void early_printk(const char *fmt, ...)
  14083. -{
  14084. - va_list ap;
  14085. - char buf[512];
  14086. - int n;
  14087. -
  14088. - if (!early_console)
  14089. - return;
  14090. -
  14091. - va_start(ap, fmt);
  14092. - n = vscnprintf(buf, sizeof(buf), fmt, ap);
  14093. - va_end(ap);
  14094. -
  14095. - early_console->write(early_console, buf, n);
  14096. -}
  14097. -#endif
  14098. -
  14099. static int __add_preferred_console(char *name, int idx, char *options,
  14100. char *brl_options)
  14101. {
  14102. @@ -2183,11 +2272,16 @@ static void console_cont_flush(char *text, size_t size)
  14103. goto out;
  14104. len = cont_print_text(text, size);
  14105. +#ifdef CONFIG_PREEMPT_RT_FULL
  14106. + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  14107. + call_console_drivers(cont.level, NULL, 0, text, len);
  14108. +#else
  14109. raw_spin_unlock(&logbuf_lock);
  14110. stop_critical_timings();
  14111. call_console_drivers(cont.level, NULL, 0, text, len);
  14112. start_critical_timings();
  14113. local_irq_restore(flags);
  14114. +#endif
  14115. return;
  14116. out:
  14117. raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  14118. @@ -2309,13 +2403,17 @@ void console_unlock(void)
  14119. console_idx = log_next(console_idx);
  14120. console_seq++;
  14121. console_prev = msg->flags;
  14122. +#ifdef CONFIG_PREEMPT_RT_FULL
  14123. + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  14124. + call_console_drivers(level, ext_text, ext_len, text, len);
  14125. +#else
  14126. raw_spin_unlock(&logbuf_lock);
  14127. stop_critical_timings(); /* don't trace print latency */
  14128. call_console_drivers(level, ext_text, ext_len, text, len);
  14129. start_critical_timings();
  14130. local_irq_restore(flags);
  14131. -
  14132. +#endif
  14133. if (do_cond_resched)
  14134. cond_resched();
  14135. }
  14136. @@ -2367,6 +2465,11 @@ void console_unblank(void)
  14137. {
  14138. struct console *c;
  14139. + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
  14140. + if (in_irq() || in_nmi())
  14141. + return;
  14142. + }
  14143. +
  14144. /*
  14145. * console_unblank can no longer be called in interrupt context unless
  14146. * oops_in_progress is set to 1..
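The syslog_print_all() changes above bound how long logbuf_lock is held with interrupts off: every five records the raw spinlock is dropped and re-taken, and the scan restarts (at most ten times) if the ring buffer moved underneath. A compact sketch of that lock-breaking pattern, assuming invented stand-ins (example_lock, more_work(), consume_one(), state_moved()) for the printk internals:

#include <linux/spinlock.h>
#include <linux/errno.h>

static DEFINE_RAW_SPINLOCK(example_lock);

/* more_work(), consume_one() and state_moved() are hypothetical helpers */
static int walk_log(void)
{
        int attempts = 0, batch, ret = 0;

        raw_spin_lock_irq(&example_lock);
try_again:
        if (++attempts > 10) {
                ret = -EBUSY;
                goto out;
        }
        batch = 0;
        while (more_work()) {
                consume_one();
                if (++batch > 5) {
                        batch = 0;
                        /* let pending interrupts run */
                        raw_spin_unlock_irq(&example_lock);
                        raw_spin_lock_irq(&example_lock);
                        if (state_moved())
                                goto try_again;
                }
        }
out:
        raw_spin_unlock_irq(&example_lock);
        return ret;
}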
  14147. diff --git a/kernel/ptrace.c b/kernel/ptrace.c
  14148. index d49bfa1e53e6..b8cf7a82f4e2 100644
  14149. --- a/kernel/ptrace.c
  14150. +++ b/kernel/ptrace.c
  14151. @@ -128,7 +128,14 @@ static bool ptrace_freeze_traced(struct task_struct *task)
  14152. spin_lock_irq(&task->sighand->siglock);
  14153. if (task_is_traced(task) && !__fatal_signal_pending(task)) {
  14154. - task->state = __TASK_TRACED;
  14155. + unsigned long flags;
  14156. +
  14157. + raw_spin_lock_irqsave(&task->pi_lock, flags);
  14158. + if (task->state & __TASK_TRACED)
  14159. + task->state = __TASK_TRACED;
  14160. + else
  14161. + task->saved_state = __TASK_TRACED;
  14162. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  14163. ret = true;
  14164. }
  14165. spin_unlock_irq(&task->sighand->siglock);
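On RT a task that blocks on a converted "sleeping" spinlock keeps its real state in ->saved_state while ->state temporarily becomes uninterruptible, which is why the hunk above decides under ->pi_lock which of the two fields should become __TASK_TRACED. The same rule written out as a stand-alone sketch (freeze_to_state() is invented; ->saved_state is the field added by this series):

#include <linux/sched.h>

static void freeze_to_state(struct task_struct *task, long state)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&task->pi_lock, flags);
        if (task->state & state)
                task->state = state;            /* really sleeping in 'state' */
        else
                task->saved_state = state;      /* parked behind a sleeping lock */
        raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}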
  14166. diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
  14167. index 250ea67c1615..6349c9affa4d 100644
  14168. --- a/kernel/rcu/rcutorture.c
  14169. +++ b/kernel/rcu/rcutorture.c
  14170. @@ -409,6 +409,7 @@ static struct rcu_torture_ops rcu_ops = {
  14171. .name = "rcu"
  14172. };
  14173. +#ifndef CONFIG_PREEMPT_RT_FULL
  14174. /*
  14175. * Definitions for rcu_bh torture testing.
  14176. */
  14177. @@ -448,6 +449,12 @@ static struct rcu_torture_ops rcu_bh_ops = {
  14178. .name = "rcu_bh"
  14179. };
  14180. +#else
  14181. +static struct rcu_torture_ops rcu_bh_ops = {
  14182. + .ttype = INVALID_RCU_FLAVOR,
  14183. +};
  14184. +#endif
  14185. +
  14186. /*
  14187. * Don't even think about trying any of these in real life!!!
  14188. * The names includes "busted", and they really means it!
  14189. diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
  14190. index 9a535a86e732..ae568fd4885b 100644
  14191. --- a/kernel/rcu/tree.c
  14192. +++ b/kernel/rcu/tree.c
  14193. @@ -56,6 +56,11 @@
  14194. #include <linux/random.h>
  14195. #include <linux/trace_events.h>
  14196. #include <linux/suspend.h>
  14197. +#include <linux/delay.h>
  14198. +#include <linux/gfp.h>
  14199. +#include <linux/oom.h>
  14200. +#include <linux/smpboot.h>
  14201. +#include "../time/tick-internal.h"
  14202. #include "tree.h"
  14203. #include "rcu.h"
  14204. @@ -254,6 +259,19 @@ void rcu_sched_qs(void)
  14205. this_cpu_ptr(&rcu_sched_data), true);
  14206. }
  14207. +#ifdef CONFIG_PREEMPT_RT_FULL
  14208. +static void rcu_preempt_qs(void);
  14209. +
  14210. +void rcu_bh_qs(void)
  14211. +{
  14212. + unsigned long flags;
  14213. +
  14214. + /* Callers to this function, rcu_preempt_qs(), must disable irqs. */
  14215. + local_irq_save(flags);
  14216. + rcu_preempt_qs();
  14217. + local_irq_restore(flags);
  14218. +}
  14219. +#else
  14220. void rcu_bh_qs(void)
  14221. {
  14222. if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {
  14223. @@ -263,6 +281,7 @@ void rcu_bh_qs(void)
  14224. __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false);
  14225. }
  14226. }
  14227. +#endif
  14228. static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
  14229. @@ -426,11 +445,13 @@ EXPORT_SYMBOL_GPL(rcu_batches_started_sched);
  14230. /*
  14231. * Return the number of RCU BH batches started thus far for debug & stats.
  14232. */
  14233. +#ifndef CONFIG_PREEMPT_RT_FULL
  14234. unsigned long rcu_batches_started_bh(void)
  14235. {
  14236. return rcu_bh_state.gpnum;
  14237. }
  14238. EXPORT_SYMBOL_GPL(rcu_batches_started_bh);
  14239. +#endif
  14240. /*
  14241. * Return the number of RCU batches completed thus far for debug & stats.
  14242. @@ -450,6 +471,7 @@ unsigned long rcu_batches_completed_sched(void)
  14243. }
  14244. EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
  14245. +#ifndef CONFIG_PREEMPT_RT_FULL
  14246. /*
  14247. * Return the number of RCU BH batches completed thus far for debug & stats.
  14248. */
  14249. @@ -477,6 +499,13 @@ void rcu_bh_force_quiescent_state(void)
  14250. }
  14251. EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
  14252. +#else
  14253. +void rcu_force_quiescent_state(void)
  14254. +{
  14255. +}
  14256. +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  14257. +#endif
  14258. +
  14259. /*
  14260. * Force a quiescent state for RCU-sched.
  14261. */
  14262. @@ -527,9 +556,11 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
  14263. case RCU_FLAVOR:
  14264. rsp = rcu_state_p;
  14265. break;
  14266. +#ifndef CONFIG_PREEMPT_RT_FULL
  14267. case RCU_BH_FLAVOR:
  14268. rsp = &rcu_bh_state;
  14269. break;
  14270. +#endif
  14271. case RCU_SCHED_FLAVOR:
  14272. rsp = &rcu_sched_state;
  14273. break;
  14274. @@ -2920,18 +2951,17 @@ __rcu_process_callbacks(struct rcu_state *rsp)
  14275. /*
  14276. * Do RCU core processing for the current CPU.
  14277. */
  14278. -static void rcu_process_callbacks(struct softirq_action *unused)
  14279. +static void rcu_process_callbacks(void)
  14280. {
  14281. struct rcu_state *rsp;
  14282. if (cpu_is_offline(smp_processor_id()))
  14283. return;
  14284. - trace_rcu_utilization(TPS("Start RCU core"));
  14285. for_each_rcu_flavor(rsp)
  14286. __rcu_process_callbacks(rsp);
  14287. - trace_rcu_utilization(TPS("End RCU core"));
  14288. }
  14289. +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
  14290. /*
  14291. * Schedule RCU callback invocation. If the specified type of RCU
  14292. * does not support RCU priority boosting, just do a direct call,
  14293. @@ -2943,19 +2973,106 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
  14294. {
  14295. if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
  14296. return;
  14297. - if (likely(!rsp->boost)) {
  14298. - rcu_do_batch(rsp, rdp);
  14299. - return;
  14300. - }
  14301. - invoke_rcu_callbacks_kthread();
  14302. + rcu_do_batch(rsp, rdp);
  14303. }
  14304. +static void rcu_wake_cond(struct task_struct *t, int status)
  14305. +{
  14306. + /*
  14307. + * If the thread is yielding, only wake it when this
  14308. + * is invoked from idle
  14309. + */
  14310. + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
  14311. + wake_up_process(t);
  14312. +}
  14313. +
  14314. +/*
  14315. + * Wake up this CPU's rcuc kthread to do RCU core processing.
  14316. + */
  14317. static void invoke_rcu_core(void)
  14318. {
  14319. - if (cpu_online(smp_processor_id()))
  14320. - raise_softirq(RCU_SOFTIRQ);
  14321. + unsigned long flags;
  14322. + struct task_struct *t;
  14323. +
  14324. + if (!cpu_online(smp_processor_id()))
  14325. + return;
  14326. + local_irq_save(flags);
  14327. + __this_cpu_write(rcu_cpu_has_work, 1);
  14328. + t = __this_cpu_read(rcu_cpu_kthread_task);
  14329. + if (t != NULL && current != t)
  14330. + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
  14331. + local_irq_restore(flags);
  14332. }
  14333. +static void rcu_cpu_kthread_park(unsigned int cpu)
  14334. +{
  14335. + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
  14336. +}
  14337. +
  14338. +static int rcu_cpu_kthread_should_run(unsigned int cpu)
  14339. +{
  14340. + return __this_cpu_read(rcu_cpu_has_work);
  14341. +}
  14342. +
  14343. +/*
  14344. + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
  14345. + * RCU softirq used in flavors and configurations of RCU that do not
  14346. + * support RCU priority boosting.
  14347. + */
  14348. +static void rcu_cpu_kthread(unsigned int cpu)
  14349. +{
  14350. + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
  14351. + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
  14352. + int spincnt;
  14353. +
  14354. + for (spincnt = 0; spincnt < 10; spincnt++) {
  14355. + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
  14356. + local_bh_disable();
  14357. + *statusp = RCU_KTHREAD_RUNNING;
  14358. + this_cpu_inc(rcu_cpu_kthread_loops);
  14359. + local_irq_disable();
  14360. + work = *workp;
  14361. + *workp = 0;
  14362. + local_irq_enable();
  14363. + if (work)
  14364. + rcu_process_callbacks();
  14365. + local_bh_enable();
  14366. + if (*workp == 0) {
  14367. + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
  14368. + *statusp = RCU_KTHREAD_WAITING;
  14369. + return;
  14370. + }
  14371. + }
  14372. + *statusp = RCU_KTHREAD_YIELDING;
  14373. + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
  14374. + schedule_timeout_interruptible(2);
  14375. + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
  14376. + *statusp = RCU_KTHREAD_WAITING;
  14377. +}
  14378. +
  14379. +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
  14380. + .store = &rcu_cpu_kthread_task,
  14381. + .thread_should_run = rcu_cpu_kthread_should_run,
  14382. + .thread_fn = rcu_cpu_kthread,
  14383. + .thread_comm = "rcuc/%u",
  14384. + .setup = rcu_cpu_kthread_setup,
  14385. + .park = rcu_cpu_kthread_park,
  14386. +};
  14387. +
  14388. +/*
  14389. + * Spawn per-CPU RCU core processing kthreads.
  14390. + */
  14391. +static int __init rcu_spawn_core_kthreads(void)
  14392. +{
  14393. + int cpu;
  14394. +
  14395. + for_each_possible_cpu(cpu)
  14396. + per_cpu(rcu_cpu_has_work, cpu) = 0;
  14397. + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
  14398. + return 0;
  14399. +}
  14400. +early_initcall(rcu_spawn_core_kthreads);
  14401. +
  14402. /*
  14403. * Handle any core-RCU processing required by a call_rcu() invocation.
  14404. */
  14405. @@ -3099,6 +3216,7 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
  14406. }
  14407. EXPORT_SYMBOL_GPL(call_rcu_sched);
  14408. +#ifndef CONFIG_PREEMPT_RT_FULL
  14409. /*
  14410. * Queue an RCU callback for invocation after a quicker grace period.
  14411. */
  14412. @@ -3107,6 +3225,7 @@ void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
  14413. __call_rcu(head, func, &rcu_bh_state, -1, 0);
  14414. }
  14415. EXPORT_SYMBOL_GPL(call_rcu_bh);
  14416. +#endif
  14417. /*
  14418. * Queue an RCU callback for lazy invocation after a grace period.
  14419. @@ -3198,6 +3317,7 @@ void synchronize_sched(void)
  14420. }
  14421. EXPORT_SYMBOL_GPL(synchronize_sched);
  14422. +#ifndef CONFIG_PREEMPT_RT_FULL
  14423. /**
  14424. * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
  14425. *
  14426. @@ -3224,6 +3344,7 @@ void synchronize_rcu_bh(void)
  14427. wait_rcu_gp(call_rcu_bh);
  14428. }
  14429. EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
  14430. +#endif
  14431. /**
  14432. * get_state_synchronize_rcu - Snapshot current RCU state
  14433. @@ -4104,6 +4225,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
  14434. mutex_unlock(&rsp->barrier_mutex);
  14435. }
  14436. +#ifndef CONFIG_PREEMPT_RT_FULL
  14437. /**
  14438. * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
  14439. */
  14440. @@ -4112,6 +4234,7 @@ void rcu_barrier_bh(void)
  14441. _rcu_barrier(&rcu_bh_state);
  14442. }
  14443. EXPORT_SYMBOL_GPL(rcu_barrier_bh);
  14444. +#endif
  14445. /**
  14446. * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
  14447. @@ -4609,12 +4732,13 @@ void __init rcu_init(void)
  14448. rcu_bootup_announce();
  14449. rcu_init_geometry();
  14450. +#ifndef CONFIG_PREEMPT_RT_FULL
  14451. rcu_init_one(&rcu_bh_state);
  14452. +#endif
  14453. rcu_init_one(&rcu_sched_state);
  14454. if (dump_tree)
  14455. rcu_dump_rcu_node_tree(&rcu_sched_state);
  14456. __rcu_init_preempt();
  14457. - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
  14458. /*
  14459. * We don't need protection against CPU-hotplug here because
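The tree.c changes replace RCU_SOFTIRQ processing with per-CPU "rcuc/%u" kthreads registered through the smpboot infrastructure. A bare-bones sketch of that registration pattern, using invented example_* names, to show what rcu_cpu_thread_spec above plugs into:

#include <linux/smpboot.h>
#include <linux/percpu.h>
#include <linux/init.h>

static DEFINE_PER_CPU(struct task_struct *, example_task);
static DEFINE_PER_CPU(int, example_has_work);

static int example_should_run(unsigned int cpu)
{
        return __this_cpu_read(example_has_work);
}

static void example_fn(unsigned int cpu)
{
        __this_cpu_write(example_has_work, 0);
        /* process this CPU's deferred work here */
}

static struct smp_hotplug_thread example_threads = {
        .store                  = &example_task,
        .thread_should_run      = example_should_run,
        .thread_fn              = example_fn,
        .thread_comm            = "example/%u",
};

static int __init example_spawn_threads(void)
{
        return smpboot_register_percpu_thread(&example_threads);
}
early_initcall(example_spawn_threads);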
  14460. diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
  14461. index df668c0f9e64..dd3efa3402fc 100644
  14462. --- a/kernel/rcu/tree.h
  14463. +++ b/kernel/rcu/tree.h
  14464. @@ -572,18 +572,18 @@ extern struct list_head rcu_struct_flavors;
  14465. */
  14466. extern struct rcu_state rcu_sched_state;
  14467. +#ifndef CONFIG_PREEMPT_RT_FULL
  14468. extern struct rcu_state rcu_bh_state;
  14469. +#endif
  14470. #ifdef CONFIG_PREEMPT_RCU
  14471. extern struct rcu_state rcu_preempt_state;
  14472. #endif /* #ifdef CONFIG_PREEMPT_RCU */
  14473. -#ifdef CONFIG_RCU_BOOST
  14474. DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  14475. DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
  14476. DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  14477. DECLARE_PER_CPU(char, rcu_cpu_has_work);
  14478. -#endif /* #ifdef CONFIG_RCU_BOOST */
  14479. #ifndef RCU_TREE_NONCORE
  14480. @@ -603,10 +603,9 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
  14481. static void __init __rcu_init_preempt(void);
  14482. static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
  14483. static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
  14484. -static void invoke_rcu_callbacks_kthread(void);
  14485. static bool rcu_is_callbacks_kthread(void);
  14486. +static void rcu_cpu_kthread_setup(unsigned int cpu);
  14487. #ifdef CONFIG_RCU_BOOST
  14488. -static void rcu_preempt_do_callbacks(void);
  14489. static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
  14490. struct rcu_node *rnp);
  14491. #endif /* #ifdef CONFIG_RCU_BOOST */
  14492. diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
  14493. index efdf7b61ce12..6a4158a83375 100644
  14494. --- a/kernel/rcu/tree_plugin.h
  14495. +++ b/kernel/rcu/tree_plugin.h
  14496. @@ -24,25 +24,10 @@
  14497. * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  14498. */
  14499. -#include <linux/delay.h>
  14500. -#include <linux/gfp.h>
  14501. -#include <linux/oom.h>
  14502. -#include <linux/smpboot.h>
  14503. -#include "../time/tick-internal.h"
  14504. -
  14505. #ifdef CONFIG_RCU_BOOST
  14506. #include "../locking/rtmutex_common.h"
  14507. -/*
  14508. - * Control variables for per-CPU and per-rcu_node kthreads. These
  14509. - * handle all flavors of RCU.
  14510. - */
  14511. -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
  14512. -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  14513. -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  14514. -DEFINE_PER_CPU(char, rcu_cpu_has_work);
  14515. -
  14516. #else /* #ifdef CONFIG_RCU_BOOST */
  14517. /*
  14518. @@ -55,6 +40,14 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work);
  14519. #endif /* #else #ifdef CONFIG_RCU_BOOST */
  14520. +/*
  14521. + * Control variables for per-CPU and per-rcu_node kthreads. These
  14522. + * handle all flavors of RCU.
  14523. + */
  14524. +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  14525. +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  14526. +DEFINE_PER_CPU(char, rcu_cpu_has_work);
  14527. +
  14528. #ifdef CONFIG_RCU_NOCB_CPU
  14529. static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
  14530. static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */
  14531. @@ -428,7 +421,7 @@ void rcu_read_unlock_special(struct task_struct *t)
  14532. }
  14533. /* Hardware IRQ handlers cannot block, complain if they get here. */
  14534. - if (in_irq() || in_serving_softirq()) {
  14535. + if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
  14536. lockdep_rcu_suspicious(__FILE__, __LINE__,
  14537. "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
  14538. pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n",
  14539. @@ -634,15 +627,6 @@ static void rcu_preempt_check_callbacks(void)
  14540. t->rcu_read_unlock_special.b.need_qs = true;
  14541. }
  14542. -#ifdef CONFIG_RCU_BOOST
  14543. -
  14544. -static void rcu_preempt_do_callbacks(void)
  14545. -{
  14546. - rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
  14547. -}
  14548. -
  14549. -#endif /* #ifdef CONFIG_RCU_BOOST */
  14550. -
  14551. /*
  14552. * Queue a preemptible-RCU callback for invocation after a grace period.
  14553. */
  14554. @@ -924,6 +908,19 @@ void exit_rcu(void)
  14555. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  14556. +/*
  14557. + * If boosting, set rcuc kthreads to realtime priority.
  14558. + */
  14559. +static void rcu_cpu_kthread_setup(unsigned int cpu)
  14560. +{
  14561. +#ifdef CONFIG_RCU_BOOST
  14562. + struct sched_param sp;
  14563. +
  14564. + sp.sched_priority = kthread_prio;
  14565. + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
  14566. +#endif /* #ifdef CONFIG_RCU_BOOST */
  14567. +}
  14568. +
  14569. #ifdef CONFIG_RCU_BOOST
  14570. #include "../locking/rtmutex_common.h"
  14571. @@ -955,16 +952,6 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp)
  14572. #endif /* #else #ifdef CONFIG_RCU_TRACE */
  14573. -static void rcu_wake_cond(struct task_struct *t, int status)
  14574. -{
  14575. - /*
  14576. - * If the thread is yielding, only wake it when this
  14577. - * is invoked from idle
  14578. - */
  14579. - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
  14580. - wake_up_process(t);
  14581. -}
  14582. -
  14583. /*
  14584. * Carry out RCU priority boosting on the task indicated by ->exp_tasks
  14585. * or ->boost_tasks, advancing the pointer to the next task in the
  14586. @@ -1108,23 +1095,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
  14587. }
  14588. /*
  14589. - * Wake up the per-CPU kthread to invoke RCU callbacks.
  14590. - */
  14591. -static void invoke_rcu_callbacks_kthread(void)
  14592. -{
  14593. - unsigned long flags;
  14594. -
  14595. - local_irq_save(flags);
  14596. - __this_cpu_write(rcu_cpu_has_work, 1);
  14597. - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
  14598. - current != __this_cpu_read(rcu_cpu_kthread_task)) {
  14599. - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
  14600. - __this_cpu_read(rcu_cpu_kthread_status));
  14601. - }
  14602. - local_irq_restore(flags);
  14603. -}
  14604. -
  14605. -/*
  14606. * Is the current CPU running the RCU-callbacks kthread?
  14607. * Caller must have preemption disabled.
  14608. */
  14609. @@ -1178,67 +1148,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
  14610. return 0;
  14611. }
  14612. -static void rcu_kthread_do_work(void)
  14613. -{
  14614. - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
  14615. - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
  14616. - rcu_preempt_do_callbacks();
  14617. -}
  14618. -
  14619. -static void rcu_cpu_kthread_setup(unsigned int cpu)
  14620. -{
  14621. - struct sched_param sp;
  14622. -
  14623. - sp.sched_priority = kthread_prio;
  14624. - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
  14625. -}
  14626. -
  14627. -static void rcu_cpu_kthread_park(unsigned int cpu)
  14628. -{
  14629. - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
  14630. -}
  14631. -
  14632. -static int rcu_cpu_kthread_should_run(unsigned int cpu)
  14633. -{
  14634. - return __this_cpu_read(rcu_cpu_has_work);
  14635. -}
  14636. -
  14637. -/*
  14638. - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
  14639. - * RCU softirq used in flavors and configurations of RCU that do not
  14640. - * support RCU priority boosting.
  14641. - */
  14642. -static void rcu_cpu_kthread(unsigned int cpu)
  14643. -{
  14644. - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
  14645. - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
  14646. - int spincnt;
  14647. -
  14648. - for (spincnt = 0; spincnt < 10; spincnt++) {
  14649. - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
  14650. - local_bh_disable();
  14651. - *statusp = RCU_KTHREAD_RUNNING;
  14652. - this_cpu_inc(rcu_cpu_kthread_loops);
  14653. - local_irq_disable();
  14654. - work = *workp;
  14655. - *workp = 0;
  14656. - local_irq_enable();
  14657. - if (work)
  14658. - rcu_kthread_do_work();
  14659. - local_bh_enable();
  14660. - if (*workp == 0) {
  14661. - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
  14662. - *statusp = RCU_KTHREAD_WAITING;
  14663. - return;
  14664. - }
  14665. - }
  14666. - *statusp = RCU_KTHREAD_YIELDING;
  14667. - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
  14668. - schedule_timeout_interruptible(2);
  14669. - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
  14670. - *statusp = RCU_KTHREAD_WAITING;
  14671. -}
  14672. -
  14673. /*
  14674. * Set the per-rcu_node kthread's affinity to cover all CPUs that are
  14675. * served by the rcu_node in question. The CPU hotplug lock is still
  14676. @@ -1268,26 +1177,12 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
  14677. free_cpumask_var(cm);
  14678. }
  14679. -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
  14680. - .store = &rcu_cpu_kthread_task,
  14681. - .thread_should_run = rcu_cpu_kthread_should_run,
  14682. - .thread_fn = rcu_cpu_kthread,
  14683. - .thread_comm = "rcuc/%u",
  14684. - .setup = rcu_cpu_kthread_setup,
  14685. - .park = rcu_cpu_kthread_park,
  14686. -};
  14687. -
  14688. /*
  14689. * Spawn boost kthreads -- called as soon as the scheduler is running.
  14690. */
  14691. static void __init rcu_spawn_boost_kthreads(void)
  14692. {
  14693. struct rcu_node *rnp;
  14694. - int cpu;
  14695. -
  14696. - for_each_possible_cpu(cpu)
  14697. - per_cpu(rcu_cpu_has_work, cpu) = 0;
  14698. - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
  14699. rcu_for_each_leaf_node(rcu_state_p, rnp)
  14700. (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
  14701. }
  14702. @@ -1310,11 +1205,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
  14703. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  14704. }
  14705. -static void invoke_rcu_callbacks_kthread(void)
  14706. -{
  14707. - WARN_ON_ONCE(1);
  14708. -}
  14709. -
  14710. static bool rcu_is_callbacks_kthread(void)
  14711. {
  14712. return false;
  14713. @@ -1338,7 +1228,7 @@ static void rcu_prepare_kthreads(int cpu)
  14714. #endif /* #else #ifdef CONFIG_RCU_BOOST */
  14715. -#if !defined(CONFIG_RCU_FAST_NO_HZ)
  14716. +#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL)
  14717. /*
  14718. * Check to see if any future RCU-related work will need to be done
  14719. @@ -1355,7 +1245,9 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt)
  14720. return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)
  14721. ? 0 : rcu_cpu_has_callbacks(NULL);
  14722. }
  14723. +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */
  14724. +#if !defined(CONFIG_RCU_FAST_NO_HZ)
  14725. /*
  14726. * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
  14727. * after it.
  14728. @@ -1451,6 +1343,8 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
  14729. return cbs_ready;
  14730. }
  14731. +#ifndef CONFIG_PREEMPT_RT_FULL
  14732. +
  14733. /*
  14734. * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
  14735. * to invoke. If the CPU has callbacks, try to advance them. Tell the
  14736. @@ -1496,6 +1390,7 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt)
  14737. *nextevt = basemono + dj * TICK_NSEC;
  14738. return 0;
  14739. }
  14740. +#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */
  14741. /*
  14742. * Prepare a CPU for idle from an RCU perspective. The first major task
  14743. diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
  14744. index ca828b41c938..6cbf7a9aa70f 100644
  14745. --- a/kernel/rcu/update.c
  14746. +++ b/kernel/rcu/update.c
  14747. @@ -295,6 +295,7 @@ int rcu_read_lock_held(void)
  14748. }
  14749. EXPORT_SYMBOL_GPL(rcu_read_lock_held);
  14750. +#ifndef CONFIG_PREEMPT_RT_FULL
  14751. /**
  14752. * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
  14753. *
  14754. @@ -321,6 +322,7 @@ int rcu_read_lock_bh_held(void)
  14755. return in_softirq() || irqs_disabled();
  14756. }
  14757. EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
  14758. +#endif
  14759. #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
  14760. diff --git a/kernel/relay.c b/kernel/relay.c
  14761. index 074994bcfa9b..7206cef7d6da 100644
  14762. --- a/kernel/relay.c
  14763. +++ b/kernel/relay.c
  14764. @@ -336,6 +336,10 @@ static void wakeup_readers(unsigned long data)
  14765. {
  14766. struct rchan_buf *buf = (struct rchan_buf *)data;
  14767. wake_up_interruptible(&buf->read_wait);
  14768. + /*
  14769. + * Stupid polling for now:
  14770. + */
  14771. + mod_timer(&buf->timer, jiffies + 1);
  14772. }
  14773. /**
  14774. @@ -353,6 +357,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init)
  14775. init_waitqueue_head(&buf->read_wait);
  14776. kref_init(&buf->kref);
  14777. setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf);
  14778. + mod_timer(&buf->timer, jiffies + 1);
  14779. } else
  14780. del_timer_sync(&buf->timer);
  14781. @@ -736,15 +741,6 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
  14782. else
  14783. buf->early_bytes += buf->chan->subbuf_size -
  14784. buf->padding[old_subbuf];
  14785. - smp_mb();
  14786. - if (waitqueue_active(&buf->read_wait))
  14787. - /*
  14788. - * Calling wake_up_interruptible() from here
  14789. - * will deadlock if we happen to be logging
  14790. - * from the scheduler (trying to re-grab
  14791. - * rq->lock), so defer it.
  14792. - */
  14793. - mod_timer(&buf->timer, jiffies + 1);
  14794. }
  14795. old = buf->data;
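The relay change above removes the wake_up_interruptible() call from relay_switch_subbuf(), which can run from scheduler context, and instead lets the buffer's timer poll once per jiffy. A minimal sketch of that self-rearming timer pattern with the 4.6-era timer API (everything except setup_timer()/mod_timer() is invented):

#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list example_timer;

static void example_poll(unsigned long data)
{
        /* do the deferred wakeup here, then re-arm for the next tick */
        mod_timer(&example_timer, jiffies + 1);
}

static void example_start_polling(void)
{
        setup_timer(&example_timer, example_poll, 0);
        mod_timer(&example_timer, jiffies + 1);
}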
  14796. diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
  14797. index 414d9c16da42..5aaddf5838e9 100644
  14798. --- a/kernel/sched/Makefile
  14799. +++ b/kernel/sched/Makefile
  14800. @@ -17,7 +17,7 @@ endif
  14801. obj-y += core.o loadavg.o clock.o cputime.o
  14802. obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
  14803. -obj-y += wait.o swait.o completion.o idle.o
  14804. +obj-y += wait.o swait.o swork.o completion.o idle.o
  14805. obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
  14806. obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
  14807. obj-$(CONFIG_SCHEDSTATS) += stats.o
  14808. diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
  14809. index 8d0f35debf35..b62cf6400fe0 100644
  14810. --- a/kernel/sched/completion.c
  14811. +++ b/kernel/sched/completion.c
  14812. @@ -30,10 +30,10 @@ void complete(struct completion *x)
  14813. {
  14814. unsigned long flags;
  14815. - spin_lock_irqsave(&x->wait.lock, flags);
  14816. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  14817. x->done++;
  14818. - __wake_up_locked(&x->wait, TASK_NORMAL, 1);
  14819. - spin_unlock_irqrestore(&x->wait.lock, flags);
  14820. + swake_up_locked(&x->wait);
  14821. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  14822. }
  14823. EXPORT_SYMBOL(complete);
  14824. @@ -50,10 +50,10 @@ void complete_all(struct completion *x)
  14825. {
  14826. unsigned long flags;
  14827. - spin_lock_irqsave(&x->wait.lock, flags);
  14828. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  14829. x->done += UINT_MAX/2;
  14830. - __wake_up_locked(&x->wait, TASK_NORMAL, 0);
  14831. - spin_unlock_irqrestore(&x->wait.lock, flags);
  14832. + swake_up_all_locked(&x->wait);
  14833. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  14834. }
  14835. EXPORT_SYMBOL(complete_all);
  14836. @@ -62,20 +62,20 @@ do_wait_for_common(struct completion *x,
  14837. long (*action)(long), long timeout, int state)
  14838. {
  14839. if (!x->done) {
  14840. - DECLARE_WAITQUEUE(wait, current);
  14841. + DECLARE_SWAITQUEUE(wait);
  14842. - __add_wait_queue_tail_exclusive(&x->wait, &wait);
  14843. + __prepare_to_swait(&x->wait, &wait);
  14844. do {
  14845. if (signal_pending_state(state, current)) {
  14846. timeout = -ERESTARTSYS;
  14847. break;
  14848. }
  14849. __set_current_state(state);
  14850. - spin_unlock_irq(&x->wait.lock);
  14851. + raw_spin_unlock_irq(&x->wait.lock);
  14852. timeout = action(timeout);
  14853. - spin_lock_irq(&x->wait.lock);
  14854. + raw_spin_lock_irq(&x->wait.lock);
  14855. } while (!x->done && timeout);
  14856. - __remove_wait_queue(&x->wait, &wait);
  14857. + __finish_swait(&x->wait, &wait);
  14858. if (!x->done)
  14859. return timeout;
  14860. }
  14861. @@ -89,9 +89,9 @@ __wait_for_common(struct completion *x,
  14862. {
  14863. might_sleep();
  14864. - spin_lock_irq(&x->wait.lock);
  14865. + raw_spin_lock_irq(&x->wait.lock);
  14866. timeout = do_wait_for_common(x, action, timeout, state);
  14867. - spin_unlock_irq(&x->wait.lock);
  14868. + raw_spin_unlock_irq(&x->wait.lock);
  14869. return timeout;
  14870. }
  14871. @@ -277,12 +277,12 @@ bool try_wait_for_completion(struct completion *x)
  14872. if (!READ_ONCE(x->done))
  14873. return 0;
  14874. - spin_lock_irqsave(&x->wait.lock, flags);
  14875. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  14876. if (!x->done)
  14877. ret = 0;
  14878. else
  14879. x->done--;
  14880. - spin_unlock_irqrestore(&x->wait.lock, flags);
  14881. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  14882. return ret;
  14883. }
  14884. EXPORT_SYMBOL(try_wait_for_completion);
  14885. @@ -311,7 +311,7 @@ bool completion_done(struct completion *x)
  14886. * after it's acquired the lock.
  14887. */
  14888. smp_rmb();
  14889. - spin_unlock_wait(&x->wait.lock);
  14890. + raw_spin_unlock_wait(&x->wait.lock);
  14891. return true;
  14892. }
  14893. EXPORT_SYMBOL(completion_done);
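The completion rework above swaps the wait_queue_head_t (whose spinlock_t becomes a sleeping lock on RT) for a simple waitqueue protected by a raw spinlock, so complete() remains callable from truly atomic context. A small usage sketch of the swait API this relies on; the example_* names are invented:

#include <linux/swait.h>

static DECLARE_SWAIT_QUEUE_HEAD(example_wq);
static bool example_done;

static void example_wait(void)
{
        /* sleeps until example_done becomes true */
        swait_event(example_wq, example_done);
}

static void example_signal(void)
{
        example_done = true;
        swake_up(&example_wq);  /* raw-spinlock based, usable with IRQs off */
}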
  14894. diff --git a/kernel/sched/core.c b/kernel/sched/core.c
  14895. index 11546a6ed5df..140ee06079b6 100644
  14896. --- a/kernel/sched/core.c
  14897. +++ b/kernel/sched/core.c
  14898. @@ -128,7 +128,11 @@ const_debug unsigned int sysctl_sched_features =
  14899. * Number of tasks to iterate in a single balance run.
  14900. * Limited because this is done with IRQs disabled.
  14901. */
  14902. +#ifndef CONFIG_PREEMPT_RT_FULL
  14903. const_debug unsigned int sysctl_sched_nr_migrate = 32;
  14904. +#else
  14905. +const_debug unsigned int sysctl_sched_nr_migrate = 8;
  14906. +#endif
  14907. /*
  14908. * period over which we average the RT time consumption, measured
  14909. @@ -306,6 +310,7 @@ static void init_rq_hrtick(struct rq *rq)
  14910. hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  14911. rq->hrtick_timer.function = hrtick;
  14912. + rq->hrtick_timer.irqsafe = 1;
  14913. }
  14914. #else /* CONFIG_SCHED_HRTICK */
  14915. static inline void hrtick_clear(struct rq *rq)
  14916. @@ -414,7 +419,7 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
  14917. head->lastp = &node->next;
  14918. }
  14919. -void wake_up_q(struct wake_q_head *head)
  14920. +void __wake_up_q(struct wake_q_head *head, bool sleeper)
  14921. {
  14922. struct wake_q_node *node = head->first;
  14923. @@ -431,7 +436,10 @@ void wake_up_q(struct wake_q_head *head)
  14924. * wake_up_process() implies a wmb() to pair with the queueing
  14925. * in wake_q_add() so as not to miss wakeups.
  14926. */
  14927. - wake_up_process(task);
  14928. + if (sleeper)
  14929. + wake_up_lock_sleeper(task);
  14930. + else
  14931. + wake_up_process(task);
  14932. put_task_struct(task);
  14933. }
  14934. }
  14935. @@ -467,6 +475,38 @@ void resched_curr(struct rq *rq)
  14936. trace_sched_wake_idle_without_ipi(cpu);
  14937. }
  14938. +#ifdef CONFIG_PREEMPT_LAZY
  14939. +void resched_curr_lazy(struct rq *rq)
  14940. +{
  14941. + struct task_struct *curr = rq->curr;
  14942. + int cpu;
  14943. +
  14944. + if (!sched_feat(PREEMPT_LAZY)) {
  14945. + resched_curr(rq);
  14946. + return;
  14947. + }
  14948. +
  14949. + lockdep_assert_held(&rq->lock);
  14950. +
  14951. + if (test_tsk_need_resched(curr))
  14952. + return;
  14953. +
  14954. + if (test_tsk_need_resched_lazy(curr))
  14955. + return;
  14956. +
  14957. + set_tsk_need_resched_lazy(curr);
  14958. +
  14959. + cpu = cpu_of(rq);
  14960. + if (cpu == smp_processor_id())
  14961. + return;
  14962. +
  14963. + /* NEED_RESCHED_LAZY must be visible before we test polling */
  14964. + smp_mb();
  14965. + if (!tsk_is_polling(curr))
  14966. + smp_send_reschedule(cpu);
  14967. +}
  14968. +#endif
  14969. +
  14970. void resched_cpu(int cpu)
  14971. {
  14972. struct rq *rq = cpu_rq(cpu);
  14973. @@ -490,11 +530,14 @@ void resched_cpu(int cpu)
  14974. */
  14975. int get_nohz_timer_target(void)
  14976. {
  14977. - int i, cpu = smp_processor_id();
  14978. + int i, cpu;
  14979. struct sched_domain *sd;
  14980. + preempt_disable_rt();
  14981. + cpu = smp_processor_id();
  14982. +
  14983. if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu))
  14984. - return cpu;
  14985. + goto preempt_en_rt;
  14986. rcu_read_lock();
  14987. for_each_domain(cpu, sd) {
  14988. @@ -510,6 +553,8 @@ int get_nohz_timer_target(void)
  14989. cpu = housekeeping_any_cpu();
  14990. unlock:
  14991. rcu_read_unlock();
  14992. +preempt_en_rt:
  14993. + preempt_enable_rt();
  14994. return cpu;
  14995. }
  14996. /*
  14997. @@ -1051,6 +1096,11 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  14998. lockdep_assert_held(&p->pi_lock);
  14999. + if (__migrate_disabled(p)) {
  15000. + cpumask_copy(&p->cpus_allowed, new_mask);
  15001. + return;
  15002. + }
  15003. +
  15004. queued = task_on_rq_queued(p);
  15005. running = task_current(rq, p);
  15006. @@ -1073,6 +1123,84 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  15007. enqueue_task(rq, p, ENQUEUE_RESTORE);
  15008. }
  15009. +static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
  15010. +static DEFINE_MUTEX(sched_down_mutex);
  15011. +static cpumask_t sched_down_cpumask;
  15012. +
  15013. +void tell_sched_cpu_down_begin(int cpu)
  15014. +{
  15015. + mutex_lock(&sched_down_mutex);
  15016. + cpumask_set_cpu(cpu, &sched_down_cpumask);
  15017. + mutex_unlock(&sched_down_mutex);
  15018. +}
  15019. +
  15020. +void tell_sched_cpu_down_done(int cpu)
  15021. +{
  15022. + mutex_lock(&sched_down_mutex);
  15023. + cpumask_clear_cpu(cpu, &sched_down_cpumask);
  15024. + mutex_unlock(&sched_down_mutex);
  15025. +}
  15026. +
  15027. +/**
  15028. + * migrate_me - try to move the current task off this cpu
  15029. + *
  15030. + * Used by the pin_current_cpu() code to try to get tasks
  15031. + * to move off the current CPU as it is going down.
  15032. + * It will only move the task if the task isn't pinned to
  15033. + * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
  15034. + * and the task has to be in a RUNNING state. Otherwise the
  15035. + * movement of the task will wake it up (change its state
  15036. + * to running) when the task did not expect it.
  15037. + *
  15038. + * Returns 1 if it succeeded in moving the current task
  15039. + * 0 otherwise.
  15040. + */
  15041. +int migrate_me(void)
  15042. +{
  15043. + struct task_struct *p = current;
  15044. + struct migration_arg arg;
  15045. + struct cpumask *cpumask;
  15046. + struct cpumask *mask;
  15047. + unsigned long flags;
  15048. + unsigned int dest_cpu;
  15049. + struct rq *rq;
  15050. +
  15051. + /*
15052. + * We cannot migrate tasks bound to a CPU or tasks that are not
15053. + * running. The movement of the task will wake it up.
  15054. + */
  15055. + if (p->flags & PF_NO_SETAFFINITY || p->state)
  15056. + return 0;
  15057. +
  15058. + mutex_lock(&sched_down_mutex);
  15059. + rq = task_rq_lock(p, &flags);
  15060. +
  15061. + cpumask = this_cpu_ptr(&sched_cpumasks);
  15062. + mask = &p->cpus_allowed;
  15063. +
  15064. + cpumask_andnot(cpumask, mask, &sched_down_cpumask);
  15065. +
  15066. + if (!cpumask_weight(cpumask)) {
  15067. + /* It's only on this CPU? */
  15068. + task_rq_unlock(rq, p, &flags);
  15069. + mutex_unlock(&sched_down_mutex);
  15070. + return 0;
  15071. + }
  15072. +
  15073. + dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
  15074. +
  15075. + arg.task = p;
  15076. + arg.dest_cpu = dest_cpu;
  15077. +
  15078. + task_rq_unlock(rq, p, &flags);
  15079. +
  15080. + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
  15081. + tlb_migrate_finish(p->mm);
  15082. + mutex_unlock(&sched_down_mutex);
  15083. +
  15084. + return 1;
  15085. +}
  15086. +
  15087. /*
  15088. * Change a given task's CPU affinity. Migrate the thread to a
  15089. * proper CPU and schedule it away if the CPU it's executing on
  15090. @@ -1112,7 +1240,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
  15091. do_set_cpus_allowed(p, new_mask);
  15092. /* Can the task run on the task's current CPU? If so, we're done */
  15093. - if (cpumask_test_cpu(task_cpu(p), new_mask))
  15094. + if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
  15095. goto out;
  15096. dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
  15097. @@ -1299,6 +1427,18 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
  15098. return ret;
  15099. }
  15100. +static bool check_task_state(struct task_struct *p, long match_state)
  15101. +{
  15102. + bool match = false;
  15103. +
  15104. + raw_spin_lock_irq(&p->pi_lock);
  15105. + if (p->state == match_state || p->saved_state == match_state)
  15106. + match = true;
  15107. + raw_spin_unlock_irq(&p->pi_lock);
  15108. +
  15109. + return match;
  15110. +}
  15111. +
  15112. /*
  15113. * wait_task_inactive - wait for a thread to unschedule.
  15114. *
  15115. @@ -1343,7 +1483,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
  15116. * is actually now running somewhere else!
  15117. */
  15118. while (task_running(rq, p)) {
  15119. - if (match_state && unlikely(p->state != match_state))
  15120. + if (match_state && !check_task_state(p, match_state))
  15121. return 0;
  15122. cpu_relax();
  15123. }
  15124. @@ -1358,7 +1498,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
  15125. running = task_running(rq, p);
  15126. queued = task_on_rq_queued(p);
  15127. ncsw = 0;
  15128. - if (!match_state || p->state == match_state)
  15129. + if (!match_state || p->state == match_state ||
  15130. + p->saved_state == match_state)
  15131. ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
  15132. task_rq_unlock(rq, p, &flags);
  15133. @@ -1515,7 +1656,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
  15134. {
  15135. lockdep_assert_held(&p->pi_lock);
  15136. - if (p->nr_cpus_allowed > 1)
  15137. + if (tsk_nr_cpus_allowed(p) > 1)
  15138. cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
  15139. /*
  15140. @@ -1595,10 +1736,6 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl
  15141. {
  15142. activate_task(rq, p, en_flags);
  15143. p->on_rq = TASK_ON_RQ_QUEUED;
  15144. -
  15145. - /* if a worker is waking up, notify workqueue */
  15146. - if (p->flags & PF_WQ_WORKER)
  15147. - wq_worker_waking_up(p, cpu_of(rq));
  15148. }
  15149. /*
  15150. @@ -1916,8 +2053,27 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
  15151. */
  15152. smp_mb__before_spinlock();
  15153. raw_spin_lock_irqsave(&p->pi_lock, flags);
  15154. - if (!(p->state & state))
  15155. + if (!(p->state & state)) {
  15156. + /*
  15157. + * The task might be running due to a spinlock sleeper
  15158. + * wakeup. Check the saved state and set it to running
  15159. + * if the wakeup condition is true.
  15160. + */
  15161. + if (!(wake_flags & WF_LOCK_SLEEPER)) {
  15162. + if (p->saved_state & state) {
  15163. + p->saved_state = TASK_RUNNING;
  15164. + success = 1;
  15165. + }
  15166. + }
  15167. goto out;
  15168. + }
  15169. +
  15170. + /*
  15171. + * If this is a regular wakeup, then we can unconditionally
  15172. + * clear the saved state of a "lock sleeper".
  15173. + */
  15174. + if (!(wake_flags & WF_LOCK_SLEEPER))
  15175. + p->saved_state = TASK_RUNNING;
  15176. trace_sched_waking(p);
  15177. @@ -1982,53 +2138,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
  15178. }
  15179. /**
  15180. - * try_to_wake_up_local - try to wake up a local task with rq lock held
  15181. - * @p: the thread to be awakened
  15182. - *
  15183. - * Put @p on the run-queue if it's not already there. The caller must
  15184. - * ensure that this_rq() is locked, @p is bound to this_rq() and not
  15185. - * the current task.
  15186. - */
  15187. -static void try_to_wake_up_local(struct task_struct *p)
  15188. -{
  15189. - struct rq *rq = task_rq(p);
  15190. -
  15191. - if (WARN_ON_ONCE(rq != this_rq()) ||
  15192. - WARN_ON_ONCE(p == current))
  15193. - return;
  15194. -
  15195. - lockdep_assert_held(&rq->lock);
  15196. -
  15197. - if (!raw_spin_trylock(&p->pi_lock)) {
  15198. - /*
  15199. - * This is OK, because current is on_cpu, which avoids it being
  15200. - * picked for load-balance and preemption/IRQs are still
  15201. - * disabled avoiding further scheduler activity on it and we've
  15202. - * not yet picked a replacement task.
  15203. - */
  15204. - lockdep_unpin_lock(&rq->lock);
  15205. - raw_spin_unlock(&rq->lock);
  15206. - raw_spin_lock(&p->pi_lock);
  15207. - raw_spin_lock(&rq->lock);
  15208. - lockdep_pin_lock(&rq->lock);
  15209. - }
  15210. -
  15211. - if (!(p->state & TASK_NORMAL))
  15212. - goto out;
  15213. -
  15214. - trace_sched_waking(p);
  15215. -
  15216. - if (!task_on_rq_queued(p))
  15217. - ttwu_activate(rq, p, ENQUEUE_WAKEUP);
  15218. -
  15219. - ttwu_do_wakeup(rq, p, 0);
  15220. - if (schedstat_enabled())
  15221. - ttwu_stat(p, smp_processor_id(), 0);
  15222. -out:
  15223. - raw_spin_unlock(&p->pi_lock);
  15224. -}
  15225. -
  15226. -/**
  15227. * wake_up_process - Wake up a specific process
  15228. * @p: The process to be woken up.
  15229. *
  15230. @@ -2046,6 +2155,18 @@ int wake_up_process(struct task_struct *p)
  15231. }
  15232. EXPORT_SYMBOL(wake_up_process);
  15233. +/**
  15234. + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
  15235. + * @p: The process to be woken up.
  15236. + *
  15237. + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
  15238. + * the nature of the wakeup.
  15239. + */
  15240. +int wake_up_lock_sleeper(struct task_struct *p)
  15241. +{
  15242. + return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
  15243. +}
  15244. +
  15245. int wake_up_state(struct task_struct *p, unsigned int state)
  15246. {
  15247. return try_to_wake_up(p, state, 0);
  15248. @@ -2303,6 +2424,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
  15249. p->on_cpu = 0;
  15250. #endif
  15251. init_task_preempt_count(p);
  15252. +#ifdef CONFIG_HAVE_PREEMPT_LAZY
  15253. + task_thread_info(p)->preempt_lazy_count = 0;
  15254. +#endif
  15255. #ifdef CONFIG_SMP
  15256. plist_node_init(&p->pushable_tasks, MAX_PRIO);
  15257. RB_CLEAR_NODE(&p->pushable_dl_tasks);
  15258. @@ -2627,8 +2751,12 @@ static struct rq *finish_task_switch(struct task_struct *prev)
  15259. finish_arch_post_lock_switch();
  15260. fire_sched_in_preempt_notifiers(current);
  15261. + /*
  15262. + * We use mmdrop_delayed() here so we don't have to do the
  15263. + * full __mmdrop() when we are the last user.
  15264. + */
  15265. if (mm)
  15266. - mmdrop(mm);
  15267. + mmdrop_delayed(mm);
  15268. if (unlikely(prev_state == TASK_DEAD)) {
  15269. if (prev->sched_class->task_dead)
  15270. prev->sched_class->task_dead(prev);
  15271. @@ -3062,6 +3190,77 @@ static inline void schedule_debug(struct task_struct *prev)
  15272. schedstat_inc(this_rq(), sched_count);
  15273. }
  15274. +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
  15275. +
  15276. +void migrate_disable(void)
  15277. +{
  15278. + struct task_struct *p = current;
  15279. +
  15280. + if (in_atomic() || irqs_disabled()) {
  15281. +#ifdef CONFIG_SCHED_DEBUG
  15282. + p->migrate_disable_atomic++;
  15283. +#endif
  15284. + return;
  15285. + }
  15286. +
  15287. +#ifdef CONFIG_SCHED_DEBUG
  15288. + if (unlikely(p->migrate_disable_atomic)) {
  15289. + tracing_off();
  15290. + WARN_ON_ONCE(1);
  15291. + }
  15292. +#endif
  15293. +
  15294. + if (p->migrate_disable) {
  15295. + p->migrate_disable++;
  15296. + return;
  15297. + }
  15298. +
  15299. + preempt_disable();
  15300. + preempt_lazy_disable();
  15301. + pin_current_cpu();
  15302. + p->migrate_disable = 1;
  15303. + preempt_enable();
  15304. +}
  15305. +EXPORT_SYMBOL(migrate_disable);
  15306. +
  15307. +void migrate_enable(void)
  15308. +{
  15309. + struct task_struct *p = current;
  15310. +
  15311. + if (in_atomic() || irqs_disabled()) {
  15312. +#ifdef CONFIG_SCHED_DEBUG
  15313. + p->migrate_disable_atomic--;
  15314. +#endif
  15315. + return;
  15316. + }
  15317. +
  15318. +#ifdef CONFIG_SCHED_DEBUG
  15319. + if (unlikely(p->migrate_disable_atomic)) {
  15320. + tracing_off();
  15321. + WARN_ON_ONCE(1);
  15322. + }
  15323. +#endif
  15324. + WARN_ON_ONCE(p->migrate_disable <= 0);
  15325. +
  15326. + if (p->migrate_disable > 1) {
  15327. + p->migrate_disable--;
  15328. + return;
  15329. + }
  15330. +
  15331. + preempt_disable();
  15332. + /*
  15333. + * Clearing migrate_disable causes tsk_cpus_allowed to
15334. + * show the task's original cpu affinity.
  15335. + */
  15336. + p->migrate_disable = 0;
  15337. +
  15338. + unpin_current_cpu();
  15339. + preempt_enable();
  15340. + preempt_lazy_enable();
  15341. +}
  15342. +EXPORT_SYMBOL(migrate_enable);
  15343. +#endif
  15344. +
  15345. /*
  15346. * Pick up the highest-prio task:
  15347. */
  15348. @@ -3188,19 +3387,6 @@ static void __sched notrace __schedule(bool preempt)
  15349. } else {
  15350. deactivate_task(rq, prev, DEQUEUE_SLEEP);
  15351. prev->on_rq = 0;
  15352. -
  15353. - /*
  15354. - * If a worker went to sleep, notify and ask workqueue
  15355. - * whether it wants to wake up a task to maintain
  15356. - * concurrency.
  15357. - */
  15358. - if (prev->flags & PF_WQ_WORKER) {
  15359. - struct task_struct *to_wakeup;
  15360. -
  15361. - to_wakeup = wq_worker_sleeping(prev);
  15362. - if (to_wakeup)
  15363. - try_to_wake_up_local(to_wakeup);
  15364. - }
  15365. }
  15366. switch_count = &prev->nvcsw;
  15367. }
  15368. @@ -3210,6 +3396,7 @@ static void __sched notrace __schedule(bool preempt)
  15369. next = pick_next_task(rq, prev);
  15370. clear_tsk_need_resched(prev);
  15371. + clear_tsk_need_resched_lazy(prev);
  15372. clear_preempt_need_resched();
  15373. rq->clock_skip_update = 0;
  15374. @@ -3231,9 +3418,20 @@ STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */
  15375. static inline void sched_submit_work(struct task_struct *tsk)
  15376. {
  15377. - if (!tsk->state || tsk_is_pi_blocked(tsk))
  15378. + if (!tsk->state)
  15379. return;
  15380. /*
  15381. + * If a worker went to sleep, notify and ask workqueue whether
  15382. + * it wants to wake up a task to maintain concurrency.
  15383. + */
  15384. + if (tsk->flags & PF_WQ_WORKER)
  15385. + wq_worker_sleeping(tsk);
  15386. +
  15387. +
  15388. + if (tsk_is_pi_blocked(tsk))
  15389. + return;
  15390. +
  15391. + /*
  15392. * If we are going to sleep and we have plugged IO queued,
  15393. * make sure to submit it to avoid deadlocks.
  15394. */
  15395. @@ -3241,6 +3439,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
  15396. blk_schedule_flush_plug(tsk);
  15397. }
  15398. +static void sched_update_worker(struct task_struct *tsk)
  15399. +{
  15400. + if (tsk->flags & PF_WQ_WORKER)
  15401. + wq_worker_running(tsk);
  15402. +}
  15403. +
  15404. asmlinkage __visible void __sched schedule(void)
  15405. {
  15406. struct task_struct *tsk = current;
  15407. @@ -3251,6 +3455,7 @@ asmlinkage __visible void __sched schedule(void)
  15408. __schedule(false);
  15409. sched_preempt_enable_no_resched();
  15410. } while (need_resched());
  15411. + sched_update_worker(tsk);
  15412. }
  15413. EXPORT_SYMBOL(schedule);
  15414. @@ -3299,6 +3504,30 @@ static void __sched notrace preempt_schedule_common(void)
  15415. } while (need_resched());
  15416. }
  15417. +#ifdef CONFIG_PREEMPT_LAZY
  15418. +/*
15419. + * If TIF_NEED_RESCHED is set then we allow being scheduled away, since
15420. + * that flag is set by an RT task. Otherwise we try to avoid being scheduled
15421. + * out as long as the preempt_lazy_count counter is > 0.
  15422. + */
  15423. +static __always_inline int preemptible_lazy(void)
  15424. +{
  15425. + if (test_thread_flag(TIF_NEED_RESCHED))
  15426. + return 1;
  15427. + if (current_thread_info()->preempt_lazy_count)
  15428. + return 0;
  15429. + return 1;
  15430. +}
  15431. +
  15432. +#else
  15433. +
  15434. +static inline int preemptible_lazy(void)
  15435. +{
  15436. + return 1;
  15437. +}
  15438. +
  15439. +#endif
  15440. +
  15441. #ifdef CONFIG_PREEMPT
  15442. /*
  15443. * this is the entry point to schedule() from in-kernel preemption
  15444. @@ -3313,6 +3542,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
  15445. */
  15446. if (likely(!preemptible()))
  15447. return;
  15448. + if (!preemptible_lazy())
  15449. + return;
  15450. preempt_schedule_common();
  15451. }
  15452. @@ -3339,6 +3570,8 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
  15453. if (likely(!preemptible()))
  15454. return;
  15455. + if (!preemptible_lazy())
  15456. + return;
  15457. do {
  15458. preempt_disable_notrace();
  15459. @@ -3348,7 +3581,16 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
  15460. * an infinite recursion.
  15461. */
  15462. prev_ctx = exception_enter();
  15463. + /*
  15464. + * The add/subtract must not be traced by the function
  15465. + * tracer. But we still want to account for the
  15466. + * preempt off latency tracer. Since the _notrace versions
  15467. + * of add/subtract skip the accounting for latency tracer
  15468. + * we must force it manually.
  15469. + */
  15470. + start_critical_timings();
  15471. __schedule(true);
  15472. + stop_critical_timings();
  15473. exception_exit(prev_ctx);
  15474. preempt_enable_no_resched_notrace();
  15475. @@ -4693,6 +4935,7 @@ int __cond_resched_lock(spinlock_t *lock)
  15476. }
  15477. EXPORT_SYMBOL(__cond_resched_lock);
  15478. +#ifndef CONFIG_PREEMPT_RT_FULL
  15479. int __sched __cond_resched_softirq(void)
  15480. {
  15481. BUG_ON(!in_softirq());
  15482. @@ -4706,6 +4949,7 @@ int __sched __cond_resched_softirq(void)
  15483. return 0;
  15484. }
  15485. EXPORT_SYMBOL(__cond_resched_softirq);
  15486. +#endif
  15487. /**
  15488. * yield - yield the current processor to other threads.
  15489. @@ -5072,7 +5316,9 @@ void init_idle(struct task_struct *idle, int cpu)
  15490. /* Set the preempt count _outside_ the spinlocks! */
  15491. init_idle_preempt_count(idle, cpu);
  15492. -
  15493. +#ifdef CONFIG_HAVE_PREEMPT_LAZY
  15494. + task_thread_info(idle)->preempt_lazy_count = 0;
  15495. +#endif
  15496. /*
  15497. * The idle tasks have their own, simple scheduling class:
  15498. */
  15499. @@ -5213,6 +5459,8 @@ void sched_setnuma(struct task_struct *p, int nid)
  15500. #endif /* CONFIG_NUMA_BALANCING */
  15501. #ifdef CONFIG_HOTPLUG_CPU
  15502. +static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
  15503. +
  15504. /*
  15505. * Ensures that the idle task is using init_mm right before its cpu goes
  15506. * offline.
  15507. @@ -5227,7 +5475,11 @@ void idle_task_exit(void)
  15508. switch_mm(mm, &init_mm, current);
  15509. finish_arch_post_lock_switch();
  15510. }
  15511. - mmdrop(mm);
  15512. + /*
  15513. + * Defer the cleanup to a still-online cpu. On RT we can neither
  15514. + * call mmdrop() nor mmdrop_delayed() from here.
  15515. + */
  15516. + per_cpu(idle_last_mm, smp_processor_id()) = mm;
  15517. }
  15518. /*
  15519. @@ -5423,6 +5675,10 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
  15520. case CPU_DEAD:
  15521. calc_load_migrate(rq);
  15522. + if (per_cpu(idle_last_mm, cpu)) {
  15523. + mmdrop(per_cpu(idle_last_mm, cpu));
  15524. + per_cpu(idle_last_mm, cpu) = NULL;
  15525. + }
  15526. break;
  15527. #endif
  15528. }
  15529. @@ -7405,7 +7661,7 @@ void __init sched_init(void)
  15530. #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  15531. static inline int preempt_count_equals(int preempt_offset)
  15532. {
  15533. - int nested = preempt_count() + rcu_preempt_depth();
  15534. + int nested = preempt_count() + sched_rcu_preempt_depth();
  15535. return (nested == preempt_offset);
  15536. }
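
Note on the PREEMPT_LAZY changes above: fair-class reschedule requests now set a lazy flag that preempt_schedule() only honours once the per-task preempt_lazy_count has dropped to zero, while a hard TIF_NEED_RESCHED (set on behalf of an RT task) still preempts immediately. The stand-alone user-space model below is not part of the patch; it merely condenses the decision taken by preemptible_lazy() above, and may_preempt() is an invented name.

#include <stdbool.h>
#include <stdio.h>

/* Model of preemptible_lazy(): a hard request wins, lazy requests wait
 * until the lazy-preempt counter reaches zero.
 */
static bool may_preempt(bool need_resched, unsigned int lazy_count)
{
        if (need_resched)               /* TIF_NEED_RESCHED: preempt now */
                return true;
        return lazy_count == 0;         /* lazy: defer while count > 0 */
}

int main(void)
{
        printf("%d %d %d\n",
               may_preempt(true, 3),    /* 1: RT wakeup preempts regardless */
               may_preempt(false, 2),   /* 0: still inside a lazy region */
               may_preempt(false, 0));  /* 1: lazy region left, preempt */
        return 0;
}
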
  15537. diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
  15538. index 5a75b08cfd85..5be58820465c 100644
  15539. --- a/kernel/sched/cpudeadline.c
  15540. +++ b/kernel/sched/cpudeadline.c
  15541. @@ -103,10 +103,10 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
  15542. const struct sched_dl_entity *dl_se = &p->dl;
  15543. if (later_mask &&
  15544. - cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
  15545. + cpumask_and(later_mask, cp->free_cpus, tsk_cpus_allowed(p))) {
  15546. best_cpu = cpumask_any(later_mask);
  15547. goto out;
  15548. - } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
  15549. + } else if (cpumask_test_cpu(cpudl_maximum(cp), tsk_cpus_allowed(p)) &&
  15550. dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
  15551. best_cpu = cpudl_maximum(cp);
  15552. if (later_mask)
  15553. diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
  15554. index 981fcd7dc394..11e9705bf937 100644
  15555. --- a/kernel/sched/cpupri.c
  15556. +++ b/kernel/sched/cpupri.c
  15557. @@ -103,11 +103,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
  15558. if (skip)
  15559. continue;
  15560. - if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
  15561. + if (cpumask_any_and(tsk_cpus_allowed(p), vec->mask) >= nr_cpu_ids)
  15562. continue;
  15563. if (lowest_mask) {
  15564. - cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
  15565. + cpumask_and(lowest_mask, tsk_cpus_allowed(p), vec->mask);
  15566. /*
  15567. * We have to ensure that we have at least one bit
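
The cpudeadline.c/cpupri.c hunks above and the deadline.c/rt.c hunks below convert direct uses of p->cpus_allowed and p->nr_cpus_allowed to the tsk_cpus_allowed()/tsk_nr_cpus_allowed() accessors, so that a migrate-disabled task on PREEMPT_RT_FULL is treated as pinned to its current CPU. The accessor definitions are not shown in this section; the sketch below is an assumption about their shape (consistent with the migrate_disable field printed in the debug.c hunk), not the patch's actual include/linux/sched.h change.

#include <linux/sched.h>
#include <linux/cpumask.h>

/* Sketch only -- the real definitions live elsewhere in this patch. */
static inline int __migrate_disabled(struct task_struct *p)
{
#ifdef CONFIG_PREEMPT_RT_FULL
        return p->migrate_disable;      /* per-task migrate_disable counter */
#else
        return 0;
#endif
}

static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
{
        if (__migrate_disabled(p))
                return cpumask_of(task_cpu(p)); /* pinned to current CPU */
        return &p->cpus_allowed;
}

static inline int tsk_nr_cpus_allowed(struct task_struct *p)
{
        return __migrate_disabled(p) ? 1 : p->nr_cpus_allowed;
}
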
  15568. diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
  15569. index 686ec8adf952..8099333b5e1d 100644
  15570. --- a/kernel/sched/deadline.c
  15571. +++ b/kernel/sched/deadline.c
  15572. @@ -134,7 +134,7 @@ static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  15573. {
  15574. struct task_struct *p = dl_task_of(dl_se);
  15575. - if (p->nr_cpus_allowed > 1)
  15576. + if (tsk_nr_cpus_allowed(p) > 1)
  15577. dl_rq->dl_nr_migratory++;
  15578. update_dl_migration(dl_rq);
  15579. @@ -144,7 +144,7 @@ static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  15580. {
  15581. struct task_struct *p = dl_task_of(dl_se);
  15582. - if (p->nr_cpus_allowed > 1)
  15583. + if (tsk_nr_cpus_allowed(p) > 1)
  15584. dl_rq->dl_nr_migratory--;
  15585. update_dl_migration(dl_rq);
  15586. @@ -694,6 +694,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
  15587. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  15588. timer->function = dl_task_timer;
  15589. + timer->irqsafe = 1;
  15590. }
  15591. static
  15592. @@ -966,7 +967,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
  15593. enqueue_dl_entity(&p->dl, pi_se, flags);
  15594. - if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
  15595. + if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
  15596. enqueue_pushable_dl_task(rq, p);
  15597. }
  15598. @@ -1040,9 +1041,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
  15599. * try to make it stay here, it might be important.
  15600. */
  15601. if (unlikely(dl_task(curr)) &&
  15602. - (curr->nr_cpus_allowed < 2 ||
  15603. + (tsk_nr_cpus_allowed(curr) < 2 ||
  15604. !dl_entity_preempt(&p->dl, &curr->dl)) &&
  15605. - (p->nr_cpus_allowed > 1)) {
  15606. + (tsk_nr_cpus_allowed(p) > 1)) {
  15607. int target = find_later_rq(p);
  15608. if (target != -1 &&
  15609. @@ -1063,7 +1064,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
  15610. * Current can't be migrated, useless to reschedule,
  15611. * let's hope p can move out.
  15612. */
  15613. - if (rq->curr->nr_cpus_allowed == 1 ||
  15614. + if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
  15615. cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
  15616. return;
  15617. @@ -1071,7 +1072,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
  15618. * p is migratable, so let's not schedule it and
  15619. * see if it is pushed or pulled somewhere else.
  15620. */
  15621. - if (p->nr_cpus_allowed != 1 &&
  15622. + if (tsk_nr_cpus_allowed(p) != 1 &&
  15623. cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
  15624. return;
  15625. @@ -1185,7 +1186,7 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
  15626. {
  15627. update_curr_dl(rq);
  15628. - if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
  15629. + if (on_dl_rq(&p->dl) && tsk_nr_cpus_allowed(p) > 1)
  15630. enqueue_pushable_dl_task(rq, p);
  15631. }
  15632. @@ -1286,7 +1287,7 @@ static int find_later_rq(struct task_struct *task)
  15633. if (unlikely(!later_mask))
  15634. return -1;
  15635. - if (task->nr_cpus_allowed == 1)
  15636. + if (tsk_nr_cpus_allowed(task) == 1)
  15637. return -1;
  15638. /*
  15639. @@ -1392,7 +1393,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
  15640. if (double_lock_balance(rq, later_rq)) {
  15641. if (unlikely(task_rq(task) != rq ||
  15642. !cpumask_test_cpu(later_rq->cpu,
  15643. - &task->cpus_allowed) ||
  15644. + tsk_cpus_allowed(task)) ||
  15645. task_running(rq, task) ||
  15646. !dl_task(task) ||
  15647. !task_on_rq_queued(task))) {
  15648. @@ -1432,7 +1433,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
  15649. BUG_ON(rq->cpu != task_cpu(p));
  15650. BUG_ON(task_current(rq, p));
  15651. - BUG_ON(p->nr_cpus_allowed <= 1);
  15652. + BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
  15653. BUG_ON(!task_on_rq_queued(p));
  15654. BUG_ON(!dl_task(p));
  15655. @@ -1471,7 +1472,7 @@ static int push_dl_task(struct rq *rq)
  15656. */
  15657. if (dl_task(rq->curr) &&
  15658. dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
  15659. - rq->curr->nr_cpus_allowed > 1) {
  15660. + tsk_nr_cpus_allowed(rq->curr) > 1) {
  15661. resched_curr(rq);
  15662. return 0;
  15663. }
  15664. @@ -1618,9 +1619,9 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p)
  15665. {
  15666. if (!task_running(rq, p) &&
  15667. !test_tsk_need_resched(rq->curr) &&
  15668. - p->nr_cpus_allowed > 1 &&
  15669. + tsk_nr_cpus_allowed(p) > 1 &&
  15670. dl_task(rq->curr) &&
  15671. - (rq->curr->nr_cpus_allowed < 2 ||
  15672. + (tsk_nr_cpus_allowed(rq->curr) < 2 ||
  15673. !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
  15674. push_dl_tasks(rq);
  15675. }
  15676. @@ -1724,7 +1725,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
  15677. if (task_on_rq_queued(p) && rq->curr != p) {
  15678. #ifdef CONFIG_SMP
  15679. - if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
  15680. + if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded)
  15681. queue_push_tasks(rq);
  15682. #else
  15683. if (dl_task(rq->curr))
  15684. diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
  15685. index 4fbc3bd5ff60..5503c1f4290a 100644
  15686. --- a/kernel/sched/debug.c
  15687. +++ b/kernel/sched/debug.c
  15688. @@ -559,6 +559,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
  15689. P(rt_throttled);
  15690. PN(rt_time);
  15691. PN(rt_runtime);
  15692. +#ifdef CONFIG_SMP
  15693. + P(rt_nr_migratory);
  15694. +#endif
  15695. #undef PN
  15696. #undef P
  15697. @@ -954,6 +957,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
  15698. #endif
  15699. P(policy);
  15700. P(prio);
  15701. +#ifdef CONFIG_PREEMPT_RT_FULL
  15702. + P(migrate_disable);
  15703. +#endif
  15704. + P(nr_cpus_allowed);
  15705. #undef PN
  15706. #undef __PN
  15707. #undef P
  15708. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
  15709. index eeaf920f46b9..3b66683e5b3f 100644
  15710. --- a/kernel/sched/fair.c
  15711. +++ b/kernel/sched/fair.c
  15712. @@ -3335,7 +3335,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
  15713. ideal_runtime = sched_slice(cfs_rq, curr);
  15714. delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
  15715. if (delta_exec > ideal_runtime) {
  15716. - resched_curr(rq_of(cfs_rq));
  15717. + resched_curr_lazy(rq_of(cfs_rq));
  15718. /*
  15719. * The current task ran long enough, ensure it doesn't get
  15720. * re-elected due to buddy favours.
  15721. @@ -3359,7 +3359,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
  15722. return;
  15723. if (delta > ideal_runtime)
  15724. - resched_curr(rq_of(cfs_rq));
  15725. + resched_curr_lazy(rq_of(cfs_rq));
  15726. }
  15727. static void
  15728. @@ -3504,7 +3504,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
  15729. * validating it and just reschedule.
  15730. */
  15731. if (queued) {
  15732. - resched_curr(rq_of(cfs_rq));
  15733. + resched_curr_lazy(rq_of(cfs_rq));
  15734. return;
  15735. }
  15736. /*
  15737. @@ -3686,7 +3686,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
  15738. * hierarchy can be throttled
  15739. */
  15740. if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
  15741. - resched_curr(rq_of(cfs_rq));
  15742. + resched_curr_lazy(rq_of(cfs_rq));
  15743. }
  15744. static __always_inline
  15745. @@ -4298,7 +4298,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
  15746. if (delta < 0) {
  15747. if (rq->curr == p)
  15748. - resched_curr(rq);
  15749. + resched_curr_lazy(rq);
  15750. return;
  15751. }
  15752. hrtick_start(rq, delta);
  15753. @@ -5438,7 +5438,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
  15754. return;
  15755. preempt:
  15756. - resched_curr(rq);
  15757. + resched_curr_lazy(rq);
  15758. /*
  15759. * Only set the backward buddy when the current task is still
  15760. * on the rq. This can happen when a wakeup gets interleaved
  15761. @@ -8189,7 +8189,7 @@ static void task_fork_fair(struct task_struct *p)
  15762. * 'current' within the tree based on its new key value.
  15763. */
  15764. swap(curr->vruntime, se->vruntime);
  15765. - resched_curr(rq);
  15766. + resched_curr_lazy(rq);
  15767. }
  15768. se->vruntime -= cfs_rq->min_vruntime;
  15769. @@ -8214,7 +8214,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
  15770. */
  15771. if (rq->curr == p) {
  15772. if (p->prio > oldprio)
  15773. - resched_curr(rq);
  15774. + resched_curr_lazy(rq);
  15775. } else
  15776. check_preempt_curr(rq, p, 0);
  15777. }
  15778. diff --git a/kernel/sched/features.h b/kernel/sched/features.h
  15779. index 69631fa46c2f..6d28fcd08872 100644
  15780. --- a/kernel/sched/features.h
  15781. +++ b/kernel/sched/features.h
  15782. @@ -45,11 +45,19 @@ SCHED_FEAT(LB_BIAS, true)
  15783. */
  15784. SCHED_FEAT(NONTASK_CAPACITY, true)
  15785. +#ifdef CONFIG_PREEMPT_RT_FULL
  15786. +SCHED_FEAT(TTWU_QUEUE, false)
  15787. +# ifdef CONFIG_PREEMPT_LAZY
  15788. +SCHED_FEAT(PREEMPT_LAZY, true)
  15789. +# endif
  15790. +#else
  15791. +
  15792. /*
  15793. * Queue remote wakeups on the target CPU and process them
  15794. * using the scheduler IPI. Reduces rq->lock contention/bounces.
  15795. */
  15796. SCHED_FEAT(TTWU_QUEUE, true)
  15797. +#endif
  15798. #ifdef HAVE_RT_PUSH_IPI
  15799. /*
  15800. diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
  15801. index ec4f538d4396..f7b281059ddf 100644
  15802. --- a/kernel/sched/rt.c
  15803. +++ b/kernel/sched/rt.c
  15804. @@ -47,6 +47,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
  15805. hrtimer_init(&rt_b->rt_period_timer,
  15806. CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  15807. + rt_b->rt_period_timer.irqsafe = 1;
  15808. rt_b->rt_period_timer.function = sched_rt_period_timer;
  15809. }
  15810. @@ -101,6 +102,7 @@ void init_rt_rq(struct rt_rq *rt_rq)
  15811. rt_rq->push_cpu = nr_cpu_ids;
  15812. raw_spin_lock_init(&rt_rq->push_lock);
  15813. init_irq_work(&rt_rq->push_work, push_irq_work_func);
  15814. + rt_rq->push_work.flags |= IRQ_WORK_HARD_IRQ;
  15815. #endif
  15816. #endif /* CONFIG_SMP */
  15817. /* We start is dequeued state, because no RT tasks are queued */
  15818. @@ -334,7 +336,7 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  15819. rt_rq = &rq_of_rt_rq(rt_rq)->rt;
  15820. rt_rq->rt_nr_total++;
  15821. - if (p->nr_cpus_allowed > 1)
  15822. + if (tsk_nr_cpus_allowed(p) > 1)
  15823. rt_rq->rt_nr_migratory++;
  15824. update_rt_migration(rt_rq);
  15825. @@ -351,7 +353,7 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  15826. rt_rq = &rq_of_rt_rq(rt_rq)->rt;
  15827. rt_rq->rt_nr_total--;
  15828. - if (p->nr_cpus_allowed > 1)
  15829. + if (tsk_nr_cpus_allowed(p) > 1)
  15830. rt_rq->rt_nr_migratory--;
  15831. update_rt_migration(rt_rq);
  15832. @@ -1324,7 +1326,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
  15833. enqueue_rt_entity(rt_se, flags);
  15834. - if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
  15835. + if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
  15836. enqueue_pushable_task(rq, p);
  15837. }
  15838. @@ -1413,7 +1415,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
  15839. * will have to sort it out.
  15840. */
  15841. if (curr && unlikely(rt_task(curr)) &&
  15842. - (curr->nr_cpus_allowed < 2 ||
  15843. + (tsk_nr_cpus_allowed(curr) < 2 ||
  15844. curr->prio <= p->prio)) {
  15845. int target = find_lowest_rq(p);
  15846. @@ -1437,7 +1439,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
  15847. * Current can't be migrated, useless to reschedule,
  15848. * let's hope p can move out.
  15849. */
  15850. - if (rq->curr->nr_cpus_allowed == 1 ||
  15851. + if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
  15852. !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
  15853. return;
  15854. @@ -1445,7 +1447,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
  15855. * p is migratable, so let's not schedule it and
  15856. * see if it is pushed or pulled somewhere else.
  15857. */
  15858. - if (p->nr_cpus_allowed != 1
  15859. + if (tsk_nr_cpus_allowed(p) != 1
  15860. && cpupri_find(&rq->rd->cpupri, p, NULL))
  15861. return;
  15862. @@ -1579,7 +1581,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
  15863. * The previous task needs to be made eligible for pushing
  15864. * if it is still active
  15865. */
  15866. - if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
  15867. + if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1)
  15868. enqueue_pushable_task(rq, p);
  15869. }
  15870. @@ -1629,7 +1631,7 @@ static int find_lowest_rq(struct task_struct *task)
  15871. if (unlikely(!lowest_mask))
  15872. return -1;
  15873. - if (task->nr_cpus_allowed == 1)
  15874. + if (tsk_nr_cpus_allowed(task) == 1)
  15875. return -1; /* No other targets possible */
  15876. if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
  15877. @@ -1762,7 +1764,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
  15878. BUG_ON(rq->cpu != task_cpu(p));
  15879. BUG_ON(task_current(rq, p));
  15880. - BUG_ON(p->nr_cpus_allowed <= 1);
  15881. + BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
  15882. BUG_ON(!task_on_rq_queued(p));
  15883. BUG_ON(!rt_task(p));
  15884. @@ -2122,9 +2124,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
  15885. {
  15886. if (!task_running(rq, p) &&
  15887. !test_tsk_need_resched(rq->curr) &&
  15888. - p->nr_cpus_allowed > 1 &&
  15889. + tsk_nr_cpus_allowed(p) > 1 &&
  15890. (dl_task(rq->curr) || rt_task(rq->curr)) &&
  15891. - (rq->curr->nr_cpus_allowed < 2 ||
  15892. + (tsk_nr_cpus_allowed(rq->curr) < 2 ||
  15893. rq->curr->prio <= p->prio))
  15894. push_rt_tasks(rq);
  15895. }
  15896. @@ -2197,7 +2199,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
  15897. */
  15898. if (task_on_rq_queued(p) && rq->curr != p) {
  15899. #ifdef CONFIG_SMP
  15900. - if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
  15901. + if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded)
  15902. queue_push_tasks(rq);
  15903. #else
  15904. if (p->prio < rq->curr->prio)
  15905. diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
  15906. index ec2e8d23527e..93c999cbe58f 100644
  15907. --- a/kernel/sched/sched.h
  15908. +++ b/kernel/sched/sched.h
  15909. @@ -1128,6 +1128,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
  15910. #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
  15911. #define WF_FORK 0x02 /* child wakeup after fork */
  15912. #define WF_MIGRATED 0x4 /* internal use, task got migrated */
  15913. +#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */
  15914. /*
  15915. * To aid in avoiding the subversion of "niceness" due to uneven distribution
  15916. @@ -1303,6 +1304,15 @@ extern void init_sched_fair_class(void);
  15917. extern void resched_curr(struct rq *rq);
  15918. extern void resched_cpu(int cpu);
  15919. +#ifdef CONFIG_PREEMPT_LAZY
  15920. +extern void resched_curr_lazy(struct rq *rq);
  15921. +#else
  15922. +static inline void resched_curr_lazy(struct rq *rq)
  15923. +{
  15924. + resched_curr(rq);
  15925. +}
  15926. +#endif
  15927. +
  15928. extern struct rt_bandwidth def_rt_bandwidth;
  15929. extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
  15930. diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
  15931. index 82f0dff90030..e2e224c59c0a 100644
  15932. --- a/kernel/sched/swait.c
  15933. +++ b/kernel/sched/swait.c
  15934. @@ -1,5 +1,6 @@
  15935. #include <linux/sched.h>
  15936. #include <linux/swait.h>
  15937. +#include <linux/suspend.h>
  15938. void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
  15939. struct lock_class_key *key)
  15940. @@ -29,6 +30,25 @@ void swake_up_locked(struct swait_queue_head *q)
  15941. }
  15942. EXPORT_SYMBOL(swake_up_locked);
  15943. +void swake_up_all_locked(struct swait_queue_head *q)
  15944. +{
  15945. + struct swait_queue *curr;
  15946. + int wakes = 0;
  15947. +
  15948. + while (!list_empty(&q->task_list)) {
  15949. +
  15950. + curr = list_first_entry(&q->task_list, typeof(*curr),
  15951. + task_list);
  15952. + wake_up_process(curr->task);
  15953. + list_del_init(&curr->task_list);
  15954. + wakes++;
  15955. + }
  15956. + if (pm_in_action)
  15957. + return;
  15958. + WARN(wakes > 2, "complete_all() with %d waiters\n", wakes);
  15959. +}
  15960. +EXPORT_SYMBOL(swake_up_all_locked);
  15961. +
  15962. void swake_up(struct swait_queue_head *q)
  15963. {
  15964. unsigned long flags;
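
swake_up_all_locked(), added above, lets a caller that already holds the swait queue lock with interrupts disabled wake every waiter; it warns when more than two tasks are woken (except while pm_in_action is set during suspend/resume) to flag complete_all() abuse on RT. A hedged sketch of such a caller follows; flush_all_waiters() is an invented name and the locking context is hypothetical.

#include <linux/swait.h>

static void flush_all_waiters(struct swait_queue_head *q)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&q->lock, flags);
        swake_up_all_locked(q);         /* wakes every waiter; warns if > 2 */
        raw_spin_unlock_irqrestore(&q->lock, flags);
}
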
  15965. diff --git a/kernel/sched/swork.c b/kernel/sched/swork.c
  15966. new file mode 100644
  15967. index 000000000000..1950f40ca725
  15968. --- /dev/null
  15969. +++ b/kernel/sched/swork.c
  15970. @@ -0,0 +1,173 @@
  15971. +/*
  15972. + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de
  15973. + *
  15974. + * Provides a framework for enqueuing callbacks from irq context
  15975. + * PREEMPT_RT_FULL safe. The callbacks are executed in kthread context.
  15976. + */
  15977. +
  15978. +#include <linux/swait.h>
  15979. +#include <linux/swork.h>
  15980. +#include <linux/kthread.h>
  15981. +#include <linux/slab.h>
  15982. +#include <linux/spinlock.h>
  15983. +#include <linux/export.h>
  15984. +
  15985. +#define SWORK_EVENT_PENDING (1 << 0)
  15986. +
  15987. +static DEFINE_MUTEX(worker_mutex);
  15988. +static struct sworker *glob_worker;
  15989. +
  15990. +struct sworker {
  15991. + struct list_head events;
  15992. + struct swait_queue_head wq;
  15993. +
  15994. + raw_spinlock_t lock;
  15995. +
  15996. + struct task_struct *task;
  15997. + int refs;
  15998. +};
  15999. +
  16000. +static bool swork_readable(struct sworker *worker)
  16001. +{
  16002. + bool r;
  16003. +
  16004. + if (kthread_should_stop())
  16005. + return true;
  16006. +
  16007. + raw_spin_lock_irq(&worker->lock);
  16008. + r = !list_empty(&worker->events);
  16009. + raw_spin_unlock_irq(&worker->lock);
  16010. +
  16011. + return r;
  16012. +}
  16013. +
  16014. +static int swork_kthread(void *arg)
  16015. +{
  16016. + struct sworker *worker = arg;
  16017. +
  16018. + for (;;) {
  16019. + swait_event_interruptible(worker->wq,
  16020. + swork_readable(worker));
  16021. + if (kthread_should_stop())
  16022. + break;
  16023. +
  16024. + raw_spin_lock_irq(&worker->lock);
  16025. + while (!list_empty(&worker->events)) {
  16026. + struct swork_event *sev;
  16027. +
  16028. + sev = list_first_entry(&worker->events,
  16029. + struct swork_event, item);
  16030. + list_del(&sev->item);
  16031. + raw_spin_unlock_irq(&worker->lock);
  16032. +
  16033. + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING,
  16034. + &sev->flags));
  16035. + sev->func(sev);
  16036. + raw_spin_lock_irq(&worker->lock);
  16037. + }
  16038. + raw_spin_unlock_irq(&worker->lock);
  16039. + }
  16040. + return 0;
  16041. +}
  16042. +
  16043. +static struct sworker *swork_create(void)
  16044. +{
  16045. + struct sworker *worker;
  16046. +
  16047. + worker = kzalloc(sizeof(*worker), GFP_KERNEL);
  16048. + if (!worker)
  16049. + return ERR_PTR(-ENOMEM);
  16050. +
  16051. + INIT_LIST_HEAD(&worker->events);
  16052. + raw_spin_lock_init(&worker->lock);
  16053. + init_swait_queue_head(&worker->wq);
  16054. +
  16055. + worker->task = kthread_run(swork_kthread, worker, "kswork");
  16056. + if (IS_ERR(worker->task)) {
  16057. + kfree(worker);
  16058. + return ERR_PTR(-ENOMEM);
  16059. + }
  16060. +
  16061. + return worker;
  16062. +}
  16063. +
  16064. +static void swork_destroy(struct sworker *worker)
  16065. +{
  16066. + kthread_stop(worker->task);
  16067. +
  16068. + WARN_ON(!list_empty(&worker->events));
  16069. + kfree(worker);
  16070. +}
  16071. +
  16072. +/**
  16073. + * swork_queue - queue swork
  16074. + *
  16075. + * Returns %false if @sev was already on a queue, %true otherwise.
  16076. + *
  16077. + * The work is queued and processed on a random CPU
  16078. + */
  16079. +bool swork_queue(struct swork_event *sev)
  16080. +{
  16081. + unsigned long flags;
  16082. +
  16083. + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags))
  16084. + return false;
  16085. +
  16086. + raw_spin_lock_irqsave(&glob_worker->lock, flags);
  16087. + list_add_tail(&sev->item, &glob_worker->events);
  16088. + raw_spin_unlock_irqrestore(&glob_worker->lock, flags);
  16089. +
  16090. + swake_up(&glob_worker->wq);
  16091. + return true;
  16092. +}
  16093. +EXPORT_SYMBOL_GPL(swork_queue);
  16094. +
  16095. +/**
  16096. + * swork_get - get an instance of the sworker
  16097. + *
  16098. + * Returns a negative error code if the initialization of the worker did
  16099. + * not work, %0 otherwise.
  16100. + *
  16101. + */
  16102. +int swork_get(void)
  16103. +{
  16104. + struct sworker *worker;
  16105. +
  16106. + mutex_lock(&worker_mutex);
  16107. + if (!glob_worker) {
  16108. + worker = swork_create();
  16109. + if (IS_ERR(worker)) {
  16110. + mutex_unlock(&worker_mutex);
  16111. + return -ENOMEM;
  16112. + }
  16113. +
  16114. + glob_worker = worker;
  16115. + }
  16116. +
  16117. + glob_worker->refs++;
  16118. + mutex_unlock(&worker_mutex);
  16119. +
  16120. + return 0;
  16121. +}
  16122. +EXPORT_SYMBOL_GPL(swork_get);
  16123. +
  16124. +/**
  16125. + * swork_put - puts an instance of the sworker
  16126. + *
  16127. + * Will destroy the sworker thread. This function must not be called until all
  16128. + * queued events have been completed.
  16129. + */
  16130. +void swork_put(void)
  16131. +{
  16132. + mutex_lock(&worker_mutex);
  16133. +
  16134. + glob_worker->refs--;
  16135. + if (glob_worker->refs > 0)
  16136. + goto out;
  16137. +
  16138. + swork_destroy(glob_worker);
  16139. + glob_worker = NULL;
  16140. +out:
  16141. + mutex_unlock(&worker_mutex);
  16142. +}
  16143. +EXPORT_SYMBOL_GPL(swork_put);
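
kernel/sched/swork.c above provides a small "simple work" framework so that callbacks can be queued from hard irq context on PREEMPT_RT_FULL and executed later in the preemptible kswork kthread. A hedged usage sketch follows; the my_* names are invented, error handling is minimal, and the swork_event fields are initialised directly based on what swork.c itself touches (linux/swork.h may provide an init helper).

#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/swork.h>

static struct swork_event my_event;

static void my_event_fn(struct swork_event *sev)
{
        /* Runs in the kswork kthread: preemptible, may sleep. */
        pr_info("deferred work executed\n");
}

static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
        swork_queue(&my_event);         /* safe from hard irq context */
        return IRQ_HANDLED;
}

static int __init my_init(void)
{
        int ret = swork_get();          /* create/refcount the kswork thread */

        if (ret)
                return ret;

        INIT_LIST_HEAD(&my_event.item);
        my_event.flags = 0;
        my_event.func = my_event_fn;
        /* request_irq(..., my_irq_handler, ...) would be set up here. */
        return 0;
}

static void __exit my_exit(void)
{
        swork_put();                    /* drop the reference on kswork */
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");
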
  16144. diff --git a/kernel/signal.c b/kernel/signal.c
  16145. index aa9bf00749c1..0a625c7b8792 100644
  16146. --- a/kernel/signal.c
  16147. +++ b/kernel/signal.c
  16148. @@ -14,6 +14,7 @@
  16149. #include <linux/export.h>
  16150. #include <linux/init.h>
  16151. #include <linux/sched.h>
  16152. +#include <linux/sched/rt.h>
  16153. #include <linux/fs.h>
  16154. #include <linux/tty.h>
  16155. #include <linux/binfmts.h>
  16156. @@ -352,13 +353,30 @@ static bool task_participate_group_stop(struct task_struct *task)
  16157. return false;
  16158. }
  16159. +static inline struct sigqueue *get_task_cache(struct task_struct *t)
  16160. +{
  16161. + struct sigqueue *q = t->sigqueue_cache;
  16162. +
  16163. + if (cmpxchg(&t->sigqueue_cache, q, NULL) != q)
  16164. + return NULL;
  16165. + return q;
  16166. +}
  16167. +
  16168. +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
  16169. +{
  16170. + if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
  16171. + return 0;
  16172. + return 1;
  16173. +}
  16174. +
  16175. /*
  16176. * allocate a new signal queue record
  16177. * - this may be called without locks if and only if t == current, otherwise an
  16178. * appropriate lock must be held to stop the target task from exiting
  16179. */
  16180. static struct sigqueue *
  16181. -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
  16182. +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
  16183. + int override_rlimit, int fromslab)
  16184. {
  16185. struct sigqueue *q = NULL;
  16186. struct user_struct *user;
  16187. @@ -375,7 +393,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
  16188. if (override_rlimit ||
  16189. atomic_read(&user->sigpending) <=
  16190. task_rlimit(t, RLIMIT_SIGPENDING)) {
  16191. - q = kmem_cache_alloc(sigqueue_cachep, flags);
  16192. + if (!fromslab)
  16193. + q = get_task_cache(t);
  16194. + if (!q)
  16195. + q = kmem_cache_alloc(sigqueue_cachep, flags);
  16196. } else {
  16197. print_dropped_signal(sig);
  16198. }
  16199. @@ -392,6 +413,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
  16200. return q;
  16201. }
  16202. +static struct sigqueue *
  16203. +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags,
  16204. + int override_rlimit)
  16205. +{
  16206. + return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0);
  16207. +}
  16208. +
  16209. static void __sigqueue_free(struct sigqueue *q)
  16210. {
  16211. if (q->flags & SIGQUEUE_PREALLOC)
  16212. @@ -401,6 +429,21 @@ static void __sigqueue_free(struct sigqueue *q)
  16213. kmem_cache_free(sigqueue_cachep, q);
  16214. }
  16215. +static void sigqueue_free_current(struct sigqueue *q)
  16216. +{
  16217. + struct user_struct *up;
  16218. +
  16219. + if (q->flags & SIGQUEUE_PREALLOC)
  16220. + return;
  16221. +
  16222. + up = q->user;
  16223. + if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
  16224. + atomic_dec(&up->sigpending);
  16225. + free_uid(up);
  16226. + } else
  16227. + __sigqueue_free(q);
  16228. +}
  16229. +
  16230. void flush_sigqueue(struct sigpending *queue)
  16231. {
  16232. struct sigqueue *q;
  16233. @@ -414,6 +457,21 @@ void flush_sigqueue(struct sigpending *queue)
  16234. }
  16235. /*
  16236. + * Called from __exit_signal. Flush tsk->pending and
  16237. + * tsk->sigqueue_cache
  16238. + */
  16239. +void flush_task_sigqueue(struct task_struct *tsk)
  16240. +{
  16241. + struct sigqueue *q;
  16242. +
  16243. + flush_sigqueue(&tsk->pending);
  16244. +
  16245. + q = get_task_cache(tsk);
  16246. + if (q)
  16247. + kmem_cache_free(sigqueue_cachep, q);
  16248. +}
  16249. +
  16250. +/*
  16251. * Flush all pending signals for this kthread.
  16252. */
  16253. void flush_signals(struct task_struct *t)
  16254. @@ -525,7 +583,7 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
  16255. still_pending:
  16256. list_del_init(&first->list);
  16257. copy_siginfo(info, &first->info);
  16258. - __sigqueue_free(first);
  16259. + sigqueue_free_current(first);
  16260. } else {
  16261. /*
  16262. * Ok, it wasn't in the queue. This must be
  16263. @@ -560,6 +618,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
  16264. {
  16265. int signr;
  16266. + WARN_ON_ONCE(tsk != current);
  16267. +
  16268. /* We only dequeue private signals from ourselves, we don't let
  16269. * signalfd steal them
  16270. */
  16271. @@ -1156,8 +1216,8 @@ int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
  16272. * We don't want to have recursive SIGSEGV's etc, for example,
  16273. * that is why we also clear SIGNAL_UNKILLABLE.
  16274. */
  16275. -int
  16276. -force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  16277. +static int
  16278. +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  16279. {
  16280. unsigned long int flags;
  16281. int ret, blocked, ignored;
  16282. @@ -1182,6 +1242,39 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  16283. return ret;
  16284. }
  16285. +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  16286. +{
  16287. +/*
  16288. + * On some archs, PREEMPT_RT has to delay sending a signal from a trap
  16289. + * since it cannot enable preemption, and the signal code's spin_locks
  16290. + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
  16291. + * send the signal on exit of the trap.
  16292. + */
  16293. +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
  16294. + if (in_atomic()) {
  16295. + if (WARN_ON_ONCE(t != current))
  16296. + return 0;
  16297. + if (WARN_ON_ONCE(t->forced_info.si_signo))
  16298. + return 0;
  16299. +
  16300. + if (is_si_special(info)) {
  16301. + WARN_ON_ONCE(info != SEND_SIG_PRIV);
  16302. + t->forced_info.si_signo = sig;
  16303. + t->forced_info.si_errno = 0;
  16304. + t->forced_info.si_code = SI_KERNEL;
  16305. + t->forced_info.si_pid = 0;
  16306. + t->forced_info.si_uid = 0;
  16307. + } else {
  16308. + t->forced_info = *info;
  16309. + }
  16310. +
  16311. + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
  16312. + return 0;
  16313. + }
  16314. +#endif
  16315. + return do_force_sig_info(sig, info, t);
  16316. +}
  16317. +
  16318. /*
  16319. * Nuke all other threads in the group.
  16320. */
  16321. @@ -1216,12 +1309,12 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
  16322. * Disable interrupts early to avoid deadlocks.
  16323. * See rcu_read_unlock() comment header for details.
  16324. */
  16325. - local_irq_save(*flags);
  16326. + local_irq_save_nort(*flags);
  16327. rcu_read_lock();
  16328. sighand = rcu_dereference(tsk->sighand);
  16329. if (unlikely(sighand == NULL)) {
  16330. rcu_read_unlock();
  16331. - local_irq_restore(*flags);
  16332. + local_irq_restore_nort(*flags);
  16333. break;
  16334. }
  16335. /*
  16336. @@ -1242,7 +1335,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
  16337. }
  16338. spin_unlock(&sighand->siglock);
  16339. rcu_read_unlock();
  16340. - local_irq_restore(*flags);
  16341. + local_irq_restore_nort(*flags);
  16342. }
  16343. return sighand;
  16344. @@ -1485,7 +1578,8 @@ EXPORT_SYMBOL(kill_pid);
  16345. */
  16346. struct sigqueue *sigqueue_alloc(void)
  16347. {
  16348. - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0);
  16349. + /* Preallocated sigqueue objects always from the slabcache ! */
  16350. + struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1);
  16351. if (q)
  16352. q->flags |= SIGQUEUE_PREALLOC;
  16353. @@ -1846,15 +1940,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
  16354. if (gstop_done && ptrace_reparented(current))
  16355. do_notify_parent_cldstop(current, false, why);
  16356. - /*
  16357. - * Don't want to allow preemption here, because
  16358. - * sys_ptrace() needs this task to be inactive.
  16359. - *
  16360. - * XXX: implement read_unlock_no_resched().
  16361. - */
  16362. - preempt_disable();
  16363. read_unlock(&tasklist_lock);
  16364. - preempt_enable_no_resched();
  16365. freezable_schedule();
  16366. } else {
  16367. /*
  16368. @@ -2751,23 +2837,18 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
  16369. * @ts: upper bound on process time suspension
  16370. */
  16371. int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
  16372. - const struct timespec *ts)
  16373. + const struct timespec *ts)
  16374. {
  16375. + ktime_t *to = NULL, timeout = { .tv64 = KTIME_MAX };
  16376. struct task_struct *tsk = current;
  16377. - long timeout = MAX_SCHEDULE_TIMEOUT;
  16378. sigset_t mask = *which;
  16379. - int sig;
  16380. + int sig, ret = 0;
  16381. if (ts) {
  16382. if (!timespec_valid(ts))
  16383. return -EINVAL;
  16384. - timeout = timespec_to_jiffies(ts);
  16385. - /*
  16386. - * We can be close to the next tick, add another one
  16387. - * to ensure we will wait at least the time asked for.
  16388. - */
  16389. - if (ts->tv_sec || ts->tv_nsec)
  16390. - timeout++;
  16391. + timeout = timespec_to_ktime(*ts);
  16392. + to = &timeout;
  16393. }
  16394. /*
  16395. @@ -2778,7 +2859,7 @@ int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
  16396. spin_lock_irq(&tsk->sighand->siglock);
  16397. sig = dequeue_signal(tsk, &mask, info);
  16398. - if (!sig && timeout) {
  16399. + if (!sig && timeout.tv64) {
  16400. /*
  16401. * None ready, temporarily unblock those we're interested
  16402. * while we are sleeping in so that we'll be awakened when
  16403. @@ -2790,8 +2871,9 @@ int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
  16404. recalc_sigpending();
  16405. spin_unlock_irq(&tsk->sighand->siglock);
  16406. - timeout = freezable_schedule_timeout_interruptible(timeout);
  16407. -
  16408. + __set_current_state(TASK_INTERRUPTIBLE);
  16409. + ret = freezable_schedule_hrtimeout_range(to, tsk->timer_slack_ns,
  16410. + HRTIMER_MODE_REL);
  16411. spin_lock_irq(&tsk->sighand->siglock);
  16412. __set_task_blocked(tsk, &tsk->real_blocked);
  16413. sigemptyset(&tsk->real_blocked);
  16414. @@ -2801,7 +2883,7 @@ int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
  16415. if (sig)
  16416. return sig;
  16417. - return timeout ? -EINTR : -EAGAIN;
  16418. + return ret ? -EINTR : -EAGAIN;
  16419. }
  16420. /**
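
The signal.c hunks above add a one-entry, lock-free per-task sigqueue cache: allocation prefers the cached entry, and sigqueue_free_current() parks an entry back only when the current task has RT priority, so RT signal delivery avoids the slab allocator. get_task_cache()/put_task_cache() implement a single-slot cmpxchg() cache; the stand-alone C11 model below is illustrative only (names invented, not from the patch).

#include <stdatomic.h>
#include <stddef.h>

struct slot {
        _Atomic(void *) cache;          /* single cached object or NULL */
};

/* Take the cached object, or NULL if the slot is empty or we raced. */
static void *slot_get(struct slot *s)
{
        void *q = atomic_load(&s->cache);

        if (q && atomic_compare_exchange_strong(&s->cache, &q, NULL))
                return q;
        return NULL;
}

/* Return 0 if the object was parked in the slot, 1 if the slot was full. */
static int slot_put(struct slot *s, void *q)
{
        void *expected = NULL;

        return atomic_compare_exchange_strong(&s->cache, &expected, q) ? 0 : 1;
}

int main(void)
{
        static struct slot s;
        int obj;

        /* Park obj in the empty slot, then take it back. */
        return !(slot_put(&s, &obj) == 0 && slot_get(&s) == &obj);
}
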
  16421. diff --git a/kernel/softirq.c b/kernel/softirq.c
  16422. index 17caf4b63342..a602b7152de7 100644
  16423. --- a/kernel/softirq.c
  16424. +++ b/kernel/softirq.c
  16425. @@ -21,10 +21,12 @@
  16426. #include <linux/freezer.h>
  16427. #include <linux/kthread.h>
  16428. #include <linux/rcupdate.h>
  16429. +#include <linux/delay.h>
  16430. #include <linux/ftrace.h>
  16431. #include <linux/smp.h>
  16432. #include <linux/smpboot.h>
  16433. #include <linux/tick.h>
  16434. +#include <linux/locallock.h>
  16435. #include <linux/irq.h>
  16436. #define CREATE_TRACE_POINTS
  16437. @@ -56,12 +58,108 @@ EXPORT_SYMBOL(irq_stat);
  16438. static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
  16439. DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
  16440. +#ifdef CONFIG_PREEMPT_RT_FULL
  16441. +#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ))
  16442. +DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd);
  16443. +#endif
  16444. const char * const softirq_to_name[NR_SOFTIRQS] = {
  16445. "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
  16446. "TASKLET", "SCHED", "HRTIMER", "RCU"
  16447. };
  16448. +#ifdef CONFIG_NO_HZ_COMMON
  16449. +# ifdef CONFIG_PREEMPT_RT_FULL
  16450. +
  16451. +struct softirq_runner {
  16452. + struct task_struct *runner[NR_SOFTIRQS];
  16453. +};
  16454. +
  16455. +static DEFINE_PER_CPU(struct softirq_runner, softirq_runners);
  16456. +
  16457. +static inline void softirq_set_runner(unsigned int sirq)
  16458. +{
  16459. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  16460. +
  16461. + sr->runner[sirq] = current;
  16462. +}
  16463. +
  16464. +static inline void softirq_clr_runner(unsigned int sirq)
  16465. +{
  16466. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  16467. +
  16468. + sr->runner[sirq] = NULL;
  16469. +}
  16470. +
  16471. +/*
  16472. + * On preempt-rt a softirq running context might be blocked on a
  16473. + * lock. There might be no other runnable task on this CPU because the
  16474. + * lock owner runs on some other CPU. So we have to go into idle with
  16475. + * the pending bit set. Therefore we need to check this, otherwise we
  16476. + * warn about false positives which confuses users and defeats the
  16477. + * whole purpose of this test.
  16478. + *
  16479. + * This code is called with interrupts disabled.
  16480. + */
  16481. +void softirq_check_pending_idle(void)
  16482. +{
  16483. + static int rate_limit;
  16484. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  16485. + u32 warnpending;
  16486. + int i;
  16487. +
  16488. + if (rate_limit >= 10)
  16489. + return;
  16490. +
  16491. + warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
  16492. + for (i = 0; i < NR_SOFTIRQS; i++) {
  16493. + struct task_struct *tsk = sr->runner[i];
  16494. +
  16495. + /*
  16496. + * The wakeup code in rtmutex.c wakes up the task
  16497. + * _before_ it sets pi_blocked_on to NULL under
  16498. + * tsk->pi_lock. So we need to check for both: state
  16499. + * and pi_blocked_on.
  16500. + */
  16501. + if (tsk) {
  16502. + raw_spin_lock(&tsk->pi_lock);
  16503. + if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
  16504. + /* Clear all bits pending in that task */
  16505. + warnpending &= ~(tsk->softirqs_raised);
  16506. + warnpending &= ~(1 << i);
  16507. + }
  16508. + raw_spin_unlock(&tsk->pi_lock);
  16509. + }
  16510. + }
  16511. +
  16512. + if (warnpending) {
  16513. + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
  16514. + warnpending);
  16515. + rate_limit++;
  16516. + }
  16517. +}
  16518. +# else
  16519. +/*
  16520. + * On !PREEMPT_RT we just printk rate limited:
  16521. + */
  16522. +void softirq_check_pending_idle(void)
  16523. +{
  16524. + static int rate_limit;
  16525. +
  16526. + if (rate_limit < 10 &&
  16527. + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
  16528. + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
  16529. + local_softirq_pending());
  16530. + rate_limit++;
  16531. + }
  16532. +}
  16533. +# endif
  16534. +
  16535. +#else /* !CONFIG_NO_HZ_COMMON */
  16536. +static inline void softirq_set_runner(unsigned int sirq) { }
  16537. +static inline void softirq_clr_runner(unsigned int sirq) { }
  16538. +#endif
  16539. +
  16540. /*
  16541. * we cannot loop indefinitely here to avoid userspace starvation,
  16542. * but we also don't want to introduce a worst case 1/HZ latency
  16543. @@ -77,6 +175,79 @@ static void wakeup_softirqd(void)
  16544. wake_up_process(tsk);
  16545. }
  16546. +#ifdef CONFIG_PREEMPT_RT_FULL
  16547. +static void wakeup_timer_softirqd(void)
  16548. +{
  16549. + /* Interrupts are disabled: no need to stop preemption */
  16550. + struct task_struct *tsk = __this_cpu_read(ktimer_softirqd);
  16551. +
  16552. + if (tsk && tsk->state != TASK_RUNNING)
  16553. + wake_up_process(tsk);
  16554. +}
  16555. +#endif
  16556. +
  16557. +static void handle_softirq(unsigned int vec_nr)
  16558. +{
  16559. + struct softirq_action *h = softirq_vec + vec_nr;
  16560. + int prev_count;
  16561. +
  16562. + prev_count = preempt_count();
  16563. +
  16564. + kstat_incr_softirqs_this_cpu(vec_nr);
  16565. +
  16566. + trace_softirq_entry(vec_nr);
  16567. + h->action(h);
  16568. + trace_softirq_exit(vec_nr);
  16569. + if (unlikely(prev_count != preempt_count())) {
  16570. + pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
  16571. + vec_nr, softirq_to_name[vec_nr], h->action,
  16572. + prev_count, preempt_count());
  16573. + preempt_count_set(prev_count);
  16574. + }
  16575. +}
  16576. +
  16577. +#ifndef CONFIG_PREEMPT_RT_FULL
  16578. +static inline int ksoftirqd_softirq_pending(void)
  16579. +{
  16580. + return local_softirq_pending();
  16581. +}
  16582. +
  16583. +static void handle_pending_softirqs(u32 pending)
  16584. +{
  16585. + struct softirq_action *h = softirq_vec;
  16586. + int softirq_bit;
  16587. +
  16588. + local_irq_enable();
  16589. +
  16590. + h = softirq_vec;
  16591. +
  16592. + while ((softirq_bit = ffs(pending))) {
  16593. + unsigned int vec_nr;
  16594. +
  16595. + h += softirq_bit - 1;
  16596. + vec_nr = h - softirq_vec;
  16597. + handle_softirq(vec_nr);
  16598. +
  16599. + h++;
  16600. + pending >>= softirq_bit;
  16601. + }
  16602. +
  16603. + rcu_bh_qs();
  16604. + local_irq_disable();
  16605. +}
  16606. +
  16607. +static void run_ksoftirqd(unsigned int cpu)
  16608. +{
  16609. + local_irq_disable();
  16610. + if (ksoftirqd_softirq_pending()) {
  16611. + __do_softirq();
  16612. + local_irq_enable();
  16613. + cond_resched_rcu_qs();
  16614. + return;
  16615. + }
  16616. + local_irq_enable();
  16617. +}
  16618. +
  16619. /*
  16620. * preempt_count and SOFTIRQ_OFFSET usage:
  16621. * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
  16622. @@ -232,10 +403,8 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
  16623. unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
  16624. unsigned long old_flags = current->flags;
  16625. int max_restart = MAX_SOFTIRQ_RESTART;
  16626. - struct softirq_action *h;
  16627. bool in_hardirq;
  16628. __u32 pending;
  16629. - int softirq_bit;
  16630. /*
  16631. * Mask out PF_MEMALLOC s current task context is borrowed for the
  16632. @@ -254,36 +423,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
  16633. /* Reset the pending bitmask before enabling irqs */
  16634. set_softirq_pending(0);
  16635. - local_irq_enable();
  16636. -
  16637. - h = softirq_vec;
  16638. -
  16639. - while ((softirq_bit = ffs(pending))) {
  16640. - unsigned int vec_nr;
  16641. - int prev_count;
  16642. -
  16643. - h += softirq_bit - 1;
  16644. -
  16645. - vec_nr = h - softirq_vec;
  16646. - prev_count = preempt_count();
  16647. -
  16648. - kstat_incr_softirqs_this_cpu(vec_nr);
  16649. -
  16650. - trace_softirq_entry(vec_nr);
  16651. - h->action(h);
  16652. - trace_softirq_exit(vec_nr);
  16653. - if (unlikely(prev_count != preempt_count())) {
  16654. - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
  16655. - vec_nr, softirq_to_name[vec_nr], h->action,
  16656. - prev_count, preempt_count());
  16657. - preempt_count_set(prev_count);
  16658. - }
  16659. - h++;
  16660. - pending >>= softirq_bit;
  16661. - }
  16662. -
  16663. - rcu_bh_qs();
  16664. - local_irq_disable();
  16665. + handle_pending_softirqs(pending);
  16666. pending = local_softirq_pending();
  16667. if (pending) {
  16668. @@ -320,6 +460,310 @@ asmlinkage __visible void do_softirq(void)
  16669. }
  16670. /*
  16671. + * This function must run with irqs disabled!
  16672. + */
  16673. +void raise_softirq_irqoff(unsigned int nr)
  16674. +{
  16675. + __raise_softirq_irqoff(nr);
  16676. +
  16677. + /*
  16678. + * If we're in an interrupt or softirq, we're done
  16679. + * (this also catches softirq-disabled code). We will
  16680. + * actually run the softirq once we return from
  16681. + * the irq or softirq.
  16682. + *
  16683. + * Otherwise we wake up ksoftirqd to make sure we
  16684. + * schedule the softirq soon.
  16685. + */
  16686. + if (!in_interrupt())
  16687. + wakeup_softirqd();
  16688. +}
  16689. +
  16690. +void __raise_softirq_irqoff(unsigned int nr)
  16691. +{
  16692. + trace_softirq_raise(nr);
  16693. + or_softirq_pending(1UL << nr);
  16694. +}
  16695. +
  16696. +static inline void local_bh_disable_nort(void) { local_bh_disable(); }
  16697. +static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
  16698. +static void ksoftirqd_set_sched_params(unsigned int cpu) { }
  16699. +
  16700. +#else /* !PREEMPT_RT_FULL */
  16701. +
  16702. +/*
  16703. + * On RT we serialize softirq execution with a cpu local lock per softirq
  16704. + */
  16705. +static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks);
  16706. +
  16707. +void __init softirq_early_init(void)
  16708. +{
  16709. + int i;
  16710. +
  16711. + for (i = 0; i < NR_SOFTIRQS; i++)
  16712. + local_irq_lock_init(local_softirq_locks[i]);
  16713. +}
  16714. +
  16715. +static void lock_softirq(int which)
  16716. +{
  16717. + local_lock(local_softirq_locks[which]);
  16718. +}
  16719. +
  16720. +static void unlock_softirq(int which)
  16721. +{
  16722. + local_unlock(local_softirq_locks[which]);
  16723. +}
  16724. +
  16725. +static void do_single_softirq(int which)
  16726. +{
  16727. + unsigned long old_flags = current->flags;
  16728. +
  16729. + current->flags &= ~PF_MEMALLOC;
  16730. + vtime_account_irq_enter(current);
  16731. + current->flags |= PF_IN_SOFTIRQ;
  16732. + lockdep_softirq_enter();
  16733. + local_irq_enable();
  16734. + handle_softirq(which);
  16735. + local_irq_disable();
  16736. + lockdep_softirq_exit();
  16737. + current->flags &= ~PF_IN_SOFTIRQ;
  16738. + vtime_account_irq_enter(current);
  16739. + tsk_restore_flags(current, old_flags, PF_MEMALLOC);
  16740. +}
  16741. +
  16742. +/*
  16743. + * Called with interrupts disabled. Process softirqs which were raised
  16744. + * in current context (or on behalf of ksoftirqd).
  16745. + */
  16746. +static void do_current_softirqs(void)
  16747. +{
  16748. + while (current->softirqs_raised) {
  16749. + int i = __ffs(current->softirqs_raised);
  16750. + unsigned int pending, mask = (1U << i);
  16751. +
  16752. + current->softirqs_raised &= ~mask;
  16753. + local_irq_enable();
  16754. +
  16755. + /*
  16756. + * If the lock is contended, we boost the owner to
  16757. + * process the softirq or leave the critical section
  16758. + * now.
  16759. + */
  16760. + lock_softirq(i);
  16761. + local_irq_disable();
  16762. + softirq_set_runner(i);
  16763. + /*
  16764. + * Check with the local_softirq_pending() bits,
  16765. + * whether we need to process this still or if someone
  16766. + * else took care of it.
  16767. + */
  16768. + pending = local_softirq_pending();
  16769. + if (pending & mask) {
  16770. + set_softirq_pending(pending & ~mask);
  16771. + do_single_softirq(i);
  16772. + }
  16773. + softirq_clr_runner(i);
  16774. + WARN_ON(current->softirq_nestcnt != 1);
  16775. + local_irq_enable();
  16776. + unlock_softirq(i);
  16777. + local_irq_disable();
  16778. + }
  16779. +}
  16780. +
  16781. +void __local_bh_disable(void)
  16782. +{
  16783. + if (++current->softirq_nestcnt == 1)
  16784. + migrate_disable();
  16785. +}
  16786. +EXPORT_SYMBOL(__local_bh_disable);
  16787. +
  16788. +void __local_bh_enable(void)
  16789. +{
  16790. + if (WARN_ON(current->softirq_nestcnt == 0))
  16791. + return;
  16792. +
  16793. + local_irq_disable();
  16794. + if (current->softirq_nestcnt == 1 && current->softirqs_raised)
  16795. + do_current_softirqs();
  16796. + local_irq_enable();
  16797. +
  16798. + if (--current->softirq_nestcnt == 0)
  16799. + migrate_enable();
  16800. +}
  16801. +EXPORT_SYMBOL(__local_bh_enable);
  16802. +
  16803. +void _local_bh_enable(void)
  16804. +{
  16805. + if (WARN_ON(current->softirq_nestcnt == 0))
  16806. + return;
  16807. + if (--current->softirq_nestcnt == 0)
  16808. + migrate_enable();
  16809. +}
  16810. +EXPORT_SYMBOL(_local_bh_enable);
  16811. +
  16812. +int in_serving_softirq(void)
  16813. +{
  16814. + return current->flags & PF_IN_SOFTIRQ;
  16815. +}
  16816. +EXPORT_SYMBOL(in_serving_softirq);
  16817. +
  16818. +/* Called with preemption disabled */
  16819. +static void run_ksoftirqd(unsigned int cpu)
  16820. +{
  16821. + local_irq_disable();
  16822. + current->softirq_nestcnt++;
  16823. +
  16824. + do_current_softirqs();
  16825. + current->softirq_nestcnt--;
  16826. + local_irq_enable();
  16827. + cond_resched_rcu_qs();
  16828. +}
  16829. +
  16830. +/*
  16831. + * Called from netif_rx_ni(). Preemption enabled, but migration
  16832. + * disabled. So the cpu can't go away under us.
  16833. + */
  16834. +void thread_do_softirq(void)
  16835. +{
  16836. + if (!in_serving_softirq() && current->softirqs_raised) {
  16837. + current->softirq_nestcnt++;
  16838. + do_current_softirqs();
  16839. + current->softirq_nestcnt--;
  16840. + }
  16841. +}
  16842. +
  16843. +static void do_raise_softirq_irqoff(unsigned int nr)
  16844. +{
  16845. + unsigned int mask;
  16846. +
  16847. + mask = 1UL << nr;
  16848. +
  16849. + trace_softirq_raise(nr);
  16850. + or_softirq_pending(mask);
  16851. +
  16852. + /*
  16853. + * If we are not in a hard interrupt and inside a bh disabled
  16854. + * region, we simply raise the flag on current. local_bh_enable()
  16855. + * will make sure that the softirq is executed. Otherwise we
  16856. + * delegate it to ksoftirqd.
  16857. + */
  16858. + if (!in_irq() && current->softirq_nestcnt)
  16859. + current->softirqs_raised |= mask;
  16860. + else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd))
  16861. + return;
  16862. +
  16863. + if (mask & TIMER_SOFTIRQS)
  16864. + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
  16865. + else
  16866. + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
  16867. +}
  16868. +
  16869. +static void wakeup_proper_softirq(unsigned int nr)
  16870. +{
  16871. + if ((1UL << nr) & TIMER_SOFTIRQS)
  16872. + wakeup_timer_softirqd();
  16873. + else
  16874. + wakeup_softirqd();
  16875. +}
  16876. +
  16877. +
  16878. +void __raise_softirq_irqoff(unsigned int nr)
  16879. +{
  16880. + do_raise_softirq_irqoff(nr);
  16881. + if (!in_irq() && !current->softirq_nestcnt)
  16882. + wakeup_proper_softirq(nr);
  16883. +}
  16884. +
  16885. +/*
  16886. + * Same as __raise_softirq_irqoff() but will process them in ksoftirqd
  16887. + */
  16888. +void __raise_softirq_irqoff_ksoft(unsigned int nr)
  16889. +{
  16890. + unsigned int mask;
  16891. +
  16892. + if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) ||
  16893. + !__this_cpu_read(ktimer_softirqd)))
  16894. + return;
  16895. + mask = 1UL << nr;
  16896. +
  16897. + trace_softirq_raise(nr);
  16898. + or_softirq_pending(mask);
  16899. + if (mask & TIMER_SOFTIRQS)
  16900. + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
  16901. + else
  16902. + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
  16903. + wakeup_proper_softirq(nr);
  16904. +}
  16905. +
  16906. +/*
  16907. + * This function must run with irqs disabled!
  16908. + */
  16909. +void raise_softirq_irqoff(unsigned int nr)
  16910. +{
  16911. + do_raise_softirq_irqoff(nr);
  16912. +
  16913. + /*
  16914. + * If we're in a hard interrupt we let the irq return code deal
  16915. + * with the wakeup of ksoftirqd.
  16916. + */
  16917. + if (in_irq())
  16918. + return;
  16919. + /*
  16920. + * If we are in thread context but outside of a bh disabled
  16921. + * region, we need to wake ksoftirqd as well.
  16922. + *
  16923. + * CHECKME: Some of the places which do that could be wrapped
  16924. + * into local_bh_disable/enable pairs. Though it's unclear
  16925. + * whether this is worth the effort. To find those places just
  16926. + * raise a WARN() if the condition is met.
  16927. + */
  16928. + if (!current->softirq_nestcnt)
  16929. + wakeup_proper_softirq(nr);
  16930. +}
  16931. +
  16932. +static inline int ksoftirqd_softirq_pending(void)
  16933. +{
  16934. + return current->softirqs_raised;
  16935. +}
  16936. +
  16937. +static inline void local_bh_disable_nort(void) { }
  16938. +static inline void _local_bh_enable_nort(void) { }
  16939. +
  16940. +static inline void ksoftirqd_set_sched_params(unsigned int cpu)
  16941. +{
  16942. + /* Take over all but timer pending softirqs when starting */
  16943. + local_irq_disable();
  16944. + current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS;
  16945. + local_irq_enable();
  16946. +}
  16947. +
  16948. +static inline void ktimer_softirqd_set_sched_params(unsigned int cpu)
  16949. +{
  16950. + struct sched_param param = { .sched_priority = 1 };
  16951. +
  16952. + sched_setscheduler(current, SCHED_FIFO, &param);
  16953. +
  16954. + /* Take over timer pending softirqs when starting */
  16955. + local_irq_disable();
  16956. + current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS;
  16957. + local_irq_enable();
  16958. +}
  16959. +
  16960. +static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu,
  16961. + bool online)
  16962. +{
  16963. + struct sched_param param = { .sched_priority = 0 };
  16964. +
  16965. + sched_setscheduler(current, SCHED_NORMAL, &param);
  16966. +}
  16967. +
  16968. +static int ktimer_softirqd_should_run(unsigned int cpu)
  16969. +{
  16970. + return current->softirqs_raised;
  16971. +}
  16972. +
  16973. +#endif /* PREEMPT_RT_FULL */
  16974. +/*
  16975. * Enter an interrupt context.
  16976. */
  16977. void irq_enter(void)
  16978. @@ -330,9 +774,9 @@ void irq_enter(void)
  16979. * Prevent raise_softirq from needlessly waking up ksoftirqd
  16980. * here, as softirq will be serviced on return from interrupt.
  16981. */
  16982. - local_bh_disable();
  16983. + local_bh_disable_nort();
  16984. tick_irq_enter();
  16985. - _local_bh_enable();
  16986. + _local_bh_enable_nort();
  16987. }
  16988. __irq_enter();
  16989. @@ -340,6 +784,7 @@ void irq_enter(void)
  16990. static inline void invoke_softirq(void)
  16991. {
  16992. +#ifndef CONFIG_PREEMPT_RT_FULL
  16993. if (!force_irqthreads) {
  16994. #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
  16995. /*
  16996. @@ -359,6 +804,18 @@ static inline void invoke_softirq(void)
  16997. } else {
  16998. wakeup_softirqd();
  16999. }
  17000. +#else /* PREEMPT_RT_FULL */
  17001. + unsigned long flags;
  17002. +
  17003. + local_irq_save(flags);
  17004. + if (__this_cpu_read(ksoftirqd) &&
  17005. + __this_cpu_read(ksoftirqd)->softirqs_raised)
  17006. + wakeup_softirqd();
  17007. + if (__this_cpu_read(ktimer_softirqd) &&
  17008. + __this_cpu_read(ktimer_softirqd)->softirqs_raised)
  17009. + wakeup_timer_softirqd();
  17010. + local_irq_restore(flags);
  17011. +#endif
  17012. }
  17013. static inline void tick_irq_exit(void)
  17014. @@ -395,26 +852,6 @@ void irq_exit(void)
  17015. trace_hardirq_exit(); /* must be last! */
  17016. }
  17017. -/*
  17018. - * This function must run with irqs disabled!
  17019. - */
  17020. -inline void raise_softirq_irqoff(unsigned int nr)
  17021. -{
  17022. - __raise_softirq_irqoff(nr);
  17023. -
  17024. - /*
  17025. - * If we're in an interrupt or softirq, we're done
  17026. - * (this also catches softirq-disabled code). We will
  17027. - * actually run the softirq once we return from
  17028. - * the irq or softirq.
  17029. - *
  17030. - * Otherwise we wake up ksoftirqd to make sure we
  17031. - * schedule the softirq soon.
  17032. - */
  17033. - if (!in_interrupt())
  17034. - wakeup_softirqd();
  17035. -}
  17036. -
  17037. void raise_softirq(unsigned int nr)
  17038. {
  17039. unsigned long flags;
  17040. @@ -424,12 +861,6 @@ void raise_softirq(unsigned int nr)
  17041. local_irq_restore(flags);
  17042. }
  17043. -void __raise_softirq_irqoff(unsigned int nr)
  17044. -{
  17045. - trace_softirq_raise(nr);
  17046. - or_softirq_pending(1UL << nr);
  17047. -}
  17048. -
  17049. void open_softirq(int nr, void (*action)(struct softirq_action *))
  17050. {
  17051. softirq_vec[nr].action = action;
  17052. @@ -446,15 +877,45 @@ struct tasklet_head {
  17053. static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
  17054. static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
  17055. +static inline void
  17056. +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
  17057. +{
  17058. + if (tasklet_trylock(t)) {
  17059. +again:
  17060. + /* We may have been preempted before tasklet_trylock
  17061. + * and __tasklet_action may have already run.
  17062. + * So double-check the sched bit while the tasklet
  17063. + * is locked before adding it to the list.
  17064. + */
  17065. + if (test_bit(TASKLET_STATE_SCHED, &t->state)) {
  17066. + t->next = NULL;
  17067. + *head->tail = t;
  17068. + head->tail = &(t->next);
  17069. + raise_softirq_irqoff(nr);
  17070. + tasklet_unlock(t);
  17071. + } else {
  17072. + /* This is subtle. If we hit the corner case above,
  17073. + * it is possible that we get preempted right here,
  17074. + * and another task has successfully called
  17075. + * tasklet_schedule(), then this function, and
  17076. + * failed on the trylock. Thus we must be sure,
  17077. + * before releasing the tasklet lock, that the
  17078. + * SCHED bit is clear. Otherwise the tasklet
  17079. + * may get its SCHED bit set but never be added
  17080. + * to the list.
  17081. + */
  17082. + if (!tasklet_tryunlock(t))
  17083. + goto again;
  17084. + }
  17085. + }
  17086. +}
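The trylock/tryunlock pair used above is defined elsewhere in this patch (the include/linux/interrupt.h part of the RT series); roughly, and shown here only to make the corner-case comment readable, they behave like the following sketch (names prefixed with sketch_ to mark them as illustrative):

static inline int sketch_tasklet_trylock(struct tasklet_struct *t)
{
	/* Take the RUN "lock" bit; fails if the tasklet is already running. */
	return !test_and_set_bit(TASKLET_STATE_RUN, &t->state);
}

static inline int sketch_tasklet_tryunlock(struct tasklet_struct *t)
{
	/* Only the pure RUN -> 0 transition releases the lock, so a
	 * concurrent SCHED or PENDING bit makes the unlock fail and
	 * forces the caller back into its retry loop. */
	return cmpxchg(&t->state, 1UL << TASKLET_STATE_RUN, 0) ==
	       (1UL << TASKLET_STATE_RUN);
}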
  17087. +
  17088. void __tasklet_schedule(struct tasklet_struct *t)
  17089. {
  17090. unsigned long flags;
  17091. local_irq_save(flags);
  17092. - t->next = NULL;
  17093. - *__this_cpu_read(tasklet_vec.tail) = t;
  17094. - __this_cpu_write(tasklet_vec.tail, &(t->next));
  17095. - raise_softirq_irqoff(TASKLET_SOFTIRQ);
  17096. + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
  17097. local_irq_restore(flags);
  17098. }
  17099. EXPORT_SYMBOL(__tasklet_schedule);
  17100. @@ -464,10 +925,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
  17101. unsigned long flags;
  17102. local_irq_save(flags);
  17103. - t->next = NULL;
  17104. - *__this_cpu_read(tasklet_hi_vec.tail) = t;
  17105. - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
  17106. - raise_softirq_irqoff(HI_SOFTIRQ);
  17107. + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
  17108. local_irq_restore(flags);
  17109. }
  17110. EXPORT_SYMBOL(__tasklet_hi_schedule);
  17111. @@ -476,82 +934,122 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t)
  17112. {
  17113. BUG_ON(!irqs_disabled());
  17114. - t->next = __this_cpu_read(tasklet_hi_vec.head);
  17115. - __this_cpu_write(tasklet_hi_vec.head, t);
  17116. - __raise_softirq_irqoff(HI_SOFTIRQ);
  17117. + __tasklet_hi_schedule(t);
  17118. }
  17119. EXPORT_SYMBOL(__tasklet_hi_schedule_first);
  17120. -static void tasklet_action(struct softirq_action *a)
  17121. +void tasklet_enable(struct tasklet_struct *t)
  17122. {
  17123. - struct tasklet_struct *list;
  17124. + if (!atomic_dec_and_test(&t->count))
  17125. + return;
  17126. + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
  17127. + tasklet_schedule(t);
  17128. +}
  17129. +EXPORT_SYMBOL(tasklet_enable);
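A short usage sketch of the disable/enable pairing this enables on RT (illustrative only): while a tasklet is disabled, the softirq marks it TASKLET_STATE_PENDING instead of re-queueing it, and the tasklet_enable() above re-schedules it.

static void sketch_pause_tasklet(struct tasklet_struct *t)
{
	tasklet_disable(t);	/* handler defers the tasklet as PENDING */
	/* ... safely touch data the tasklet callback also uses ... */
	tasklet_enable(t);	/* re-schedules it if it was deferred */
}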
  17130. - local_irq_disable();
  17131. - list = __this_cpu_read(tasklet_vec.head);
  17132. - __this_cpu_write(tasklet_vec.head, NULL);
  17133. - __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
  17134. - local_irq_enable();
  17135. +static void __tasklet_action(struct softirq_action *a,
  17136. + struct tasklet_struct *list)
  17137. +{
  17138. + int loops = 1000000;
  17139. while (list) {
  17140. struct tasklet_struct *t = list;
  17141. list = list->next;
  17142. - if (tasklet_trylock(t)) {
  17143. - if (!atomic_read(&t->count)) {
  17144. - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
  17145. - &t->state))
  17146. - BUG();
  17147. - t->func(t->data);
  17148. - tasklet_unlock(t);
  17149. - continue;
  17150. - }
  17151. - tasklet_unlock(t);
  17152. + /*
  17153. + * Should always succeed - after a tasklet got on the
  17154. + * list (after getting the SCHED bit set from 0 to 1),
  17155. + * nothing but the tasklet softirq it got queued to can
  17156. + * lock it:
  17157. + */
  17158. + if (!tasklet_trylock(t)) {
  17159. + WARN_ON(1);
  17160. + continue;
  17161. }
  17162. - local_irq_disable();
  17163. t->next = NULL;
  17164. - *__this_cpu_read(tasklet_vec.tail) = t;
  17165. - __this_cpu_write(tasklet_vec.tail, &(t->next));
  17166. - __raise_softirq_irqoff(TASKLET_SOFTIRQ);
  17167. - local_irq_enable();
  17168. +
  17169. + /*
  17170. + * If we cannot handle the tasklet because it's disabled,
  17171. + * mark it as pending. tasklet_enable() will later
  17172. + * re-schedule the tasklet.
  17173. + */
  17174. + if (unlikely(atomic_read(&t->count))) {
  17175. +out_disabled:
  17176. + /* implicit unlock: */
  17177. + wmb();
  17178. + t->state = TASKLET_STATEF_PENDING;
  17179. + continue;
  17180. + }
  17181. +
  17182. + /*
  17183. + * From this point on the tasklet might be rescheduled
  17184. + * on another CPU, but it can only be added to another
  17185. + * CPU's tasklet list if we unlock the tasklet (which we
  17186. + * don't do yet).
  17187. + */
  17188. + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
  17189. + WARN_ON(1);
  17190. +
  17191. +again:
  17192. + t->func(t->data);
  17193. +
  17194. + /*
  17195. + * Try to unlock the tasklet. We must use cmpxchg, because
  17196. + * another CPU might have scheduled or disabled the tasklet.
  17197. + * We only allow the STATE_RUN -> 0 transition here.
  17198. + */
  17199. + while (!tasklet_tryunlock(t)) {
  17200. + /*
  17201. + * If it got disabled meanwhile, bail out:
  17202. + */
  17203. + if (atomic_read(&t->count))
  17204. + goto out_disabled;
  17205. + /*
  17206. + * If it got scheduled meanwhile, re-execute
  17207. + * the tasklet function:
  17208. + */
  17209. + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
  17210. + goto again;
  17211. + if (!--loops) {
  17212. + printk("hm, tasklet state: %08lx\n", t->state);
  17213. + WARN_ON(1);
  17214. + tasklet_unlock(t);
  17215. + break;
  17216. + }
  17217. + }
  17218. }
  17219. }
  17220. +static void tasklet_action(struct softirq_action *a)
  17221. +{
  17222. + struct tasklet_struct *list;
  17223. +
  17224. + local_irq_disable();
  17225. +
  17226. + list = __this_cpu_read(tasklet_vec.head);
  17227. + __this_cpu_write(tasklet_vec.head, NULL);
  17228. + __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
  17229. +
  17230. + local_irq_enable();
  17231. +
  17232. + __tasklet_action(a, list);
  17233. +}
  17234. +
  17235. static void tasklet_hi_action(struct softirq_action *a)
  17236. {
  17237. struct tasklet_struct *list;
  17238. local_irq_disable();
  17239. +
  17240. list = __this_cpu_read(tasklet_hi_vec.head);
  17241. __this_cpu_write(tasklet_hi_vec.head, NULL);
  17242. __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
  17243. +
  17244. local_irq_enable();
  17245. - while (list) {
  17246. - struct tasklet_struct *t = list;
  17247. -
  17248. - list = list->next;
  17249. -
  17250. - if (tasklet_trylock(t)) {
  17251. - if (!atomic_read(&t->count)) {
  17252. - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
  17253. - &t->state))
  17254. - BUG();
  17255. - t->func(t->data);
  17256. - tasklet_unlock(t);
  17257. - continue;
  17258. - }
  17259. - tasklet_unlock(t);
  17260. - }
  17261. -
  17262. - local_irq_disable();
  17263. - t->next = NULL;
  17264. - *__this_cpu_read(tasklet_hi_vec.tail) = t;
  17265. - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
  17266. - __raise_softirq_irqoff(HI_SOFTIRQ);
  17267. - local_irq_enable();
  17268. - }
  17269. + __tasklet_action(a, list);
  17270. }
  17271. void tasklet_init(struct tasklet_struct *t,
  17272. @@ -572,7 +1070,7 @@ void tasklet_kill(struct tasklet_struct *t)
  17273. while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
  17274. do {
  17275. - yield();
  17276. + msleep(1);
  17277. } while (test_bit(TASKLET_STATE_SCHED, &t->state));
  17278. }
  17279. tasklet_unlock_wait(t);
  17280. @@ -646,25 +1144,26 @@ void __init softirq_init(void)
  17281. open_softirq(HI_SOFTIRQ, tasklet_hi_action);
  17282. }
  17283. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  17284. +void tasklet_unlock_wait(struct tasklet_struct *t)
  17285. +{
  17286. + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
  17287. + /*
  17288. + * Hack for now to avoid this busy-loop:
  17289. + */
  17290. +#ifdef CONFIG_PREEMPT_RT_FULL
  17291. + msleep(1);
  17292. +#else
  17293. + barrier();
  17294. +#endif
  17295. + }
  17296. +}
  17297. +EXPORT_SYMBOL(tasklet_unlock_wait);
  17298. +#endif
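Both tasklet_kill() (above, via msleep(1)) and the RT variant of tasklet_unlock_wait() now sleep, so on RT neither may be called from atomic context. A typical teardown sketch (illustrative):

static void sketch_driver_teardown(struct tasklet_struct *t)
{
	/* Must be called from preemptible context: may sleep on RT. */
	tasklet_kill(t);	/* waits for SCHED to clear, then for RUN
				 * via tasklet_unlock_wait() above */
}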
  17299. +
  17300. static int ksoftirqd_should_run(unsigned int cpu)
  17301. {
  17302. - return local_softirq_pending();
  17303. -}
  17304. -
  17305. -static void run_ksoftirqd(unsigned int cpu)
  17306. -{
  17307. - local_irq_disable();
  17308. - if (local_softirq_pending()) {
  17309. - /*
  17310. - * We can safely run softirq on inline stack, as we are not deep
  17311. - * in the task stack here.
  17312. - */
  17313. - __do_softirq();
  17314. - local_irq_enable();
  17315. - cond_resched_rcu_qs();
  17316. - return;
  17317. - }
  17318. - local_irq_enable();
  17319. + return ksoftirqd_softirq_pending();
  17320. }
  17321. #ifdef CONFIG_HOTPLUG_CPU
  17322. @@ -746,16 +1245,31 @@ static struct notifier_block cpu_nfb = {
  17323. static struct smp_hotplug_thread softirq_threads = {
  17324. .store = &ksoftirqd,
  17325. + .setup = ksoftirqd_set_sched_params,
  17326. .thread_should_run = ksoftirqd_should_run,
  17327. .thread_fn = run_ksoftirqd,
  17328. .thread_comm = "ksoftirqd/%u",
  17329. };
  17330. +#ifdef CONFIG_PREEMPT_RT_FULL
  17331. +static struct smp_hotplug_thread softirq_timer_threads = {
  17332. + .store = &ktimer_softirqd,
  17333. + .setup = ktimer_softirqd_set_sched_params,
  17334. + .cleanup = ktimer_softirqd_clr_sched_params,
  17335. + .thread_should_run = ktimer_softirqd_should_run,
  17336. + .thread_fn = run_ksoftirqd,
  17337. + .thread_comm = "ktimersoftd/%u",
  17338. +};
  17339. +#endif
  17340. +
  17341. static __init int spawn_ksoftirqd(void)
  17342. {
  17343. register_cpu_notifier(&cpu_nfb);
  17344. BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
  17345. +#ifdef CONFIG_PREEMPT_RT_FULL
  17346. + BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads));
  17347. +#endif
  17348. return 0;
  17349. }
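The second smp_hotplug_thread descriptor simply gives the timer softirqs their own per-CPU SCHED_FIFO thread (ktimersoftd/N). A greatly simplified model of the smpboot loop both descriptors plug into, ignoring parking and hotplug (the real loop lives in kernel/smpboot.c):

static int sketch_smpboot_loop(struct smp_hotplug_thread *ht, unsigned int cpu)
{
	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!ht->thread_should_run(cpu)) {
			schedule();		/* sleep until a raise wakes us */
			continue;
		}
		__set_current_state(TASK_RUNNING);
		ht->thread_fn(cpu);		/* run_ksoftirqd() for both threads */
	}
	return 0;
}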
  17350. diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
  17351. index a467e6c28a3b..d01a5118d58a 100644
  17352. --- a/kernel/stop_machine.c
  17353. +++ b/kernel/stop_machine.c
  17354. @@ -36,7 +36,7 @@ struct cpu_stop_done {
  17355. struct cpu_stopper {
  17356. struct task_struct *thread;
  17357. - spinlock_t lock;
  17358. + raw_spinlock_t lock;
  17359. bool enabled; /* is this stopper enabled? */
  17360. struct list_head works; /* list of pending works */
  17361. @@ -82,14 +82,14 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
  17362. unsigned long flags;
  17363. bool enabled;
  17364. - spin_lock_irqsave(&stopper->lock, flags);
  17365. + raw_spin_lock_irqsave(&stopper->lock, flags);
  17366. enabled = stopper->enabled;
  17367. if (enabled)
  17368. __cpu_stop_queue_work(stopper, work);
  17369. else if (work->done)
  17370. cpu_stop_signal_done(work->done);
  17371. - spin_unlock_irqrestore(&stopper->lock, flags);
  17372. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  17373. return enabled;
  17374. }
  17375. @@ -224,8 +224,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
  17376. int err;
  17377. lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
  17378. - spin_lock_irq(&stopper1->lock);
  17379. - spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
  17380. + raw_spin_lock_irq(&stopper1->lock);
  17381. + raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
  17382. err = -ENOENT;
  17383. if (!stopper1->enabled || !stopper2->enabled)
  17384. @@ -235,8 +235,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
  17385. __cpu_stop_queue_work(stopper1, work1);
  17386. __cpu_stop_queue_work(stopper2, work2);
  17387. unlock:
  17388. - spin_unlock(&stopper2->lock);
  17389. - spin_unlock_irq(&stopper1->lock);
  17390. + raw_spin_unlock(&stopper2->lock);
  17391. + raw_spin_unlock_irq(&stopper1->lock);
  17392. lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);
  17393. return err;
  17394. @@ -313,18 +313,21 @@ static DEFINE_MUTEX(stop_cpus_mutex);
  17395. static bool queue_stop_cpus_work(const struct cpumask *cpumask,
  17396. cpu_stop_fn_t fn, void *arg,
  17397. - struct cpu_stop_done *done)
  17398. + struct cpu_stop_done *done, bool inactive)
  17399. {
  17400. struct cpu_stop_work *work;
  17401. unsigned int cpu;
  17402. bool queued = false;
  17403. /*
  17404. - * Disable preemption while queueing to avoid getting
  17405. - * preempted by a stopper which might wait for other stoppers
  17406. - * to enter @fn which can lead to deadlock.
  17407. + * Make sure that all work is queued on all cpus before
  17408. + * any of the cpus can execute it.
  17409. */
  17410. - lg_global_lock(&stop_cpus_lock);
  17411. + if (!inactive)
  17412. + lg_global_lock(&stop_cpus_lock);
  17413. + else
  17414. + lg_global_trylock_relax(&stop_cpus_lock);
  17415. +
  17416. for_each_cpu(cpu, cpumask) {
  17417. work = &per_cpu(cpu_stopper.stop_work, cpu);
  17418. work->fn = fn;
  17419. @@ -344,7 +347,7 @@ static int __stop_cpus(const struct cpumask *cpumask,
  17420. struct cpu_stop_done done;
  17421. cpu_stop_init_done(&done, cpumask_weight(cpumask));
  17422. - if (!queue_stop_cpus_work(cpumask, fn, arg, &done))
  17423. + if (!queue_stop_cpus_work(cpumask, fn, arg, &done, false))
  17424. return -ENOENT;
  17425. wait_for_completion(&done.completion);
  17426. return done.ret;
  17427. @@ -425,9 +428,9 @@ static int cpu_stop_should_run(unsigned int cpu)
  17428. unsigned long flags;
  17429. int run;
  17430. - spin_lock_irqsave(&stopper->lock, flags);
  17431. + raw_spin_lock_irqsave(&stopper->lock, flags);
  17432. run = !list_empty(&stopper->works);
  17433. - spin_unlock_irqrestore(&stopper->lock, flags);
  17434. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  17435. return run;
  17436. }
  17437. @@ -438,13 +441,13 @@ static void cpu_stopper_thread(unsigned int cpu)
  17438. repeat:
  17439. work = NULL;
  17440. - spin_lock_irq(&stopper->lock);
  17441. + raw_spin_lock_irq(&stopper->lock);
  17442. if (!list_empty(&stopper->works)) {
  17443. work = list_first_entry(&stopper->works,
  17444. struct cpu_stop_work, list);
  17445. list_del_init(&work->list);
  17446. }
  17447. - spin_unlock_irq(&stopper->lock);
  17448. + raw_spin_unlock_irq(&stopper->lock);
  17449. if (work) {
  17450. cpu_stop_fn_t fn = work->fn;
  17451. @@ -452,6 +455,16 @@ static void cpu_stopper_thread(unsigned int cpu)
  17452. struct cpu_stop_done *done = work->done;
  17453. int ret;
  17454. + /*
  17455. + * Wait until the stopper has finished scheduling on all
  17456. + * CPUs
  17457. + */
  17458. + lg_global_lock(&stop_cpus_lock);
  17459. + /*
  17460. + * Let other cpu threads continue as well
  17461. + */
  17462. + lg_global_unlock(&stop_cpus_lock);
  17463. +
  17464. /* cpu stop callbacks must not sleep, make in_atomic() == T */
  17465. preempt_count_inc();
  17466. ret = fn(arg);
  17467. @@ -518,10 +531,12 @@ static int __init cpu_stop_init(void)
  17468. for_each_possible_cpu(cpu) {
  17469. struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
  17470. - spin_lock_init(&stopper->lock);
  17471. + raw_spin_lock_init(&stopper->lock);
  17472. INIT_LIST_HEAD(&stopper->works);
  17473. }
  17474. + lg_lock_init(&stop_cpus_lock, "stop_cpus_lock");
  17475. +
  17476. BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
  17477. stop_machine_unpark(raw_smp_processor_id());
  17478. stop_machine_initialized = true;
  17479. @@ -616,7 +631,7 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
  17480. set_state(&msdata, MULTI_STOP_PREPARE);
  17481. cpu_stop_init_done(&done, num_active_cpus());
  17482. queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
  17483. - &done);
  17484. + &done, true);
  17485. ret = multi_cpu_stop(&msdata);
  17486. /* Busy wait for completion. */
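The lock conversion above follows the usual RT rule: spinlock_t becomes a sleeping rtmutex on PREEMPT_RT, so any lock taken from a context that must not sleep (here, the stopper queueing paths) has to be a raw_spinlock_t. A minimal sketch of the distinction (illustrative names):

static DEFINE_RAW_SPINLOCK(sketch_atomic_lock);	/* always spins, never sleeps */
static DEFINE_SPINLOCK(sketch_preemptible_lock);/* rtmutex on RT, may sleep */

static void sketch_lock_rule(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&sketch_atomic_lock, flags);
	/* ... short, non-sleeping critical section ... */
	raw_spin_unlock_irqrestore(&sketch_atomic_lock, flags);

	spin_lock(&sketch_preemptible_lock);
	/* ... may be preempted or sleep on RT ... */
	spin_unlock(&sketch_preemptible_lock);
}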
  17487. diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
  17488. index fa0b983290cf..ab846abc8b7b 100644
  17489. --- a/kernel/time/hrtimer.c
  17490. +++ b/kernel/time/hrtimer.c
  17491. @@ -48,11 +48,13 @@
  17492. #include <linux/sched/rt.h>
  17493. #include <linux/sched/deadline.h>
  17494. #include <linux/timer.h>
  17495. +#include <linux/kthread.h>
  17496. #include <linux/freezer.h>
  17497. #include <asm/uaccess.h>
  17498. #include <trace/events/timer.h>
  17499. +#include <trace/events/hist.h>
  17500. #include "tick-internal.h"
  17501. @@ -706,6 +708,44 @@ static void clock_was_set_work(struct work_struct *work)
  17502. static DECLARE_WORK(hrtimer_work, clock_was_set_work);
  17503. +#ifdef CONFIG_PREEMPT_RT_FULL
  17504. +/*
  17505. + * RT cannot call schedule_work from real interrupt context.
  17506. + * Need to make a thread to do the real work.
  17507. + */
  17508. +static struct task_struct *clock_set_delay_thread;
  17509. +static bool do_clock_set_delay;
  17510. +
  17511. +static int run_clock_set_delay(void *ignore)
  17512. +{
  17513. + while (!kthread_should_stop()) {
  17514. + set_current_state(TASK_INTERRUPTIBLE);
  17515. + if (do_clock_set_delay) {
  17516. + do_clock_set_delay = false;
  17517. + schedule_work(&hrtimer_work);
  17518. + }
  17519. + schedule();
  17520. + }
  17521. + __set_current_state(TASK_RUNNING);
  17522. + return 0;
  17523. +}
  17524. +
  17525. +void clock_was_set_delayed(void)
  17526. +{
  17527. + do_clock_set_delay = true;
  17528. + /* Make visible before waking up process */
  17529. + smp_wmb();
  17530. + wake_up_process(clock_set_delay_thread);
  17531. +}
  17532. +
  17533. +static __init int create_clock_set_delay_thread(void)
  17534. +{
  17535. + clock_set_delay_thread = kthread_run(run_clock_set_delay, NULL, "kclksetdelayd");
  17536. + BUG_ON(!clock_set_delay_thread);
  17537. + return 0;
  17538. +}
  17539. +early_initcall(create_clock_set_delay_thread);
  17540. +#else /* PREEMPT_RT_FULL */
  17541. /*
  17542. * Called from timekeeping and resume code to reprogram the hrtimer
  17543. * interrupt device on all cpus.
  17544. @@ -714,6 +754,7 @@ void clock_was_set_delayed(void)
  17545. {
  17546. schedule_work(&hrtimer_work);
  17547. }
  17548. +#endif
  17549. #else
  17550. @@ -723,11 +764,8 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; }
  17551. static inline void hrtimer_switch_to_hres(void) { }
  17552. static inline void
  17553. hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
  17554. -static inline int hrtimer_reprogram(struct hrtimer *timer,
  17555. - struct hrtimer_clock_base *base)
  17556. -{
  17557. - return 0;
  17558. -}
  17559. +static inline void hrtimer_reprogram(struct hrtimer *timer,
  17560. + struct hrtimer_clock_base *base) { }
  17561. static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
  17562. static inline void retrigger_next_event(void *arg) { }
  17563. @@ -859,6 +897,32 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
  17564. }
  17565. EXPORT_SYMBOL_GPL(hrtimer_forward);
  17566. +#ifdef CONFIG_PREEMPT_RT_BASE
  17567. +# define wake_up_timer_waiters(b) wake_up(&(b)->wait)
  17568. +
  17569. +/**
  17570. + * hrtimer_wait_for_timer - Wait for a running timer
  17571. + *
  17572. + * @timer: timer to wait for
  17573. + *
  17574. + * The function waits, on the waitqueue of the timer base, in case
  17575. + * the timer's callback function is currently executing. The
  17576. + * waitqueue is woken up after the timer callback function has
  17577. + * finished execution.
  17578. + */
  17579. +void hrtimer_wait_for_timer(const struct hrtimer *timer)
  17580. +{
  17581. + struct hrtimer_clock_base *base = timer->base;
  17582. +
  17583. + if (base && base->cpu_base && !timer->irqsafe)
  17584. + wait_event(base->cpu_base->wait,
  17585. + !(hrtimer_callback_running(timer)));
  17586. +}
  17587. +
  17588. +#else
  17589. +# define wake_up_timer_waiters(b) do { } while (0)
  17590. +#endif
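hrtimer_cancel(), further down in this hunk, uses this helper instead of cpu_relax(): a waiter that cannot cancel a running, non-irqsafe timer sleeps on the base waitqueue until the softirq handler wakes it. The resulting cancel pattern, sketched:

static void sketch_cancel_sync(struct hrtimer *timer)
{
	while (hrtimer_try_to_cancel(timer) < 0)
		hrtimer_wait_for_timer(timer);	/* sleeps until the callback is done */
}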
  17591. +
  17592. /*
  17593. * enqueue_hrtimer - internal function to (re)start a timer
  17594. *
  17595. @@ -900,6 +964,11 @@ static void __remove_hrtimer(struct hrtimer *timer,
  17596. if (!(state & HRTIMER_STATE_ENQUEUED))
  17597. return;
  17598. + if (unlikely(!list_empty(&timer->cb_entry))) {
  17599. + list_del_init(&timer->cb_entry);
  17600. + return;
  17601. + }
  17602. +
  17603. if (!timerqueue_del(&base->active, &timer->node))
  17604. cpu_base->active_bases &= ~(1 << base->index);
  17605. @@ -995,7 +1064,16 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
  17606. new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
  17607. timer_stats_hrtimer_set_start_info(timer);
  17608. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  17609. + {
  17610. + ktime_t now = new_base->get_time();
  17611. + if (ktime_to_ns(tim) < ktime_to_ns(now))
  17612. + timer->praecox = now;
  17613. + else
  17614. + timer->praecox = ktime_set(0, 0);
  17615. + }
  17616. +#endif
  17617. leftmost = enqueue_hrtimer(timer, new_base);
  17618. if (!leftmost)
  17619. goto unlock;
  17620. @@ -1067,7 +1145,7 @@ int hrtimer_cancel(struct hrtimer *timer)
  17621. if (ret >= 0)
  17622. return ret;
  17623. - cpu_relax();
  17624. + hrtimer_wait_for_timer(timer);
  17625. }
  17626. }
  17627. EXPORT_SYMBOL_GPL(hrtimer_cancel);
  17628. @@ -1131,6 +1209,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
  17629. base = hrtimer_clockid_to_base(clock_id);
  17630. timer->base = &cpu_base->clock_base[base];
  17631. + INIT_LIST_HEAD(&timer->cb_entry);
  17632. timerqueue_init(&timer->node);
  17633. #ifdef CONFIG_TIMER_STATS
  17634. @@ -1171,6 +1250,7 @@ bool hrtimer_active(const struct hrtimer *timer)
  17635. seq = raw_read_seqcount_begin(&cpu_base->seq);
  17636. if (timer->state != HRTIMER_STATE_INACTIVE ||
  17637. + cpu_base->running_soft == timer ||
  17638. cpu_base->running == timer)
  17639. return true;
  17640. @@ -1269,10 +1349,112 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
  17641. cpu_base->running = NULL;
  17642. }
  17643. +#ifdef CONFIG_PREEMPT_RT_BASE
  17644. +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
  17645. + struct hrtimer_clock_base *base)
  17646. +{
  17647. + int leftmost;
  17648. +
  17649. + if (restart != HRTIMER_NORESTART &&
  17650. + !(timer->state & HRTIMER_STATE_ENQUEUED)) {
  17651. +
  17652. + leftmost = enqueue_hrtimer(timer, base);
  17653. + if (!leftmost)
  17654. + return;
  17655. +#ifdef CONFIG_HIGH_RES_TIMERS
  17656. + if (!hrtimer_is_hres_active(timer)) {
  17657. + /*
  17658. + * Kick to reschedule the next tick to handle the new timer
  17659. + * on dynticks target.
  17660. + */
  17661. + if (base->cpu_base->nohz_active)
  17662. + wake_up_nohz_cpu(base->cpu_base->cpu);
  17663. + } else {
  17664. +
  17665. + hrtimer_reprogram(timer, base);
  17666. + }
  17667. +#endif
  17668. + }
  17669. +}
  17670. +
  17671. +/*
  17672. + * The changes in mainline which removed the callback modes from
  17673. + * hrtimer are not yet working with -rt. The non wakeup_process()
  17674. + * based callbacks which involve sleeping locks need to be treated
  17675. + * seperately.
  17676. + */
  17677. +static void hrtimer_rt_run_pending(void)
  17678. +{
  17679. + enum hrtimer_restart (*fn)(struct hrtimer *);
  17680. + struct hrtimer_cpu_base *cpu_base;
  17681. + struct hrtimer_clock_base *base;
  17682. + struct hrtimer *timer;
  17683. + int index, restart;
  17684. +
  17685. + local_irq_disable();
  17686. + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
  17687. +
  17688. + raw_spin_lock(&cpu_base->lock);
  17689. +
  17690. + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
  17691. + base = &cpu_base->clock_base[index];
  17692. +
  17693. + while (!list_empty(&base->expired)) {
  17694. + timer = list_first_entry(&base->expired,
  17695. + struct hrtimer, cb_entry);
  17696. +
  17697. + /*
  17698. + * Same as the above __run_hrtimer function
  17699. + * except that we run with interrupts enabled.
  17700. + */
  17701. + debug_deactivate(timer);
  17702. + cpu_base->running_soft = timer;
  17703. + raw_write_seqcount_barrier(&cpu_base->seq);
  17704. +
  17705. + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
  17706. + timer_stats_account_hrtimer(timer);
  17707. + fn = timer->function;
  17708. +
  17709. + raw_spin_unlock_irq(&cpu_base->lock);
  17710. + restart = fn(timer);
  17711. + raw_spin_lock_irq(&cpu_base->lock);
  17712. +
  17713. + hrtimer_rt_reprogram(restart, timer, base);
  17714. + raw_write_seqcount_barrier(&cpu_base->seq);
  17715. +
  17716. + WARN_ON_ONCE(cpu_base->running_soft != timer);
  17717. + cpu_base->running_soft = NULL;
  17718. + }
  17719. + }
  17720. +
  17721. + raw_spin_unlock_irq(&cpu_base->lock);
  17722. +
  17723. + wake_up_timer_waiters(cpu_base);
  17724. +}
  17725. +
  17726. +static int hrtimer_rt_defer(struct hrtimer *timer)
  17727. +{
  17728. + if (timer->irqsafe)
  17729. + return 0;
  17730. +
  17731. + __remove_hrtimer(timer, timer->base, timer->state, 0);
  17732. + list_add_tail(&timer->cb_entry, &timer->base->expired);
  17733. + return 1;
  17734. +}
  17735. +
  17736. +#else
  17737. +
  17738. +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
  17739. +
  17740. +#endif
  17741. +
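Only timers whose callbacks are flagged ->irqsafe keep running from hard interrupt context; everything else is moved onto the per-base expired list and handled in HRTIMER_SOFTIRQ by hrtimer_rt_run_pending(). A usage sketch of opting a timer back into hard-irq execution, as this patch does later for hrtimer_wakeup() and the tick sched_timer (illustrative callback name):

static void sketch_init_irqsafe_timer(struct hrtimer *t,
				      enum hrtimer_restart (*cb)(struct hrtimer *))
{
	hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	t->function = cb;
	t->irqsafe = 1;		/* field introduced by this patch: run in hard irq */
}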
  17742. +static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
  17743. +
  17744. static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
  17745. {
  17746. struct hrtimer_clock_base *base = cpu_base->clock_base;
  17747. unsigned int active = cpu_base->active_bases;
  17748. + int raise = 0;
  17749. for (; active; base++, active >>= 1) {
  17750. struct timerqueue_node *node;
  17751. @@ -1288,6 +1470,15 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
  17752. timer = container_of(node, struct hrtimer, node);
  17753. + trace_hrtimer_interrupt(raw_smp_processor_id(),
  17754. + ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ?
  17755. + timer->praecox : hrtimer_get_expires(timer),
  17756. + basenow)),
  17757. + current,
  17758. + timer->function == hrtimer_wakeup ?
  17759. + container_of(timer, struct hrtimer_sleeper,
  17760. + timer)->task : NULL);
  17761. +
  17762. /*
  17763. * The immediate goal for using the softexpires is
  17764. * minimizing wakeups, not running timers at the
  17765. @@ -1303,9 +1494,14 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
  17766. if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
  17767. break;
  17768. - __run_hrtimer(cpu_base, base, timer, &basenow);
  17769. + if (!hrtimer_rt_defer(timer))
  17770. + __run_hrtimer(cpu_base, base, timer, &basenow);
  17771. + else
  17772. + raise = 1;
  17773. }
  17774. }
  17775. + if (raise)
  17776. + raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  17777. }
  17778. #ifdef CONFIG_HIGH_RES_TIMERS
  17779. @@ -1468,16 +1664,18 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
  17780. void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
  17781. {
  17782. sl->timer.function = hrtimer_wakeup;
  17783. + sl->timer.irqsafe = 1;
  17784. sl->task = task;
  17785. }
  17786. EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
  17787. -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
  17788. +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode,
  17789. + unsigned long state)
  17790. {
  17791. hrtimer_init_sleeper(t, current);
  17792. do {
  17793. - set_current_state(TASK_INTERRUPTIBLE);
  17794. + set_current_state(state);
  17795. hrtimer_start_expires(&t->timer, mode);
  17796. if (likely(t->task))
  17797. @@ -1519,7 +1717,8 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
  17798. HRTIMER_MODE_ABS);
  17799. hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
  17800. - if (do_nanosleep(&t, HRTIMER_MODE_ABS))
  17801. + /* cpu_chill() does not care about restart state. */
  17802. + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE))
  17803. goto out;
  17804. rmtp = restart->nanosleep.rmtp;
  17805. @@ -1536,8 +1735,10 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
  17806. return ret;
  17807. }
  17808. -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  17809. - const enum hrtimer_mode mode, const clockid_t clockid)
  17810. +static long
  17811. +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  17812. + const enum hrtimer_mode mode, const clockid_t clockid,
  17813. + unsigned long state)
  17814. {
  17815. struct restart_block *restart;
  17816. struct hrtimer_sleeper t;
  17817. @@ -1550,7 +1751,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  17818. hrtimer_init_on_stack(&t.timer, clockid, mode);
  17819. hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
  17820. - if (do_nanosleep(&t, mode))
  17821. + if (do_nanosleep(&t, mode, state))
  17822. goto out;
  17823. /* Absolute timers do not update the rmtp value and restart: */
  17824. @@ -1577,6 +1778,12 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  17825. return ret;
  17826. }
  17827. +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  17828. + const enum hrtimer_mode mode, const clockid_t clockid)
  17829. +{
  17830. + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE);
  17831. +}
  17832. +
  17833. SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
  17834. struct timespec __user *, rmtp)
  17835. {
  17836. @@ -1591,6 +1798,26 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
  17837. return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
  17838. }
  17839. +#ifdef CONFIG_PREEMPT_RT_FULL
  17840. +/*
  17841. + * Sleep for 1 ms in the hope that whoever holds what we want will let it go.
  17842. + */
  17843. +void cpu_chill(void)
  17844. +{
  17845. + struct timespec tu = {
  17846. + .tv_nsec = NSEC_PER_MSEC,
  17847. + };
  17848. + unsigned int freeze_flag = current->flags & PF_NOFREEZE;
  17849. +
  17850. + current->flags |= PF_NOFREEZE;
  17851. + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC,
  17852. + TASK_UNINTERRUPTIBLE);
  17853. + if (!freeze_flag)
  17854. + current->flags &= ~PF_NOFREEZE;
  17855. +}
  17856. +EXPORT_SYMBOL(cpu_chill);
  17857. +#endif
  17858. +
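cpu_chill() is the RT replacement for busy-wait retry loops: a high-priority task spinning on cpu_relax() could starve the (possibly lower-priority) holder forever, so the waiter sleeps for a millisecond instead. A typical retry loop after conversion (illustrative):

static void sketch_retry_until_free(atomic_t *busy)
{
	while (atomic_read(busy))
		cpu_chill();	/* sleep ~1 ms instead of spinning */
}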
  17859. /*
  17860. * Functions related to boot-time initialization:
  17861. */
  17862. @@ -1602,10 +1829,14 @@ static void init_hrtimers_cpu(int cpu)
  17863. for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
  17864. cpu_base->clock_base[i].cpu_base = cpu_base;
  17865. timerqueue_init_head(&cpu_base->clock_base[i].active);
  17866. + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
  17867. }
  17868. cpu_base->cpu = cpu;
  17869. hrtimer_init_hres(cpu_base);
  17870. +#ifdef CONFIG_PREEMPT_RT_BASE
  17871. + init_waitqueue_head(&cpu_base->wait);
  17872. +#endif
  17873. }
  17874. #ifdef CONFIG_HOTPLUG_CPU
  17875. @@ -1703,11 +1934,21 @@ static struct notifier_block hrtimers_nb = {
  17876. .notifier_call = hrtimer_cpu_notify,
  17877. };
  17878. +#ifdef CONFIG_PREEMPT_RT_BASE
  17879. +static void run_hrtimer_softirq(struct softirq_action *h)
  17880. +{
  17881. + hrtimer_rt_run_pending();
  17882. +}
  17883. +#endif
  17884. +
  17885. void __init hrtimers_init(void)
  17886. {
  17887. hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
  17888. (void *)(long)smp_processor_id());
  17889. register_cpu_notifier(&hrtimers_nb);
  17890. +#ifdef CONFIG_PREEMPT_RT_BASE
  17891. + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
  17892. +#endif
  17893. }
  17894. /**
  17895. diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
  17896. index 1d5c7204ddc9..184de6751180 100644
  17897. --- a/kernel/time/itimer.c
  17898. +++ b/kernel/time/itimer.c
  17899. @@ -213,6 +213,7 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
  17900. /* We are sharing ->siglock with it_real_fn() */
  17901. if (hrtimer_try_to_cancel(timer) < 0) {
  17902. spin_unlock_irq(&tsk->sighand->siglock);
  17903. + hrtimer_wait_for_timer(&tsk->signal->real_timer);
  17904. goto again;
  17905. }
  17906. expires = timeval_to_ktime(value->it_value);
  17907. diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
  17908. index 555e21f7b966..a5d6435fabbb 100644
  17909. --- a/kernel/time/jiffies.c
  17910. +++ b/kernel/time/jiffies.c
  17911. @@ -74,7 +74,8 @@ static struct clocksource clocksource_jiffies = {
  17912. .max_cycles = 10,
  17913. };
  17914. -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
  17915. +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
  17916. +__cacheline_aligned_in_smp seqcount_t jiffies_seq;
  17917. #if (BITS_PER_LONG < 64)
  17918. u64 get_jiffies_64(void)
  17919. @@ -83,9 +84,9 @@ u64 get_jiffies_64(void)
  17920. u64 ret;
  17921. do {
  17922. - seq = read_seqbegin(&jiffies_lock);
  17923. + seq = read_seqcount_begin(&jiffies_seq);
  17924. ret = jiffies_64;
  17925. - } while (read_seqretry(&jiffies_lock, seq));
  17926. + } while (read_seqcount_retry(&jiffies_seq, seq));
  17927. return ret;
  17928. }
  17929. EXPORT_SYMBOL(get_jiffies_64);
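Splitting the jiffies seqlock into a raw spinlock plus a bare seqcount keeps the read side lockless while letting the writer use a lock that never sleeps on RT. The writer pattern, as used below in tick-common.c, tick-sched.c and timekeeping.c, sketched:

static void sketch_jiffies_write_side(void)
{
	raw_spin_lock(&jiffies_lock);		/* serialize writers, no sleeping */
	write_seqcount_begin(&jiffies_seq);	/* make readers retry */
	/* ... do_timer() / advance tick_next_period ... */
	write_seqcount_end(&jiffies_seq);
	raw_spin_unlock(&jiffies_lock);
}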
  17930. diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
  17931. index 6df8927c58a5..6b4c19fa233f 100644
  17932. --- a/kernel/time/ntp.c
  17933. +++ b/kernel/time/ntp.c
  17934. @@ -10,6 +10,7 @@
  17935. #include <linux/workqueue.h>
  17936. #include <linux/hrtimer.h>
  17937. #include <linux/jiffies.h>
  17938. +#include <linux/kthread.h>
  17939. #include <linux/math64.h>
  17940. #include <linux/timex.h>
  17941. #include <linux/time.h>
  17942. @@ -568,10 +569,52 @@ static void sync_cmos_clock(struct work_struct *work)
  17943. &sync_cmos_work, timespec64_to_jiffies(&next));
  17944. }
  17945. +#ifdef CONFIG_PREEMPT_RT_FULL
  17946. +/*
  17947. + * RT cannot call schedule_delayed_work from real interrupt context.
  17948. + * Need to make a thread to do the real work.
  17949. + */
  17950. +static struct task_struct *cmos_delay_thread;
  17951. +static bool do_cmos_delay;
  17952. +
  17953. +static int run_cmos_delay(void *ignore)
  17954. +{
  17955. + while (!kthread_should_stop()) {
  17956. + set_current_state(TASK_INTERRUPTIBLE);
  17957. + if (do_cmos_delay) {
  17958. + do_cmos_delay = false;
  17959. + queue_delayed_work(system_power_efficient_wq,
  17960. + &sync_cmos_work, 0);
  17961. + }
  17962. + schedule();
  17963. + }
  17964. + __set_current_state(TASK_RUNNING);
  17965. + return 0;
  17966. +}
  17967. +
  17968. +void ntp_notify_cmos_timer(void)
  17969. +{
  17970. + do_cmos_delay = true;
  17971. + /* Make visible before waking up process */
  17972. + smp_wmb();
  17973. + wake_up_process(cmos_delay_thread);
  17974. +}
  17975. +
  17976. +static __init int create_cmos_delay_thread(void)
  17977. +{
  17978. + cmos_delay_thread = kthread_run(run_cmos_delay, NULL, "kcmosdelayd");
  17979. + BUG_ON(!cmos_delay_thread);
  17980. + return 0;
  17981. +}
  17982. +early_initcall(create_cmos_delay_thread);
  17983. +
  17984. +#else
  17985. +
  17986. void ntp_notify_cmos_timer(void)
  17987. {
  17988. queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
  17989. }
  17990. +#endif /* CONFIG_PREEMPT_RT_FULL */
  17991. #else
  17992. void ntp_notify_cmos_timer(void) { }
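This is the same defer-to-kthread pattern used for clock_was_set_delayed() in hrtimer.c above: the hard-irq caller only sets a flag and wakes a dedicated kthread, which then performs the work-queue call from a context that may take sleeping locks. Condensed to its core (illustrative names):

static struct task_struct *sketch_defer_thread;
static bool sketch_defer_pending;

static void sketch_defer_from_hardirq(void)
{
	sketch_defer_pending = true;
	smp_wmb();				/* flag visible before the wakeup */
	wake_up_process(sketch_defer_thread);	/* thread re-checks the flag */
}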
  17993. diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
  17994. index 1cafba860b08..5f16807aa3ff 100644
  17995. --- a/kernel/time/posix-cpu-timers.c
  17996. +++ b/kernel/time/posix-cpu-timers.c
  17997. @@ -3,6 +3,7 @@
  17998. */
  17999. #include <linux/sched.h>
  18000. +#include <linux/sched/rt.h>
  18001. #include <linux/posix-timers.h>
  18002. #include <linux/errno.h>
  18003. #include <linux/math64.h>
  18004. @@ -620,7 +621,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
  18005. /*
  18006. * Disarm any old timer after extracting its expiry time.
  18007. */
  18008. - WARN_ON_ONCE(!irqs_disabled());
  18009. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  18010. ret = 0;
  18011. old_incr = timer->it.cpu.incr;
  18012. @@ -1063,7 +1064,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
  18013. /*
  18014. * Now re-arm for the new expiry time.
  18015. */
  18016. - WARN_ON_ONCE(!irqs_disabled());
  18017. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  18018. arm_timer(timer);
  18019. unlock_task_sighand(p, &flags);
  18020. @@ -1152,13 +1153,13 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
  18021. * already updated our counts. We need to check if any timers fire now.
  18022. * Interrupts are disabled.
  18023. */
  18024. -void run_posix_cpu_timers(struct task_struct *tsk)
  18025. +static void __run_posix_cpu_timers(struct task_struct *tsk)
  18026. {
  18027. LIST_HEAD(firing);
  18028. struct k_itimer *timer, *next;
  18029. unsigned long flags;
  18030. - WARN_ON_ONCE(!irqs_disabled());
  18031. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  18032. /*
  18033. * The fast path checks that there are no expired thread or thread
  18034. @@ -1212,6 +1213,190 @@ void run_posix_cpu_timers(struct task_struct *tsk)
  18035. }
  18036. }
  18037. +#ifdef CONFIG_PREEMPT_RT_BASE
  18038. +#include <linux/kthread.h>
  18039. +#include <linux/cpu.h>
  18040. +DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
  18041. +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
  18042. +
  18043. +static int posix_cpu_timers_thread(void *data)
  18044. +{
  18045. + int cpu = (long)data;
  18046. +
  18047. + BUG_ON(per_cpu(posix_timer_task,cpu) != current);
  18048. +
  18049. + while (!kthread_should_stop()) {
  18050. + struct task_struct *tsk = NULL;
  18051. + struct task_struct *next = NULL;
  18052. +
  18053. + if (cpu_is_offline(cpu))
  18054. + goto wait_to_die;
  18055. +
  18056. + /* grab task list */
  18057. + raw_local_irq_disable();
  18058. + tsk = per_cpu(posix_timer_tasklist, cpu);
  18059. + per_cpu(posix_timer_tasklist, cpu) = NULL;
  18060. + raw_local_irq_enable();
  18061. +
  18062. + /* it's possible the list is empty, just go back to sleep */
  18063. + if (!tsk) {
  18064. + set_current_state(TASK_INTERRUPTIBLE);
  18065. + schedule();
  18066. + __set_current_state(TASK_RUNNING);
  18067. + continue;
  18068. + }
  18069. +
  18070. + /* Process task list */
  18071. + while (1) {
  18072. + /* save next */
  18073. + next = tsk->posix_timer_list;
  18074. +
  18075. + /* Run the task's timers, clear its list pointer and
  18076. + * drop the reference.
  18077. + */
  18078. + __run_posix_cpu_timers(tsk);
  18079. + tsk->posix_timer_list = NULL;
  18080. + put_task_struct(tsk);
  18081. +
  18082. + /* check if this is the last on the list */
  18083. + if (next == tsk)
  18084. + break;
  18085. + tsk = next;
  18086. + }
  18087. + }
  18088. + return 0;
  18089. +
  18090. +wait_to_die:
  18091. + /* Wait for kthread_stop */
  18092. + set_current_state(TASK_INTERRUPTIBLE);
  18093. + while (!kthread_should_stop()) {
  18094. + schedule();
  18095. + set_current_state(TASK_INTERRUPTIBLE);
  18096. + }
  18097. + __set_current_state(TASK_RUNNING);
  18098. + return 0;
  18099. +}
  18100. +
  18101. +static inline int __fastpath_timer_check(struct task_struct *tsk)
  18102. +{
  18103. + /* tsk == current, ensure it is safe to use ->signal/sighand */
  18104. + if (unlikely(tsk->exit_state))
  18105. + return 0;
  18106. +
  18107. + if (!task_cputime_zero(&tsk->cputime_expires))
  18108. + return 1;
  18109. +
  18110. + if (!task_cputime_zero(&tsk->signal->cputime_expires))
  18111. + return 1;
  18112. +
  18113. + return 0;
  18114. +}
  18115. +
  18116. +void run_posix_cpu_timers(struct task_struct *tsk)
  18117. +{
  18118. + unsigned long cpu = smp_processor_id();
  18119. + struct task_struct *tasklist;
  18120. +
  18121. + BUG_ON(!irqs_disabled());
  18122. + if(!per_cpu(posix_timer_task, cpu))
  18123. + return;
  18124. + /* get per-cpu references */
  18125. + tasklist = per_cpu(posix_timer_tasklist, cpu);
  18126. +
  18127. + /* check to see if we're already queued */
  18128. + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
  18129. + get_task_struct(tsk);
  18130. + if (tasklist) {
  18131. + tsk->posix_timer_list = tasklist;
  18132. + } else {
  18133. + /*
  18134. + * The list is terminated by a self-pointing
  18135. + * task_struct
  18136. + */
  18137. + tsk->posix_timer_list = tsk;
  18138. + }
  18139. + per_cpu(posix_timer_tasklist, cpu) = tsk;
  18140. +
  18141. + wake_up_process(per_cpu(posix_timer_task, cpu));
  18142. + }
  18143. +}
  18144. +
  18145. +/*
  18146. + * posix_cpu_thread_call - callback that gets triggered when a CPU is added.
  18147. + * Here we can start up the necessary posix timer thread for the new CPU.
  18148. + */
  18149. +static int posix_cpu_thread_call(struct notifier_block *nfb,
  18150. + unsigned long action, void *hcpu)
  18151. +{
  18152. + int cpu = (long)hcpu;
  18153. + struct task_struct *p;
  18154. + struct sched_param param;
  18155. +
  18156. + switch (action) {
  18157. + case CPU_UP_PREPARE:
  18158. + p = kthread_create(posix_cpu_timers_thread, hcpu,
  18159. + "posixcputmr/%d",cpu);
  18160. + if (IS_ERR(p))
  18161. + return NOTIFY_BAD;
  18162. + p->flags |= PF_NOFREEZE;
  18163. + kthread_bind(p, cpu);
  18164. + /* Must be high prio to avoid getting starved */
  18165. + param.sched_priority = MAX_RT_PRIO-1;
  18166. + sched_setscheduler(p, SCHED_FIFO, &param);
  18167. + per_cpu(posix_timer_task,cpu) = p;
  18168. + break;
  18169. + case CPU_ONLINE:
  18170. + /* Strictly unnecessary, as first user will wake it. */
  18171. + wake_up_process(per_cpu(posix_timer_task,cpu));
  18172. + break;
  18173. +#ifdef CONFIG_HOTPLUG_CPU
  18174. + case CPU_UP_CANCELED:
  18175. + /* Unbind it from offline cpu so it can run. Fall thru. */
  18176. + kthread_bind(per_cpu(posix_timer_task, cpu),
  18177. + cpumask_any(cpu_online_mask));
  18178. + kthread_stop(per_cpu(posix_timer_task,cpu));
  18179. + per_cpu(posix_timer_task,cpu) = NULL;
  18180. + break;
  18181. + case CPU_DEAD:
  18182. + kthread_stop(per_cpu(posix_timer_task,cpu));
  18183. + per_cpu(posix_timer_task,cpu) = NULL;
  18184. + break;
  18185. +#endif
  18186. + }
  18187. + return NOTIFY_OK;
  18188. +}
  18189. +
  18190. +/* Register at highest priority so that task migration (migrate_all_tasks)
  18191. + * happens before everything else.
  18192. + */
  18193. +static struct notifier_block posix_cpu_thread_notifier = {
  18194. + .notifier_call = posix_cpu_thread_call,
  18195. + .priority = 10
  18196. +};
  18197. +
  18198. +static int __init posix_cpu_thread_init(void)
  18199. +{
  18200. + void *hcpu = (void *)(long)smp_processor_id();
  18201. + /* Start one for boot CPU. */
  18202. + unsigned long cpu;
  18203. +
  18204. + /* init the per-cpu posix_timer_tasklets */
  18205. + for_each_possible_cpu(cpu)
  18206. + per_cpu(posix_timer_tasklist, cpu) = NULL;
  18207. +
  18208. + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu);
  18209. + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu);
  18210. + register_cpu_notifier(&posix_cpu_thread_notifier);
  18211. + return 0;
  18212. +}
  18213. +early_initcall(posix_cpu_thread_init);
  18214. +#else /* CONFIG_PREEMPT_RT_BASE */
  18215. +void run_posix_cpu_timers(struct task_struct *tsk)
  18216. +{
  18217. + __run_posix_cpu_timers(tsk);
  18218. +}
  18219. +#endif /* CONFIG_PREEMPT_RT_BASE */
  18220. +
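One detail worth calling out in the block above: per_cpu posix_timer_tasklist chains task_structs through ->posix_timer_list, and the final entry points at itself. A NULL pointer therefore means "not queued", while a self-reference terminates the walk, as this traversal sketch shows:

static void sketch_walk_timer_list(struct task_struct *head)
{
	struct task_struct *tsk = head, *next;

	while (tsk) {
		next = tsk->posix_timer_list;
		/* __run_posix_cpu_timers(tsk) happens here in the real code */
		tsk->posix_timer_list = NULL;	/* mark "not queued" again */
		put_task_struct(tsk);
		if (next == tsk)
			break;			/* self-pointer ends the list */
		tsk = next;
	}
}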
  18221. /*
  18222. * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
  18223. * The tsk->sighand->siglock must be held by the caller.
  18224. diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
  18225. index f2826c35e918..464a98155a0e 100644
  18226. --- a/kernel/time/posix-timers.c
  18227. +++ b/kernel/time/posix-timers.c
  18228. @@ -506,6 +506,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
  18229. static struct pid *good_sigevent(sigevent_t * event)
  18230. {
  18231. struct task_struct *rtn = current->group_leader;
  18232. + int sig = event->sigev_signo;
  18233. if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
  18234. (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
  18235. @@ -514,7 +515,8 @@ static struct pid *good_sigevent(sigevent_t * event)
  18236. return NULL;
  18237. if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
  18238. - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
  18239. + (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) ||
  18240. + sig_kernel_coredump(sig)))
  18241. return NULL;
  18242. return task_pid(rtn);
  18243. @@ -826,6 +828,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
  18244. return overrun;
  18245. }
  18246. +/*
  18247. + * Protected by RCU!
  18248. + */
  18249. +static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
  18250. +{
  18251. +#ifdef CONFIG_PREEMPT_RT_FULL
  18252. + if (kc->timer_set == common_timer_set)
  18253. + hrtimer_wait_for_timer(&timr->it.real.timer);
  18254. + else
  18255. + /* FIXME: Whacky hack for posix-cpu-timers */
  18256. + schedule_timeout(1);
  18257. +#endif
  18258. +}
  18259. +
  18260. /* Set a POSIX.1b interval timer. */
  18261. /* timr->it_lock is taken. */
  18262. static int
  18263. @@ -903,6 +919,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
  18264. if (!timr)
  18265. return -EINVAL;
  18266. + rcu_read_lock();
  18267. kc = clockid_to_kclock(timr->it_clock);
  18268. if (WARN_ON_ONCE(!kc || !kc->timer_set))
  18269. error = -EINVAL;
  18270. @@ -911,9 +928,12 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
  18271. unlock_timer(timr, flag);
  18272. if (error == TIMER_RETRY) {
  18273. + timer_wait_for_callback(kc, timr);
  18274. rtn = NULL; // We already got the old time...
  18275. + rcu_read_unlock();
  18276. goto retry;
  18277. }
  18278. + rcu_read_unlock();
  18279. if (old_setting && !error &&
  18280. copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
  18281. @@ -951,10 +971,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
  18282. if (!timer)
  18283. return -EINVAL;
  18284. + rcu_read_lock();
  18285. if (timer_delete_hook(timer) == TIMER_RETRY) {
  18286. unlock_timer(timer, flags);
  18287. + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
  18288. + timer);
  18289. + rcu_read_unlock();
  18290. goto retry_delete;
  18291. }
  18292. + rcu_read_unlock();
  18293. spin_lock(&current->sighand->siglock);
  18294. list_del(&timer->list);
  18295. @@ -980,8 +1005,18 @@ static void itimer_delete(struct k_itimer *timer)
  18296. retry_delete:
  18297. spin_lock_irqsave(&timer->it_lock, flags);
  18298. - if (timer_delete_hook(timer) == TIMER_RETRY) {
  18299. + /* On RT we can race with a deletion */
  18300. + if (!timer->it_signal) {
  18301. unlock_timer(timer, flags);
  18302. + return;
  18303. + }
  18304. +
  18305. + if (timer_delete_hook(timer) == TIMER_RETRY) {
  18306. + rcu_read_lock();
  18307. + unlock_timer(timer, flags);
  18308. + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
  18309. + timer);
  18310. + rcu_read_unlock();
  18311. goto retry_delete;
  18312. }
  18313. list_del(&timer->list);
  18314. diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
  18315. index 53d7184da0be..1b4ac3361c3f 100644
  18316. --- a/kernel/time/tick-broadcast-hrtimer.c
  18317. +++ b/kernel/time/tick-broadcast-hrtimer.c
  18318. @@ -106,5 +106,6 @@ void tick_setup_hrtimer_broadcast(void)
  18319. {
  18320. hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  18321. bctimer.function = bc_handler;
  18322. + bctimer.irqsafe = true;
  18323. clockevents_register_device(&ce_broadcast_hrtimer);
  18324. }
  18325. diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
  18326. index 4fcd99e12aa0..5a47f2e98faf 100644
  18327. --- a/kernel/time/tick-common.c
  18328. +++ b/kernel/time/tick-common.c
  18329. @@ -79,13 +79,15 @@ int tick_is_oneshot_available(void)
  18330. static void tick_periodic(int cpu)
  18331. {
  18332. if (tick_do_timer_cpu == cpu) {
  18333. - write_seqlock(&jiffies_lock);
  18334. + raw_spin_lock(&jiffies_lock);
  18335. + write_seqcount_begin(&jiffies_seq);
  18336. /* Keep track of the next tick event */
  18337. tick_next_period = ktime_add(tick_next_period, tick_period);
  18338. do_timer(1);
  18339. - write_sequnlock(&jiffies_lock);
  18340. + write_seqcount_end(&jiffies_seq);
  18341. + raw_spin_unlock(&jiffies_lock);
  18342. update_wall_time();
  18343. }
  18344. @@ -157,9 +159,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
  18345. ktime_t next;
  18346. do {
  18347. - seq = read_seqbegin(&jiffies_lock);
  18348. + seq = read_seqcount_begin(&jiffies_seq);
  18349. next = tick_next_period;
  18350. - } while (read_seqretry(&jiffies_lock, seq));
  18351. + } while (read_seqcount_retry(&jiffies_seq, seq));
  18352. clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
  18353. diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
  18354. index 966a5a6fdd0a..f738251000fe 100644
  18355. --- a/kernel/time/tick-internal.h
  18356. +++ b/kernel/time/tick-internal.h
  18357. @@ -164,3 +164,4 @@ static inline void timers_update_migration(bool update_nohz) { }
  18358. DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
  18359. extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
  18360. +void timer_clear_idle(void);
  18361. diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
  18362. index 58e3310c9b21..6fc9a5fdd5f4 100644
  18363. --- a/kernel/time/tick-sched.c
  18364. +++ b/kernel/time/tick-sched.c
  18365. @@ -62,7 +62,8 @@ static void tick_do_update_jiffies64(ktime_t now)
  18366. return;
  18367. /* Reevaluate with jiffies_lock held */
  18368. - write_seqlock(&jiffies_lock);
  18369. + raw_spin_lock(&jiffies_lock);
  18370. + write_seqcount_begin(&jiffies_seq);
  18371. delta = ktime_sub(now, last_jiffies_update);
  18372. if (delta.tv64 >= tick_period.tv64) {
  18373. @@ -85,10 +86,12 @@ static void tick_do_update_jiffies64(ktime_t now)
  18374. /* Keep the tick_next_period variable up to date */
  18375. tick_next_period = ktime_add(last_jiffies_update, tick_period);
  18376. } else {
  18377. - write_sequnlock(&jiffies_lock);
  18378. + write_seqcount_end(&jiffies_seq);
  18379. + raw_spin_unlock(&jiffies_lock);
  18380. return;
  18381. }
  18382. - write_sequnlock(&jiffies_lock);
  18383. + write_seqcount_end(&jiffies_seq);
  18384. + raw_spin_unlock(&jiffies_lock);
  18385. update_wall_time();
  18386. }
  18387. @@ -99,12 +102,14 @@ static ktime_t tick_init_jiffy_update(void)
  18388. {
  18389. ktime_t period;
  18390. - write_seqlock(&jiffies_lock);
  18391. + raw_spin_lock(&jiffies_lock);
  18392. + write_seqcount_begin(&jiffies_seq);
  18393. /* Did we start the jiffies update yet ? */
  18394. if (last_jiffies_update.tv64 == 0)
  18395. last_jiffies_update = tick_next_period;
  18396. period = last_jiffies_update;
  18397. - write_sequnlock(&jiffies_lock);
  18398. + write_seqcount_end(&jiffies_seq);
  18399. + raw_spin_unlock(&jiffies_lock);
  18400. return period;
  18401. }
  18402. @@ -212,6 +217,7 @@ static void nohz_full_kick_func(struct irq_work *work)
  18403. static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
  18404. .func = nohz_full_kick_func,
  18405. + .flags = IRQ_WORK_HARD_IRQ,
  18406. };
  18407. /*
  18408. @@ -670,10 +676,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
  18409. /* Read jiffies and the time when jiffies were updated last */
  18410. do {
  18411. - seq = read_seqbegin(&jiffies_lock);
  18412. + seq = read_seqcount_begin(&jiffies_seq);
  18413. basemono = last_jiffies_update.tv64;
  18414. basejiff = jiffies;
  18415. - } while (read_seqretry(&jiffies_lock, seq));
  18416. + } while (read_seqcount_retry(&jiffies_seq, seq));
  18417. ts->last_jiffies = basejiff;
  18418. if (rcu_needs_cpu(basemono, &next_rcu) ||
  18419. @@ -700,6 +706,12 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
  18420. delta = next_tick - basemono;
  18421. if (delta <= (u64)TICK_NSEC) {
  18422. tick.tv64 = 0;
  18423. +
  18424. + /*
  18425. + * Tell the timer code that the base is not idle, i.e. undo
  18426. + * the effect of get_next_timer_interrupt().
  18427. + */
  18428. + timer_clear_idle();
  18429. /*
  18430. * We've not stopped the tick yet, and there's a timer in the
  18431. * next period, so no point in stopping it either, bail.
  18432. @@ -808,6 +820,12 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now, int
  18433. tick_do_update_jiffies64(now);
  18434. update_cpu_load_nohz(active);
  18435. + /*
  18436. + * Clear the timer idle flag, so we avoid IPIs on remote queueing and
  18437. + * the clock forward checks in the enqueue path.
  18438. + */
  18439. + timer_clear_idle();
  18440. +
  18441. calc_load_exit_idle();
  18442. touch_softlockup_watchdog_sched();
  18443. /*
  18444. @@ -861,14 +879,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
  18445. return false;
  18446. if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
  18447. - static int ratelimit;
  18448. -
  18449. - if (ratelimit < 10 &&
  18450. - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
  18451. - pr_warn("NOHZ: local_softirq_pending %02x\n",
  18452. - (unsigned int) local_softirq_pending());
  18453. - ratelimit++;
  18454. - }
  18455. + softirq_check_pending_idle();
  18456. return false;
  18457. }
  18458. @@ -1091,35 +1102,6 @@ static void tick_nohz_switch_to_nohz(void)
  18459. tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
  18460. }
  18461. -/*
  18462. - * When NOHZ is enabled and the tick is stopped, we need to kick the
  18463. - * tick timer from irq_enter() so that the jiffies update is kept
  18464. - * alive during long running softirqs. That's ugly as hell, but
  18465. - * correctness is key even if we need to fix the offending softirq in
  18466. - * the first place.
  18467. - *
  18468. - * Note, this is different to tick_nohz_restart. We just kick the
  18469. - * timer and do not touch the other magic bits which need to be done
  18470. - * when idle is left.
  18471. - */
  18472. -static void tick_nohz_kick_tick(struct tick_sched *ts, ktime_t now)
  18473. -{
  18474. -#if 0
  18475. - /* Switch back to 2.6.27 behaviour */
  18476. - ktime_t delta;
  18477. -
  18478. - /*
  18479. - * Do not touch the tick device, when the next expiry is either
  18480. - * already reached or less/equal than the tick period.
  18481. - */
  18482. - delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now);
  18483. - if (delta.tv64 <= tick_period.tv64)
  18484. - return;
  18485. -
  18486. - tick_nohz_restart(ts, now);
  18487. -#endif
  18488. -}
  18489. -
  18490. static inline void tick_nohz_irq_enter(void)
  18491. {
  18492. struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
  18493. @@ -1130,10 +1112,8 @@ static inline void tick_nohz_irq_enter(void)
  18494. now = ktime_get();
  18495. if (ts->idle_active)
  18496. tick_nohz_stop_idle(ts, now);
  18497. - if (ts->tick_stopped) {
  18498. + if (ts->tick_stopped)
  18499. tick_nohz_update_jiffies(now);
  18500. - tick_nohz_kick_tick(ts, now);
  18501. - }
  18502. }
  18503. #else
  18504. @@ -1208,6 +1188,7 @@ void tick_setup_sched_timer(void)
  18505. * Emulate tick processing via per-CPU hrtimers:
  18506. */
  18507. hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  18508. + ts->sched_timer.irqsafe = 1;
  18509. ts->sched_timer.function = tick_sched_timer;
  18510. /* Get the next period (per cpu) */
  18511. diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
  18512. index 479d25cd3d4f..410c04b386e4 100644
  18513. --- a/kernel/time/timekeeping.c
  18514. +++ b/kernel/time/timekeeping.c
  18515. @@ -2319,8 +2319,10 @@ EXPORT_SYMBOL(hardpps);
  18516. */
  18517. void xtime_update(unsigned long ticks)
  18518. {
  18519. - write_seqlock(&jiffies_lock);
  18520. + raw_spin_lock(&jiffies_lock);
  18521. + write_seqcount_begin(&jiffies_seq);
  18522. do_timer(ticks);
  18523. - write_sequnlock(&jiffies_lock);
  18524. + write_seqcount_end(&jiffies_seq);
  18525. + raw_spin_unlock(&jiffies_lock);
  18526. update_wall_time();
  18527. }
  18528. diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
  18529. index 704f595ce83f..763a3e5121ff 100644
  18530. --- a/kernel/time/timekeeping.h
  18531. +++ b/kernel/time/timekeeping.h
  18532. @@ -19,7 +19,8 @@ extern void timekeeping_resume(void);
  18533. extern void do_timer(unsigned long ticks);
  18534. extern void update_wall_time(void);
  18535. -extern seqlock_t jiffies_lock;
  18536. +extern raw_spinlock_t jiffies_lock;
  18537. +extern seqcount_t jiffies_seq;
  18538. #define CS_NAME_LEN 32
  18539. diff --git a/kernel/time/timer.c b/kernel/time/timer.c
  18540. index 73164c3aa56b..b1d2d4026cfe 100644
  18541. --- a/kernel/time/timer.c
  18542. +++ b/kernel/time/timer.c
  18543. @@ -59,43 +59,156 @@ __visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
  18544. EXPORT_SYMBOL(jiffies_64);
  18545. /*
  18546. - * per-CPU timer vector definitions:
  18547. + * The timer wheel has LVL_DEPTH array levels. Each level provides an array of
  18548. + * LVL_SIZE buckets. Each level is driven by its own clock and therefore each
  18549. + * level has a different granularity.
  18550. + *
  18551. + * The level granularity is: LVL_CLK_DIV ^ lvl
  18552. + * The level clock frequency is: HZ / (LVL_CLK_DIV ^ level)
  18553. + *
  18554. + * The array level of a newly armed timer depends on the relative expiry
  18555. + * time. The farther away the expiry time is, the higher the array level and
  18556. + * therefore the coarser the granularity becomes.
  18557. + *
  18558. + * Contrary to the original timer wheel implementation, which aims for 'exact'
  18559. + * expiry of the timers, this implementation removes the need for recascading
  18560. + * the timers into the lower array levels. The previous 'classic' timer wheel
  18561. + * implementation of the kernel already violated the 'exact' expiry by adding
  18562. + * slack to the expiry time to provide batched expiration. The granularity
  18563. + * levels provide implicit batching.
  18564. + *
  18565. + * This is an optimization of the original timer wheel implementation for the
  18566. + * majority of the timer wheel use cases: timeouts. The vast majority of
  18567. + * timeout timers (networking, disk I/O ...) are canceled before expiry. If
  18568. + * the timeout expires it indicates that normal operation is disturbed, so it
  18569. + * does not matter much whether the timeout comes with a slight delay.
  18570. + *
  18571. + * The only exception to this are networking timers with a small expiry
  18572. + * time. They rely on the granularity. Those fit into the first wheel level,
  18573. + * which has HZ granularity.
  18574. + *
  18575. + * We don't have cascading anymore. Timers with an expiry time above the
  18576. + * capacity of the last wheel level are force expired at the maximum timeout
  18577. + * value of the last wheel level. From data sampling we know that the maximum
  18578. + * value observed is 5 days (network connection tracking), so this should not
  18579. + * be an issue.
  18580. + *
  18581. + * The currently chosen array constant values are a good compromise between
  18582. + * array size and granularity.
  18583. + *
  18584. + * This results in the following granularity and range levels:
  18585. + *
  18586. + * HZ 1000 steps
  18587. + * Level Offset Granularity Range
  18588. + * 0 0 1 ms 0 ms - 63 ms
  18589. + * 1 64 8 ms 64 ms - 511 ms
  18590. + * 2 128 64 ms 512 ms - 4095 ms (512ms - ~4s)
  18591. + * 3 192 512 ms 4096 ms - 32767 ms (~4s - ~32s)
  18592. + * 4 256 4096 ms (~4s) 32768 ms - 262143 ms (~32s - ~4m)
  18593. + * 5 320 32768 ms (~32s) 262144 ms - 2097151 ms (~4m - ~34m)
  18594. + * 6 384 262144 ms (~4m) 2097152 ms - 16777215 ms (~34m - ~4h)
  18595. + * 7 448 2097152 ms (~34m) 16777216 ms - 134217727 ms (~4h - ~1d)
  18596. + * 8 512 16777216 ms (~4h) 134217728 ms - 1073741822 ms (~1d - ~12d)
  18597. + *
  18598. + * HZ 300
  18599. + * Level Offset Granularity Range
  18600. + * 0 0 3 ms 0 ms - 210 ms
  18601. + * 1 64 26 ms 213 ms - 1703 ms (213ms - ~1s)
  18602. + * 2 128 213 ms 1706 ms - 13650 ms (~1s - ~13s)
  18603. + * 3 192 1706 ms (~1s) 13653 ms - 109223 ms (~13s - ~1m)
  18604. + * 4 256 13653 ms (~13s) 109226 ms - 873810 ms (~1m - ~14m)
  18605. + * 5 320 109226 ms (~1m) 873813 ms - 6990503 ms (~14m - ~1h)
  18606. + * 6 384 873813 ms (~14m) 6990506 ms - 55924050 ms (~1h - ~15h)
  18607. + * 7 448 6990506 ms (~1h) 55924053 ms - 447392423 ms (~15h - ~5d)
  18608. + * 8 512 55924053 ms (~15h) 447392426 ms - 3579139406 ms (~5d - ~41d)
  18609. + *
  18610. + * HZ 250
  18611. + * Level Offset Granularity Range
  18612. + * 0 0 4 ms 0 ms - 255 ms
  18613. + * 1 64 32 ms 256 ms - 2047 ms (256ms - ~2s)
  18614. + * 2 128 256 ms 2048 ms - 16383 ms (~2s - ~16s)
  18615. + * 3 192 2048 ms (~2s) 16384 ms - 131071 ms (~16s - ~2m)
  18616. + * 4 256 16384 ms (~16s) 131072 ms - 1048575 ms (~2m - ~17m)
  18617. + * 5 320 131072 ms (~2m) 1048576 ms - 8388607 ms (~17m - ~2h)
  18618. + * 6 384 1048576 ms (~17m) 8388608 ms - 67108863 ms (~2h - ~18h)
  18619. + * 7 448 8388608 ms (~2h) 67108864 ms - 536870911 ms (~18h - ~6d)
  18620. + * 8 512 67108864 ms (~18h) 536870912 ms - 4294967288 ms (~6d - ~49d)
  18621. + *
  18622. + * HZ 100
  18623. + * Level Offset Granularity Range
  18624. + * 0 0 10 ms 0 ms - 630 ms
  18625. + * 1 64 80 ms 640 ms - 5110 ms (640ms - ~5s)
  18626. + * 2 128 640 ms 5120 ms - 40950 ms (~5s - ~40s)
  18627. + * 3 192 5120 ms (~5s) 40960 ms - 327670 ms (~40s - ~5m)
  18628. + * 4 256 40960 ms (~40s) 327680 ms - 2621430 ms (~5m - ~43m)
  18629. + * 5 320 327680 ms (~5m) 2621440 ms - 20971510 ms (~43m - ~5h)
  18630. + * 6 384 2621440 ms (~43m) 20971520 ms - 167772150 ms (~5h - ~1d)
  18631. + * 7 448 20971520 ms (~5h) 167772160 ms - 1342177270 ms (~1d - ~15d)
  18632. */
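Illustration (not part of the patch): the tables above follow directly from LVL_BITS = 6 and LVL_CLK_SHIFT = 3 introduced just below. A standalone C program that reproduces the offset, granularity and per-level delta cutoff for the HZ=1000 case, where one tick is one millisecond:

    /* Editorial sketch only: derive the HZ=1000 table above from the LVL_*
     * arithmetic introduced below (LVL_BITS = 6, LVL_CLK_SHIFT = 3). */
    #include <stdio.h>

    #define LVL_CLK_SHIFT   3
    #define LVL_BITS        6
    #define LVL_SIZE        (1UL << LVL_BITS)
    #define LVL_SHIFT(n)    ((n) * LVL_CLK_SHIFT)
    #define LVL_GRAN(n)     (1UL << LVL_SHIFT(n))
    #define LVL_OFFS(n)     ((n) * LVL_SIZE)
    #define LVL_START(n)    ((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT))
    #define LVL_DEPTH       9       /* HZ > 100 */

    int main(void)
    {
            unsigned long lvl;

            printf("Level  Offset  Granularity  Covers deltas below\n");
            for (lvl = 0; lvl < LVL_DEPTH; lvl++)
                    printf("%5lu  %6lu  %11lu  %lu ticks\n",
                           lvl, LVL_OFFS(lvl), LVL_GRAN(lvl), LVL_START(lvl + 1));
            return 0;
    }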
  18633. -#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
  18634. -#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
  18635. -#define TVN_SIZE (1 << TVN_BITS)
  18636. -#define TVR_SIZE (1 << TVR_BITS)
  18637. -#define TVN_MASK (TVN_SIZE - 1)
  18638. -#define TVR_MASK (TVR_SIZE - 1)
  18639. -#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
  18640. -struct tvec {
  18641. - struct hlist_head vec[TVN_SIZE];
  18642. -};
  18643. +/* Clock divisor for the next level */
  18644. +#define LVL_CLK_SHIFT 3
  18645. +#define LVL_CLK_DIV (1UL << LVL_CLK_SHIFT)
  18646. +#define LVL_CLK_MASK (LVL_CLK_DIV - 1)
  18647. +#define LVL_SHIFT(n) ((n) * LVL_CLK_SHIFT)
  18648. +#define LVL_GRAN(n) (1UL << LVL_SHIFT(n))
  18649. -struct tvec_root {
  18650. - struct hlist_head vec[TVR_SIZE];
  18651. -};
  18652. +/*
  18653. + * The time start value for each level to select the bucket at enqueue
  18654. + * time.
  18655. + */
  18656. +#define LVL_START(n) ((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT))
  18657. -struct tvec_base {
  18658. - spinlock_t lock;
  18659. - struct timer_list *running_timer;
  18660. - unsigned long timer_jiffies;
  18661. - unsigned long next_timer;
  18662. - unsigned long active_timers;
  18663. - unsigned long all_timers;
  18664. - int cpu;
  18665. - bool migration_enabled;
  18666. - bool nohz_active;
  18667. - struct tvec_root tv1;
  18668. - struct tvec tv2;
  18669. - struct tvec tv3;
  18670. - struct tvec tv4;
  18671. - struct tvec tv5;
  18672. +/* Size of each clock level */
  18673. +#define LVL_BITS 6
  18674. +#define LVL_SIZE (1UL << LVL_BITS)
  18675. +#define LVL_MASK (LVL_SIZE - 1)
  18676. +#define LVL_OFFS(n) ((n) * LVL_SIZE)
  18677. +
  18678. +/* Level depth */
  18679. +#if HZ > 100
  18680. +# define LVL_DEPTH 9
  18681. +# else
  18682. +# define LVL_DEPTH 8
  18683. +#endif
  18684. +
  18685. +/* The cutoff (max. capacity of the wheel) */
  18686. +#define WHEEL_TIMEOUT_CUTOFF (LVL_START(LVL_DEPTH))
  18687. +#define WHEEL_TIMEOUT_MAX (WHEEL_TIMEOUT_CUTOFF - LVL_GRAN(LVL_DEPTH - 1))
  18688. +
  18689. +/*
  18690. + * The resulting wheel size. If NOHZ is configured we allocate two
  18691. + * wheels so we have a separate storage for the deferrable timers.
  18692. + */
  18693. +#define WHEEL_SIZE (LVL_SIZE * LVL_DEPTH)
  18694. +
  18695. +#ifdef CONFIG_NO_HZ_COMMON
  18696. +# define NR_BASES 2
  18697. +# define BASE_STD 0
  18698. +# define BASE_DEF 1
  18699. +#else
  18700. +# define NR_BASES 1
  18701. +# define BASE_STD 0
  18702. +# define BASE_DEF 0
  18703. +#endif
  18704. +
  18705. +struct timer_base {
  18706. + raw_spinlock_t lock;
  18707. + struct timer_list *running_timer;
  18708. +#ifdef CONFIG_PREEMPT_RT_FULL
  18709. + struct swait_queue_head wait_for_running_timer;
  18710. +#endif
  18711. + unsigned long clk;
  18712. + unsigned long next_expiry;
  18713. + unsigned int cpu;
  18714. + bool migration_enabled;
  18715. + bool nohz_active;
  18716. + bool is_idle;
  18717. + DECLARE_BITMAP(pending_map, WHEEL_SIZE);
  18718. + struct hlist_head vectors[WHEEL_SIZE];
  18719. } ____cacheline_aligned;
  18720. -
  18721. -static DEFINE_PER_CPU(struct tvec_base, tvec_bases);
  18722. +static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);
  18723. #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
  18724. unsigned int sysctl_timer_migration = 1;
  18725. @@ -106,15 +219,17 @@ void timers_update_migration(bool update_nohz)
  18726. unsigned int cpu;
  18727. /* Avoid the loop, if nothing to update */
  18728. - if (this_cpu_read(tvec_bases.migration_enabled) == on)
  18729. + if (this_cpu_read(timer_bases[BASE_STD].migration_enabled) == on)
  18730. return;
  18731. for_each_possible_cpu(cpu) {
  18732. - per_cpu(tvec_bases.migration_enabled, cpu) = on;
  18733. + per_cpu(timer_bases[BASE_STD].migration_enabled, cpu) = on;
  18734. + per_cpu(timer_bases[BASE_DEF].migration_enabled, cpu) = on;
  18735. per_cpu(hrtimer_bases.migration_enabled, cpu) = on;
  18736. if (!update_nohz)
  18737. continue;
  18738. - per_cpu(tvec_bases.nohz_active, cpu) = true;
  18739. + per_cpu(timer_bases[BASE_STD].nohz_active, cpu) = true;
  18740. + per_cpu(timer_bases[BASE_DEF].nohz_active, cpu) = true;
  18741. per_cpu(hrtimer_bases.nohz_active, cpu) = true;
  18742. }
  18743. }
  18744. @@ -133,20 +248,6 @@ int timer_migration_handler(struct ctl_table *table, int write,
  18745. mutex_unlock(&mutex);
  18746. return ret;
  18747. }
  18748. -
  18749. -static inline struct tvec_base *get_target_base(struct tvec_base *base,
  18750. - int pinned)
  18751. -{
  18752. - if (pinned || !base->migration_enabled)
  18753. - return this_cpu_ptr(&tvec_bases);
  18754. - return per_cpu_ptr(&tvec_bases, get_nohz_timer_target());
  18755. -}
  18756. -#else
  18757. -static inline struct tvec_base *get_target_base(struct tvec_base *base,
  18758. - int pinned)
  18759. -{
  18760. - return this_cpu_ptr(&tvec_bases);
  18761. -}
  18762. #endif
  18763. static unsigned long round_jiffies_common(unsigned long j, int cpu,
  18764. @@ -351,101 +452,126 @@ unsigned long round_jiffies_up_relative(unsigned long j)
  18765. }
  18766. EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
  18767. -/**
  18768. - * set_timer_slack - set the allowed slack for a timer
  18769. - * @timer: the timer to be modified
  18770. - * @slack_hz: the amount of time (in jiffies) allowed for rounding
  18771. - *
  18772. - * Set the amount of time, in jiffies, that a certain timer has
  18773. - * in terms of slack. By setting this value, the timer subsystem
  18774. - * will schedule the actual timer somewhere between
  18775. - * the time mod_timer() asks for, and that time plus the slack.
  18776. - *
  18777. - * By setting the slack to -1, a percentage of the delay is used
  18778. - * instead.
  18779. - */
  18780. -void set_timer_slack(struct timer_list *timer, int slack_hz)
  18781. +
  18782. +static inline unsigned int timer_get_idx(struct timer_list *timer)
  18783. {
  18784. - timer->slack = slack_hz;
  18785. + return (timer->flags & TIMER_ARRAYMASK) >> TIMER_ARRAYSHIFT;
  18786. +}
  18787. +
  18788. +static inline void timer_set_idx(struct timer_list *timer, unsigned int idx)
  18789. +{
  18790. + timer->flags = (timer->flags & ~TIMER_ARRAYMASK) |
  18791. + idx << TIMER_ARRAYSHIFT;
  18792. +}
  18793. +
  18794. +/*
  18795. + * Helper function to calculate the array index for a given expiry
  18796. + * time.
  18797. + */
  18798. +static inline unsigned calc_index(unsigned expires, unsigned lvl)
  18799. +{
  18800. + expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
  18801. + return LVL_OFFS(lvl) + (expires & LVL_MASK);
  18802. +}
  18803. +
  18804. +static int calc_wheel_index(unsigned long expires, unsigned long clk)
  18805. +{
  18806. + unsigned long delta = expires - clk;
  18807. + unsigned int idx;
  18808. +
  18809. + if (delta < LVL_START(1)) {
  18810. + idx = calc_index(expires, 0);
  18811. + } else if (delta < LVL_START(2)) {
  18812. + idx = calc_index(expires, 1);
  18813. + } else if (delta < LVL_START(3)) {
  18814. + idx = calc_index(expires, 2);
  18815. + } else if (delta < LVL_START(4)) {
  18816. + idx = calc_index(expires, 3);
  18817. + } else if (delta < LVL_START(5)) {
  18818. + idx = calc_index(expires, 4);
  18819. + } else if (delta < LVL_START(6)) {
  18820. + idx = calc_index(expires, 5);
  18821. + } else if (delta < LVL_START(7)) {
  18822. + idx = calc_index(expires, 6);
  18823. + } else if (LVL_DEPTH > 8 && delta < LVL_START(8)) {
  18824. + idx = calc_index(expires, 7);
  18825. + } else if ((long) delta < 0) {
  18826. + idx = clk & LVL_MASK;
  18827. + } else {
  18828. + /*
  18829. + * Force obscenely large timeouts to expire at the
  18830. + * capacity limit of the wheel.
  18831. + */
  18832. + if (expires >= WHEEL_TIMEOUT_CUTOFF)
  18833. + expires = WHEEL_TIMEOUT_MAX;
  18834. +
  18835. + idx = calc_index(expires, LVL_DEPTH - 1);
  18836. + }
  18837. + return idx;
  18838. +}
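Illustration (not part of the patch): how the level checks in calc_wheel_index() and the rounding in calc_index() cooperate, shown for a few sample relative expiries at HZ=1000. The clamping against WHEEL_TIMEOUT_CUTOFF in the last level is omitted to keep the sketch short, and the base clock value is made up:

    /* Editorial sketch only: mirror the level/bucket selection of
     * calc_wheel_index()/calc_index() above in user space. */
    #include <stdio.h>

    #define LVL_CLK_SHIFT   3
    #define LVL_BITS        6
    #define LVL_SIZE        (1UL << LVL_BITS)
    #define LVL_MASK        (LVL_SIZE - 1)
    #define LVL_SHIFT(n)    ((n) * LVL_CLK_SHIFT)
    #define LVL_GRAN(n)     (1UL << LVL_SHIFT(n))
    #define LVL_OFFS(n)     ((n) * LVL_SIZE)
    #define LVL_START(n)    ((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT))
    #define LVL_DEPTH       9

    static unsigned long calc_index(unsigned long expires, unsigned int lvl)
    {
            /* Round up to the level granularity, then pick the bucket in the level */
            expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
            return LVL_OFFS(lvl) + (expires & LVL_MASK);
    }

    int main(void)
    {
            unsigned long clk = 1000000;    /* made-up base clock (in jiffies) */
            unsigned long deltas[] = { 10, 100, 5000, 3600000 }; /* 10ms, 100ms, 5s, 1h */
            unsigned int i, lvl;

            for (i = 0; i < sizeof(deltas) / sizeof(deltas[0]); i++) {
                    unsigned long delta = deltas[i];

                    /* Same cascade of checks as calc_wheel_index(): pick the first
                     * level whose LVL_START() bound still exceeds the delta. */
                    for (lvl = 0; lvl < LVL_DEPTH - 1; lvl++)
                            if (delta < LVL_START(lvl + 1))
                                    break;

                    printf("delta %7lu ticks -> level %u, bucket %lu, granularity %lu ticks\n",
                           delta, lvl, calc_index(clk + delta, lvl), LVL_GRAN(lvl));
            }
            return 0;
    }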
  18839. +
  18840. +/*
  18841. + * Enqueue the timer into the hash bucket, mark it pending in
  18842. + * the bitmap and store the index in the timer flags.
  18843. + */
  18844. +static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
  18845. + unsigned int idx)
  18846. +{
  18847. + hlist_add_head(&timer->entry, base->vectors + idx);
  18848. + __set_bit(idx, base->pending_map);
  18849. + timer_set_idx(timer, idx);
  18850. }
  18851. -EXPORT_SYMBOL_GPL(set_timer_slack);
  18852. static void
  18853. -__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
  18854. +__internal_add_timer(struct timer_base *base, struct timer_list *timer)
  18855. {
  18856. - unsigned long expires = timer->expires;
  18857. - unsigned long idx = expires - base->timer_jiffies;
  18858. - struct hlist_head *vec;
  18859. + unsigned int idx;
  18860. - if (idx < TVR_SIZE) {
  18861. - int i = expires & TVR_MASK;
  18862. - vec = base->tv1.vec + i;
  18863. - } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
  18864. - int i = (expires >> TVR_BITS) & TVN_MASK;
  18865. - vec = base->tv2.vec + i;
  18866. - } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
  18867. - int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
  18868. - vec = base->tv3.vec + i;
  18869. - } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
  18870. - int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
  18871. - vec = base->tv4.vec + i;
  18872. - } else if ((signed long) idx < 0) {
  18873. - /*
  18874. - * Can happen if you add a timer with expires == jiffies,
  18875. - * or you set a timer to go off in the past
  18876. - */
  18877. - vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
  18878. - } else {
  18879. - int i;
  18880. - /* If the timeout is larger than MAX_TVAL (on 64-bit
  18881. - * architectures or with CONFIG_BASE_SMALL=1) then we
  18882. - * use the maximum timeout.
  18883. - */
  18884. - if (idx > MAX_TVAL) {
  18885. - idx = MAX_TVAL;
  18886. - expires = idx + base->timer_jiffies;
  18887. - }
  18888. - i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
  18889. - vec = base->tv5.vec + i;
  18890. - }
  18891. -
  18892. - hlist_add_head(&timer->entry, vec);
  18893. + idx = calc_wheel_index(timer->expires, base->clk);
  18894. + enqueue_timer(base, timer, idx);
  18895. }
  18896. -static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
  18897. +static void
  18898. +trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer)
  18899. {
  18900. - /* Advance base->jiffies, if the base is empty */
  18901. - if (!base->all_timers++)
  18902. - base->timer_jiffies = jiffies;
  18903. -
  18904. - __internal_add_timer(base, timer);
  18905. - /*
  18906. - * Update base->active_timers and base->next_timer
  18907. - */
  18908. - if (!(timer->flags & TIMER_DEFERRABLE)) {
  18909. - if (!base->active_timers++ ||
  18910. - time_before(timer->expires, base->next_timer))
  18911. - base->next_timer = timer->expires;
  18912. - }
  18913. + if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
  18914. + return;
  18915. /*
  18916. - * Check whether the other CPU is in dynticks mode and needs
  18917. - * to be triggered to reevaluate the timer wheel.
  18918. - * We are protected against the other CPU fiddling
  18919. - * with the timer by holding the timer base lock. This also
  18920. - * makes sure that a CPU on the way to stop its tick can not
  18921. - * evaluate the timer wheel.
  18922. - *
  18923. - * Spare the IPI for deferrable timers on idle targets though.
  18924. - * The next busy ticks will take care of it. Except full dynticks
  18925. - * require special care against races with idle_cpu(), lets deal
  18926. - * with that later.
  18927. + * This wants some optimizing similar to the below, but we do that
  18928. + * when we switch from push to pull for deferrable timers.
  18929. */
  18930. - if (base->nohz_active) {
  18931. - if (!(timer->flags & TIMER_DEFERRABLE) ||
  18932. - tick_nohz_full_cpu(base->cpu))
  18933. + if (timer->flags & TIMER_DEFERRABLE) {
  18934. + if (tick_nohz_full_cpu(base->cpu))
  18935. wake_up_nohz_cpu(base->cpu);
  18936. + return;
  18937. }
  18938. +
  18939. + /*
  18940. + * We might have to IPI the remote CPU if the base is idle and the
  18941. + * timer is not deferrable. If the other cpu is on the way to idle
  18942. + * then it can't set base->is_idle as we hold base lock.
  18943. + */
  18944. + if (!base->is_idle)
  18945. + return;
  18946. +
  18947. + /* Check whether this is the new first expiring timer */
  18948. + if (time_after_eq(timer->expires, base->next_expiry))
  18949. + return;
  18950. +
  18951. + /*
  18952. + * Set the next expiry time and kick the cpu so it can reevaluate the
  18953. + * wheel
  18954. + */
  18955. + base->next_expiry = timer->expires;
  18956. + wake_up_nohz_cpu(base->cpu);
  18957. +}
  18958. +
  18959. +static void
  18960. +internal_add_timer(struct timer_base *base, struct timer_list *timer)
  18961. +{
  18962. + __internal_add_timer(base, timer);
  18963. + trigger_dyntick_cpu(base, timer);
  18964. }
  18965. #ifdef CONFIG_TIMER_STATS
  18966. @@ -681,7 +807,6 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags,
  18967. {
  18968. timer->entry.pprev = NULL;
  18969. timer->flags = flags | raw_smp_processor_id();
  18970. - timer->slack = -1;
  18971. #ifdef CONFIG_TIMER_STATS
  18972. timer->start_site = NULL;
  18973. timer->start_pid = -1;
  18974. @@ -721,71 +846,170 @@ static inline void detach_timer(struct timer_list *timer, bool clear_pending)
  18975. entry->next = LIST_POISON2;
  18976. }
  18977. -static inline void
  18978. -detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
  18979. -{
  18980. - detach_timer(timer, true);
  18981. - if (!(timer->flags & TIMER_DEFERRABLE))
  18982. - base->active_timers--;
  18983. - base->all_timers--;
  18984. -}
  18985. -
  18986. -static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
  18987. +static int detach_if_pending(struct timer_list *timer, struct timer_base *base,
  18988. bool clear_pending)
  18989. {
  18990. + unsigned idx = timer_get_idx(timer);
  18991. +
  18992. if (!timer_pending(timer))
  18993. return 0;
  18994. + if (hlist_is_singular_node(&timer->entry, base->vectors + idx))
  18995. + __clear_bit(idx, base->pending_map);
  18996. +
  18997. detach_timer(timer, clear_pending);
  18998. - if (!(timer->flags & TIMER_DEFERRABLE)) {
  18999. - base->active_timers--;
  19000. - if (timer->expires == base->next_timer)
  19001. - base->next_timer = base->timer_jiffies;
  19002. - }
  19003. - /* If this was the last timer, advance base->jiffies */
  19004. - if (!--base->all_timers)
  19005. - base->timer_jiffies = jiffies;
  19006. return 1;
  19007. }
  19008. +static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
  19009. +{
  19010. + struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu);
  19011. +
  19012. + /*
  19013. + * If the timer is deferrable and nohz is active then we need to use
  19014. + * the deferrable base.
  19015. + */
  19016. + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active &&
  19017. + (tflags & TIMER_DEFERRABLE))
  19018. + base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu);
  19019. + return base;
  19020. +}
  19021. +
  19022. +static inline struct timer_base *get_timer_this_cpu_base(u32 tflags)
  19023. +{
  19024. + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
  19025. +
  19026. + /*
  19027. + * If the timer is deferrable and nohz is active then we need to use
  19028. + * the deferrable base.
  19029. + */
  19030. + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active &&
  19031. + (tflags & TIMER_DEFERRABLE))
  19032. + base = this_cpu_ptr(&timer_bases[BASE_DEF]);
  19033. + return base;
  19034. +}
  19035. +
  19036. +static inline struct timer_base *get_timer_base(u32 tflags)
  19037. +{
  19038. + return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK);
  19039. +}
  19040. +
  19041. +#ifdef CONFIG_NO_HZ_COMMON
  19042. +static inline struct timer_base *__get_target_base(struct timer_base *base,
  19043. + unsigned tflags)
  19044. +{
  19045. +#ifdef CONFIG_SMP
  19046. + if ((tflags & TIMER_PINNED) || !base->migration_enabled)
  19047. + return get_timer_this_cpu_base(tflags);
  19048. + return get_timer_cpu_base(tflags, get_nohz_timer_target());
  19049. +#else
  19050. + return get_timer_this_cpu_base(tflags);
  19051. +#endif
  19052. +}
  19053. +
  19054. +static inline void forward_timer_base(struct timer_base *base)
  19055. +{
  19056. + /*
  19057. + * We only forward the base when it's idle and we have a delta between
  19058. + * base clock and jiffies.
  19059. + */
  19060. + if (!base->is_idle || (long) (jiffies - base->clk) < 2)
  19061. + return;
  19062. +
  19063. + /*
  19064. + * If the next expiry value is > jiffies, then we fast forward to
  19065. + * jiffies otherwise we forward to the next expiry value.
  19066. + */
  19067. + if (time_after(base->next_expiry, jiffies))
  19068. + base->clk = jiffies;
  19069. + else
  19070. + base->clk = base->next_expiry;
  19071. +}
  19072. +#else
  19073. +static inline struct timer_base *__get_target_base(struct timer_base *base,
  19074. + unsigned tflags)
  19075. +{
  19076. + return get_timer_this_cpu_base(tflags);
  19077. +}
  19078. +
  19079. +static inline void forward_timer_base(struct timer_base *base) { }
  19080. +#endif
  19081. +
  19082. +static inline struct timer_base *get_target_base(struct timer_base *base,
  19083. + unsigned tflags)
  19084. +{
  19085. + struct timer_base *target = __get_target_base(base, tflags);
  19086. +
  19087. + forward_timer_base(target);
  19088. + return target;
  19089. +}
  19090. +
  19091. /*
  19092. - * We are using hashed locking: holding per_cpu(tvec_bases).lock
  19093. - * means that all timers which are tied to this base via timer->base are
  19094. - * locked, and the base itself is locked too.
  19095. + * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
  19096. + * that all timers which are tied to this base are locked, and the base itself
  19097. + * is locked too.
  19098. *
  19099. * So __run_timers/migrate_timers can safely modify all timers which could
  19100. - * be found on ->tvX lists.
  19101. + * be found in the base->vectors array.
  19102. *
  19103. - * When the timer's base is locked and removed from the list, the
  19104. - * TIMER_MIGRATING flag is set, FIXME
  19105. + * When a timer is migrating then the TIMER_MIGRATING flag is set and we need
  19106. + * to wait until the migration is done.
  19107. */
  19108. -static struct tvec_base *lock_timer_base(struct timer_list *timer,
  19109. - unsigned long *flags)
  19110. +static struct timer_base *lock_timer_base(struct timer_list *timer,
  19111. + unsigned long *flags)
  19112. __acquires(timer->base->lock)
  19113. {
  19114. for (;;) {
  19115. + struct timer_base *base;
  19116. u32 tf = timer->flags;
  19117. - struct tvec_base *base;
  19118. if (!(tf & TIMER_MIGRATING)) {
  19119. - base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK);
  19120. - spin_lock_irqsave(&base->lock, *flags);
  19121. + base = get_timer_base(tf);
  19122. + raw_spin_lock_irqsave(&base->lock, *flags);
  19123. if (timer->flags == tf)
  19124. return base;
  19125. - spin_unlock_irqrestore(&base->lock, *flags);
  19126. + raw_spin_unlock_irqrestore(&base->lock, *flags);
  19127. }
  19128. cpu_relax();
  19129. }
  19130. }
  19131. static inline int
  19132. -__mod_timer(struct timer_list *timer, unsigned long expires,
  19133. - bool pending_only, int pinned)
  19134. +__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
  19135. {
  19136. - struct tvec_base *base, *new_base;
  19137. - unsigned long flags;
  19138. + struct timer_base *base, *new_base;
  19139. + unsigned int idx = UINT_MAX;
  19140. + unsigned long clk = 0, flags;
  19141. int ret = 0;
  19142. + /*
  19143. + * This is a common optimization triggered by the networking code - if
  19144. + * the timer is re-modified to be the same thing or ends up in the
  19145. + * same array bucket then just return:
  19146. + */
  19147. + if (timer_pending(timer)) {
  19148. + if (timer->expires == expires)
  19149. + return 1;
  19150. + /*
  19151. + * Take the current timer_jiffies of base, but without holding
  19152. + * the lock!
  19153. + */
  19154. + base = get_timer_base(timer->flags);
  19155. + clk = base->clk;
  19156. +
  19157. + idx = calc_wheel_index(expires, clk);
  19158. +
  19159. + /*
  19160. + * Retrieve and compare the array index of the pending
  19161. + * timer. If it matches set the expiry to the new value so a
  19162. + * subsequent call will exit in the expires check above.
  19163. + */
  19164. + if (idx == timer_get_idx(timer)) {
  19165. + timer->expires = expires;
  19166. + return 1;
  19167. + }
  19168. + }
  19169. +
  19170. timer_stats_timer_set_start_info(timer);
  19171. BUG_ON(!timer->function);
  19172. @@ -797,33 +1021,44 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
  19173. debug_activate(timer, expires);
  19174. - new_base = get_target_base(base, pinned);
  19175. + new_base = get_target_base(base, timer->flags);
  19176. if (base != new_base) {
  19177. /*
  19178. - * We are trying to schedule the timer on the local CPU.
  19179. + * We are trying to schedule the timer on the new base.
  19180. * However we can't change timer's base while it is running,
  19181. * otherwise del_timer_sync() can't detect that the timer's
  19182. - * handler yet has not finished. This also guarantees that
  19183. - * the timer is serialized wrt itself.
  19184. + * handler has not yet finished. This also guarantees that the
  19185. + * timer is serialized wrt itself.
  19186. */
  19187. if (likely(base->running_timer != timer)) {
  19188. /* See the comment in lock_timer_base() */
  19189. timer->flags |= TIMER_MIGRATING;
  19190. - spin_unlock(&base->lock);
  19191. + raw_spin_unlock(&base->lock);
  19192. base = new_base;
  19193. - spin_lock(&base->lock);
  19194. + raw_spin_lock(&base->lock);
  19195. WRITE_ONCE(timer->flags,
  19196. (timer->flags & ~TIMER_BASEMASK) | base->cpu);
  19197. }
  19198. }
  19199. timer->expires = expires;
  19200. - internal_add_timer(base, timer);
  19201. + /*
  19202. + * If idx was calculated above and the base time did not advance
  19203. + * between calculating idx and taking the lock, only enqueue_timer()
  19204. + * and trigger_dyntick_cpu() are required. Otherwise we need to
  19205. + * (re)calculate the wheel index via internal_add_timer().
  19206. + */
  19207. + if (idx != UINT_MAX && clk == base->clk) {
  19208. + enqueue_timer(base, timer, idx);
  19209. + trigger_dyntick_cpu(base, timer);
  19210. + } else {
  19211. + internal_add_timer(base, timer);
  19212. + }
  19213. out_unlock:
  19214. - spin_unlock_irqrestore(&base->lock, flags);
  19215. + raw_spin_unlock_irqrestore(&base->lock, flags);
  19216. return ret;
  19217. }
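Illustration (not part of the patch): the re-arm fast path at the top of this hunk covers the common networking pattern of pushing a watchdog timeout out a little on every packet. If the new expiry maps to the bucket the timer already sits in, only timer->expires is updated and the base lock is never taken. A standalone sketch using the patch's wheel constants with made-up clock and delta values:

    /* Editorial sketch only: why repeated re-arming often hits the
     * "same array bucket" fast path of __mod_timer() above. */
    #include <stdio.h>

    #define LVL_CLK_SHIFT   3
    #define LVL_BITS        6
    #define LVL_SIZE        (1UL << LVL_BITS)
    #define LVL_MASK        (LVL_SIZE - 1)
    #define LVL_SHIFT(n)    ((n) * LVL_CLK_SHIFT)
    #define LVL_GRAN(n)     (1UL << LVL_SHIFT(n))
    #define LVL_OFFS(n)     ((n) * LVL_SIZE)

    static unsigned long calc_index(unsigned long expires, unsigned int lvl)
    {
            expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
            return LVL_OFFS(lvl) + (expires & LVL_MASK);
    }

    int main(void)
    {
            unsigned long clk = 1000000;            /* base->clk at enqueue time */
            unsigned int lvl = 3;                   /* ~5s timeouts live in level 3 (512-tick buckets) */
            unsigned long pending = calc_index(clk + 5000, lvl);    /* currently queued expiry */
            unsigned long deltas[] = { 4600, 5300 };/* two candidate new timeouts */
            unsigned int i;

            for (i = 0; i < 2; i++) {
                    unsigned long idx = calc_index(clk + deltas[i], lvl);

                    printf("re-arm to clk+%lu: bucket %lu vs pending %lu -> %s\n",
                           deltas[i], idx, pending,
                           idx == pending ? "fast path (just update expires)"
                                          : "slow path (requeue under base->lock)");
            }
            return 0;
    }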
  19218. @@ -840,49 +1075,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
  19219. */
  19220. int mod_timer_pending(struct timer_list *timer, unsigned long expires)
  19221. {
  19222. - return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
  19223. + return __mod_timer(timer, expires, true);
  19224. }
  19225. EXPORT_SYMBOL(mod_timer_pending);
  19226. -/*
  19227. - * Decide where to put the timer while taking the slack into account
  19228. - *
  19229. - * Algorithm:
  19230. - * 1) calculate the maximum (absolute) time
  19231. - * 2) calculate the highest bit where the expires and new max are different
  19232. - * 3) use this bit to make a mask
  19233. - * 4) use the bitmask to round down the maximum time, so that all last
  19234. - * bits are zeros
  19235. - */
  19236. -static inline
  19237. -unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
  19238. -{
  19239. - unsigned long expires_limit, mask;
  19240. - int bit;
  19241. -
  19242. - if (timer->slack >= 0) {
  19243. - expires_limit = expires + timer->slack;
  19244. - } else {
  19245. - long delta = expires - jiffies;
  19246. -
  19247. - if (delta < 256)
  19248. - return expires;
  19249. -
  19250. - expires_limit = expires + delta / 256;
  19251. - }
  19252. - mask = expires ^ expires_limit;
  19253. - if (mask == 0)
  19254. - return expires;
  19255. -
  19256. - bit = __fls(mask);
  19257. -
  19258. - mask = (1UL << bit) - 1;
  19259. -
  19260. - expires_limit = expires_limit & ~(mask);
  19261. -
  19262. - return expires_limit;
  19263. -}
  19264. -
  19265. /**
  19266. * mod_timer - modify a timer's timeout
  19267. * @timer: the timer to be modified
  19268. @@ -905,49 +1101,11 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
  19269. */
  19270. int mod_timer(struct timer_list *timer, unsigned long expires)
  19271. {
  19272. - expires = apply_slack(timer, expires);
  19273. -
  19274. - /*
  19275. - * This is a common optimization triggered by the
  19276. - * networking code - if the timer is re-modified
  19277. - * to be the same thing then just return:
  19278. - */
  19279. - if (timer_pending(timer) && timer->expires == expires)
  19280. - return 1;
  19281. -
  19282. - return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
  19283. + return __mod_timer(timer, expires, false);
  19284. }
  19285. EXPORT_SYMBOL(mod_timer);
  19286. /**
  19287. - * mod_timer_pinned - modify a timer's timeout
  19288. - * @timer: the timer to be modified
  19289. - * @expires: new timeout in jiffies
  19290. - *
  19291. - * mod_timer_pinned() is a way to update the expire field of an
  19292. - * active timer (if the timer is inactive it will be activated)
  19293. - * and to ensure that the timer is scheduled on the current CPU.
  19294. - *
  19295. - * Note that this does not prevent the timer from being migrated
  19296. - * when the current CPU goes offline. If this is a problem for
  19297. - * you, use CPU-hotplug notifiers to handle it correctly, for
  19298. - * example, cancelling the timer when the corresponding CPU goes
  19299. - * offline.
  19300. - *
  19301. - * mod_timer_pinned(timer, expires) is equivalent to:
  19302. - *
  19303. - * del_timer(timer); timer->expires = expires; add_timer(timer);
  19304. - */
  19305. -int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
  19306. -{
  19307. - if (timer->expires == expires && timer_pending(timer))
  19308. - return 1;
  19309. -
  19310. - return __mod_timer(timer, expires, false, TIMER_PINNED);
  19311. -}
  19312. -EXPORT_SYMBOL(mod_timer_pinned);
  19313. -
  19314. -/**
  19315. * add_timer - start a timer
  19316. * @timer: the timer to be added
  19317. *
  19318. @@ -977,13 +1135,14 @@ EXPORT_SYMBOL(add_timer);
  19319. */
  19320. void add_timer_on(struct timer_list *timer, int cpu)
  19321. {
  19322. - struct tvec_base *new_base = per_cpu_ptr(&tvec_bases, cpu);
  19323. - struct tvec_base *base;
  19324. + struct timer_base *new_base, *base;
  19325. unsigned long flags;
  19326. timer_stats_timer_set_start_info(timer);
  19327. BUG_ON(timer_pending(timer) || !timer->function);
  19328. + new_base = get_timer_cpu_base(timer->flags, cpu);
  19329. +
  19330. /*
  19331. * If @timer was on a different CPU, it should be migrated with the
  19332. * old base locked to prevent other operations proceeding with the
  19333. @@ -993,19 +1152,46 @@ void add_timer_on(struct timer_list *timer, int cpu)
  19334. if (base != new_base) {
  19335. timer->flags |= TIMER_MIGRATING;
  19336. - spin_unlock(&base->lock);
  19337. + raw_spin_unlock(&base->lock);
  19338. base = new_base;
  19339. - spin_lock(&base->lock);
  19340. + raw_spin_lock(&base->lock);
  19341. WRITE_ONCE(timer->flags,
  19342. (timer->flags & ~TIMER_BASEMASK) | cpu);
  19343. }
  19344. debug_activate(timer, timer->expires);
  19345. internal_add_timer(base, timer);
  19346. - spin_unlock_irqrestore(&base->lock, flags);
  19347. + raw_spin_unlock_irqrestore(&base->lock, flags);
  19348. }
  19349. EXPORT_SYMBOL_GPL(add_timer_on);
  19350. +#ifdef CONFIG_PREEMPT_RT_FULL
  19351. +/*
  19352. + * Wait for a running timer
  19353. + */
  19354. +static void wait_for_running_timer(struct timer_list *timer)
  19355. +{
  19356. + struct timer_base *base;
  19357. + u32 tf = timer->flags;
  19358. +
  19359. + if (tf & TIMER_MIGRATING)
  19360. + return;
  19361. +
  19362. + base = get_timer_base(tf);
  19363. + swait_event(base->wait_for_running_timer,
  19364. + base->running_timer != timer);
  19365. +}
  19366. +
  19367. +# define wakeup_timer_waiters(b) swake_up_all(&(b)->wait_for_running_timer)
  19368. +#else
  19369. +static inline void wait_for_running_timer(struct timer_list *timer)
  19370. +{
  19371. + cpu_relax();
  19372. +}
  19373. +
  19374. +# define wakeup_timer_waiters(b) do { } while (0)
  19375. +#endif
  19376. +
  19377. /**
  19378. * del_timer - deactive a timer.
  19379. * @timer: the timer to be deactivated
  19380. @@ -1019,7 +1205,7 @@ EXPORT_SYMBOL_GPL(add_timer_on);
  19381. */
  19382. int del_timer(struct timer_list *timer)
  19383. {
  19384. - struct tvec_base *base;
  19385. + struct timer_base *base;
  19386. unsigned long flags;
  19387. int ret = 0;
  19388. @@ -1029,7 +1215,7 @@ int del_timer(struct timer_list *timer)
  19389. if (timer_pending(timer)) {
  19390. base = lock_timer_base(timer, &flags);
  19391. ret = detach_if_pending(timer, base, true);
  19392. - spin_unlock_irqrestore(&base->lock, flags);
  19393. + raw_spin_unlock_irqrestore(&base->lock, flags);
  19394. }
  19395. return ret;
  19396. @@ -1045,7 +1231,7 @@ EXPORT_SYMBOL(del_timer);
  19397. */
  19398. int try_to_del_timer_sync(struct timer_list *timer)
  19399. {
  19400. - struct tvec_base *base;
  19401. + struct timer_base *base;
  19402. unsigned long flags;
  19403. int ret = -1;
  19404. @@ -1057,13 +1243,13 @@ int try_to_del_timer_sync(struct timer_list *timer)
  19405. timer_stats_timer_clear_start_info(timer);
  19406. ret = detach_if_pending(timer, base, true);
  19407. }
  19408. - spin_unlock_irqrestore(&base->lock, flags);
  19409. + raw_spin_unlock_irqrestore(&base->lock, flags);
  19410. return ret;
  19411. }
  19412. EXPORT_SYMBOL(try_to_del_timer_sync);
  19413. -#ifdef CONFIG_SMP
  19414. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  19415. /**
  19416. * del_timer_sync - deactivate a timer and wait for the handler to finish.
  19417. * @timer: the timer to be deactivated
  19418. @@ -1123,33 +1309,12 @@ int del_timer_sync(struct timer_list *timer)
  19419. int ret = try_to_del_timer_sync(timer);
  19420. if (ret >= 0)
  19421. return ret;
  19422. - cpu_relax();
  19423. + wait_for_running_timer(timer);
  19424. }
  19425. }
  19426. EXPORT_SYMBOL(del_timer_sync);
  19427. #endif
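Illustration (not part of the patch): on PREEMPT_RT a del_timer_sync() caller spinning in cpu_relax() could livelock against a preempted timer callback, so the loop above now sleeps on base->wait_for_running_timer until the callback is done (__run_timers() wakes the waiters via wakeup_timer_waiters()). A rough user-space analogue of that handshake, with a pthread mutex/condvar pair standing in for base->lock and the swait queue:

    /* Editorial sketch only: wait (instead of spin) until the running
     * timer callback has finished, as the RT variant above does. */
    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t base_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t wait_for_running_timer = PTHREAD_COND_INITIALIZER;
    static void *running_timer;                 /* callback currently executing, or NULL */

    /* del_timer_sync() side: sleep until the callback for @timer is done */
    static void wait_for_timer(void *timer)
    {
            pthread_mutex_lock(&base_lock);
            while (running_timer == timer)      /* swait_event(..., running_timer != timer) */
                    pthread_cond_wait(&wait_for_running_timer, &base_lock);
            pthread_mutex_unlock(&base_lock);
    }

    static void *canceller(void *timer)
    {
            wait_for_timer(timer);
            printf("callback done, timer can be freed\n");
            return NULL;
    }

    int main(void)
    {
            int timer;                          /* stands in for a struct timer_list */
            pthread_t t;

            /* __run_timers() side: mark the timer as running, run the callback,
             * then clear running_timer and wake all waiters. */
            pthread_mutex_lock(&base_lock);
            running_timer = &timer;
            pthread_mutex_unlock(&base_lock);

            pthread_create(&t, NULL, canceller, &timer);

            usleep(10000);                      /* the "callback" is busy for a while */

            pthread_mutex_lock(&base_lock);
            running_timer = NULL;
            pthread_cond_broadcast(&wait_for_running_timer); /* wakeup_timer_waiters() */
            pthread_mutex_unlock(&base_lock);

            pthread_join(t, NULL);
            return 0;
    }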
  19428. -static int cascade(struct tvec_base *base, struct tvec *tv, int index)
  19429. -{
  19430. - /* cascade all the timers from tv up one level */
  19431. - struct timer_list *timer;
  19432. - struct hlist_node *tmp;
  19433. - struct hlist_head tv_list;
  19434. -
  19435. - hlist_move_list(tv->vec + index, &tv_list);
  19436. -
  19437. - /*
  19438. - * We are removing _all_ timers from the list, so we
  19439. - * don't have to detach them individually.
  19440. - */
  19441. - hlist_for_each_entry_safe(timer, tmp, &tv_list, entry) {
  19442. - /* No accounting, while moving them */
  19443. - __internal_add_timer(base, timer);
  19444. - }
  19445. -
  19446. - return index;
  19447. -}
  19448. -
  19449. static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
  19450. unsigned long data)
  19451. {
  19452. @@ -1193,147 +1358,144 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
  19453. }
  19454. }
  19455. -#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
  19456. -
  19457. -/**
  19458. - * __run_timers - run all expired timers (if any) on this CPU.
  19459. - * @base: the timer vector to be processed.
  19460. - *
  19461. - * This function cascades all vectors and executes all expired timer
  19462. - * vectors.
  19463. - */
  19464. -static inline void __run_timers(struct tvec_base *base)
  19465. +static void expire_timers(struct timer_base *base, struct hlist_head *head)
  19466. {
  19467. - struct timer_list *timer;
  19468. + while (!hlist_empty(head)) {
  19469. + struct timer_list *timer;
  19470. + void (*fn)(unsigned long);
  19471. + unsigned long data;
  19472. - spin_lock_irq(&base->lock);
  19473. + timer = hlist_entry(head->first, struct timer_list, entry);
  19474. + timer_stats_account_timer(timer);
  19475. - while (time_after_eq(jiffies, base->timer_jiffies)) {
  19476. - struct hlist_head work_list;
  19477. - struct hlist_head *head = &work_list;
  19478. - int index;
  19479. + base->running_timer = timer;
  19480. + detach_timer(timer, true);
  19481. - if (!base->all_timers) {
  19482. - base->timer_jiffies = jiffies;
  19483. - break;
  19484. - }
  19485. + fn = timer->function;
  19486. + data = timer->data;
  19487. - index = base->timer_jiffies & TVR_MASK;
  19488. -
  19489. - /*
  19490. - * Cascade timers:
  19491. - */
  19492. - if (!index &&
  19493. - (!cascade(base, &base->tv2, INDEX(0))) &&
  19494. - (!cascade(base, &base->tv3, INDEX(1))) &&
  19495. - !cascade(base, &base->tv4, INDEX(2)))
  19496. - cascade(base, &base->tv5, INDEX(3));
  19497. - ++base->timer_jiffies;
  19498. - hlist_move_list(base->tv1.vec + index, head);
  19499. - while (!hlist_empty(head)) {
  19500. - void (*fn)(unsigned long);
  19501. - unsigned long data;
  19502. - bool irqsafe;
  19503. -
  19504. - timer = hlist_entry(head->first, struct timer_list, entry);
  19505. - fn = timer->function;
  19506. - data = timer->data;
  19507. - irqsafe = timer->flags & TIMER_IRQSAFE;
  19508. -
  19509. - timer_stats_account_timer(timer);
  19510. -
  19511. - base->running_timer = timer;
  19512. - detach_expired_timer(timer, base);
  19513. -
  19514. - if (irqsafe) {
  19515. - spin_unlock(&base->lock);
  19516. - call_timer_fn(timer, fn, data);
  19517. - spin_lock(&base->lock);
  19518. - } else {
  19519. - spin_unlock_irq(&base->lock);
  19520. - call_timer_fn(timer, fn, data);
  19521. - spin_lock_irq(&base->lock);
  19522. - }
  19523. + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL) &&
  19524. + timer->flags & TIMER_IRQSAFE) {
  19525. + raw_spin_unlock(&base->lock);
  19526. + call_timer_fn(timer, fn, data);
  19527. + base->running_timer = NULL;
  19528. + raw_spin_lock(&base->lock);
  19529. + } else {
  19530. + raw_spin_unlock_irq(&base->lock);
  19531. + call_timer_fn(timer, fn, data);
  19532. + base->running_timer = NULL;
  19533. + raw_spin_lock_irq(&base->lock);
  19534. }
  19535. }
  19536. - base->running_timer = NULL;
  19537. - spin_unlock_irq(&base->lock);
  19538. +}
  19539. +
  19540. +static int __collect_expired_timers(struct timer_base *base,
  19541. + struct hlist_head *heads)
  19542. +{
  19543. + unsigned long clk = base->clk;
  19544. + struct hlist_head *vec;
  19545. + int i, levels = 0;
  19546. + unsigned int idx;
  19547. +
  19548. + for (i = 0; i < LVL_DEPTH; i++) {
  19549. + idx = (clk & LVL_MASK) + i * LVL_SIZE;
  19550. +
  19551. + if (__test_and_clear_bit(idx, base->pending_map)) {
  19552. + vec = base->vectors + idx;
  19553. + hlist_move_list(vec, heads++);
  19554. + levels++;
  19555. + }
  19556. + /* Is it time to look at the next level? */
  19557. + if (clk & LVL_CLK_MASK)
  19558. + break;
  19559. + /* Shift clock for the next level granularity */
  19560. + clk >>= LVL_CLK_SHIFT;
  19561. + }
  19562. + return levels;
  19563. }
  19564. #ifdef CONFIG_NO_HZ_COMMON
  19565. /*
  19566. - * Find out when the next timer event is due to happen. This
  19567. - * is used on S/390 to stop all activity when a CPU is idle.
  19568. - * This function needs to be called with interrupts disabled.
  19569. + * Find the next pending bucket of a level. Search from level start (@offset)
  19570. + * + @clk upwards and if nothing there, search from start of the level
  19571. + * (@offset) up to @offset + clk.
  19572. */
  19573. -static unsigned long __next_timer_interrupt(struct tvec_base *base)
  19574. +static int next_pending_bucket(struct timer_base *base, unsigned offset,
  19575. + unsigned clk)
  19576. {
  19577. - unsigned long timer_jiffies = base->timer_jiffies;
  19578. - unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
  19579. - int index, slot, array, found = 0;
  19580. - struct timer_list *nte;
  19581. - struct tvec *varray[4];
  19582. + unsigned pos, start = offset + clk;
  19583. + unsigned end = offset + LVL_SIZE;
  19584. - /* Look for timer events in tv1. */
  19585. - index = slot = timer_jiffies & TVR_MASK;
  19586. - do {
  19587. - hlist_for_each_entry(nte, base->tv1.vec + slot, entry) {
  19588. - if (nte->flags & TIMER_DEFERRABLE)
  19589. - continue;
  19590. + pos = find_next_bit(base->pending_map, end, start);
  19591. + if (pos < end)
  19592. + return pos - start;
  19593. - found = 1;
  19594. - expires = nte->expires;
  19595. - /* Look at the cascade bucket(s)? */
  19596. - if (!index || slot < index)
  19597. - goto cascade;
  19598. - return expires;
  19599. + pos = find_next_bit(base->pending_map, start, offset);
  19600. + return pos < start ? pos + LVL_SIZE - start : -1;
  19601. +}
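Illustration (not part of the patch): next_pending_bucket() does a circular search within one 64-bucket level, first from the current position to the level end, then wrapped around from the level start, and returns the distance in buckets or -1 if the level is empty. A standalone sketch of the same two-phase search with one level's bitmap modelled as a 64-bit word and find_next_bit() replaced by a plain loop:

    /* Editorial sketch only: the wrap-around bucket search of
     * next_pending_bucket() above, over a single 64-bucket level. */
    #include <stdio.h>
    #include <stdint.h>

    #define LVL_SIZE 64

    /* First set bit index in [from, to), or 'to' if none (find_next_bit stand-in) */
    static unsigned int find_next_bit64(uint64_t map, unsigned int to, unsigned int from)
    {
            unsigned int i;

            for (i = from; i < to; i++)
                    if (map & (1ULL << i))
                            return i;
            return to;
    }

    /* Distance (in buckets) from 'clk' to the next pending bucket of this level,
     * or -1 if the level has no pending timers. */
    static int next_pending_bucket(uint64_t pending, unsigned int clk)
    {
            unsigned int pos;

            /* Phase 1: search from the current position to the end of the level */
            pos = find_next_bit64(pending, LVL_SIZE, clk);
            if (pos < LVL_SIZE)
                    return pos - clk;

            /* Phase 2: wrap around and search from the level start up to clk */
            pos = find_next_bit64(pending, clk, 0);
            return pos < clk ? (int)(pos + LVL_SIZE - clk) : -1;
    }

    int main(void)
    {
            uint64_t pending = (1ULL << 3) | (1ULL << 40);  /* buckets 3 and 40 are armed */

            printf("from bucket 10: next in %d buckets\n", next_pending_bucket(pending, 10)); /* 30 */
            printf("from bucket 50: next in %d buckets\n", next_pending_bucket(pending, 50)); /* 17 (wraps) */
            printf("empty level:    %d\n", next_pending_bucket(0, 7));                        /* -1 */
            return 0;
    }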
  19602. +
  19603. +/*
  19604. + * Search the first expiring timer in the various clock levels. Caller must
  19605. + * hold base->lock.
  19606. + */
  19607. +static unsigned long __next_timer_interrupt(struct timer_base *base)
  19608. +{
  19609. + unsigned long clk, next, adj;
  19610. + unsigned lvl, offset = 0;
  19611. +
  19612. + next = base->clk + NEXT_TIMER_MAX_DELTA;
  19613. + clk = base->clk;
  19614. + for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) {
  19615. + int pos = next_pending_bucket(base, offset, clk & LVL_MASK);
  19616. +
  19617. + if (pos >= 0) {
  19618. + unsigned long tmp = clk + (unsigned long) pos;
  19619. +
  19620. + tmp <<= LVL_SHIFT(lvl);
  19621. + if (time_before(tmp, next))
  19622. + next = tmp;
  19623. }
  19624. - slot = (slot + 1) & TVR_MASK;
  19625. - } while (slot != index);
  19626. -
  19627. -cascade:
  19628. - /* Calculate the next cascade event */
  19629. - if (index)
  19630. - timer_jiffies += TVR_SIZE - index;
  19631. - timer_jiffies >>= TVR_BITS;
  19632. -
  19633. - /* Check tv2-tv5. */
  19634. - varray[0] = &base->tv2;
  19635. - varray[1] = &base->tv3;
  19636. - varray[2] = &base->tv4;
  19637. - varray[3] = &base->tv5;
  19638. -
  19639. - for (array = 0; array < 4; array++) {
  19640. - struct tvec *varp = varray[array];
  19641. -
  19642. - index = slot = timer_jiffies & TVN_MASK;
  19643. - do {
  19644. - hlist_for_each_entry(nte, varp->vec + slot, entry) {
  19645. - if (nte->flags & TIMER_DEFERRABLE)
  19646. - continue;
  19647. -
  19648. - found = 1;
  19649. - if (time_before(nte->expires, expires))
  19650. - expires = nte->expires;
  19651. - }
  19652. - /*
  19653. - * Do we still search for the first timer or are
  19654. - * we looking up the cascade buckets ?
  19655. - */
  19656. - if (found) {
  19657. - /* Look at the cascade bucket(s)? */
  19658. - if (!index || slot < index)
  19659. - break;
  19660. - return expires;
  19661. - }
  19662. - slot = (slot + 1) & TVN_MASK;
  19663. - } while (slot != index);
  19664. -
  19665. - if (index)
  19666. - timer_jiffies += TVN_SIZE - index;
  19667. - timer_jiffies >>= TVN_BITS;
  19668. + /*
  19669. + * Clock for the next level. If the current level clock lower
  19670. + * bits are zero, we look at the next level as is. If not we
  19671. + * need to advance it by one because that's going to be the
  19672. + * next expiring bucket in that level. base->clk is the next
  19673. + * expiring jiffie. So in case of:
  19674. + *
  19675. + * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0
  19676. + * 0 0 0 0 0 0
  19677. + *
  19678. + * we have to look at all levels @index 0. With
  19679. + *
  19680. + * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0
  19681. + * 0 0 0 0 0 2
  19682. + *
  19683. + * LVL0 has the next expiring bucket @index 2. The upper
  19684. + * levels have the next expiring bucket @index 1.
  19685. + *
  19686. + * In case that the propagation wraps the next level the same
  19687. + * rules apply:
  19688. + *
  19689. + * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0
  19690. + * 0 0 0 0 F 2
  19691. + *
  19692. + * So after looking at LVL0 we get:
  19693. + *
  19694. + * LVL5 LVL4 LVL3 LVL2 LVL1
  19695. + * 0 0 0 1 0
  19696. + *
  19697. + * So no propagation from LVL1 to LVL2 because that happened
  19698. + * with the add already, but then we need to propagate further
  19699. + * from LVL2 to LVL3.
  19700. + *
  19701. + * So the simple check whether the lower bits of the current
  19702. + * level are 0 or not is sufficient for all cases.
  19703. + */
  19704. + adj = clk & LVL_CLK_MASK ? 1 : 0;
  19705. + clk >>= LVL_CLK_SHIFT;
  19706. + clk += adj;
  19707. }
  19708. - return expires;
  19709. + return next;
  19710. }
  19711. /*
  19712. @@ -1379,7 +1541,7 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
  19713. */
  19714. u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
  19715. {
  19716. - struct tvec_base *base = this_cpu_ptr(&tvec_bases);
  19717. + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
  19718. u64 expires = KTIME_MAX;
  19719. unsigned long nextevt;
  19720. @@ -1390,20 +1552,81 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
  19721. if (cpu_is_offline(smp_processor_id()))
  19722. return expires;
  19723. - spin_lock(&base->lock);
  19724. - if (base->active_timers) {
  19725. - if (time_before_eq(base->next_timer, base->timer_jiffies))
  19726. - base->next_timer = __next_timer_interrupt(base);
  19727. - nextevt = base->next_timer;
  19728. - if (time_before_eq(nextevt, basej))
  19729. - expires = basem;
  19730. - else
  19731. - expires = basem + (nextevt - basej) * TICK_NSEC;
  19732. + raw_spin_lock(&base->lock);
  19733. + nextevt = __next_timer_interrupt(base);
  19734. + base->next_expiry = nextevt;
  19735. + /*
  19736. + * We have a fresh next event. Check whether we can forward the base.
  19737. + */
  19738. + if (time_after(nextevt, jiffies))
  19739. + base->clk = jiffies;
  19740. + else if (time_after(nextevt, base->clk))
  19741. + base->clk = nextevt;
  19742. +
  19743. + if (time_before_eq(nextevt, basej)) {
  19744. + expires = basem;
  19745. + base->is_idle = false;
  19746. + } else {
  19747. + expires = basem + (nextevt - basej) * TICK_NSEC;
  19748. + /*
  19749. + * If we expect to sleep more than a tick, mark the base idle.
  19750. + */
  19751. + if ((expires - basem) > TICK_NSEC)
  19752. + base->is_idle = true;
  19753. }
  19754. - spin_unlock(&base->lock);
  19755. + raw_spin_unlock(&base->lock);
  19756. return cmp_next_hrtimer_event(basem, expires);
  19757. }
  19758. +
  19759. +/**
  19760. + * timer_clear_idle - Clear the idle state of the timer base
  19761. + *
  19762. + * Called with interrupts disabled
  19763. + */
  19764. +void timer_clear_idle(void)
  19765. +{
  19766. + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
  19767. +
  19768. + /*
  19769. + * We do this unlocked. The worst outcome is a remote enqueue sending
  19770. + * a pointless IPI, but taking the lock would just make the window for
  19771. + * sending the IPI a few instructions smaller for the cost of taking
  19772. + * the lock in the exit from idle path.
  19773. + */
  19774. + base->is_idle = false;
  19775. +}
  19776. +
  19777. +static int collect_expired_timers(struct timer_base *base,
  19778. + struct hlist_head *heads)
  19779. +{
  19780. + /*
  19781. + * NOHZ optimization. After a long idle sleep we need to forward the
  19782. + * base to current jiffies. Avoid a loop by searching the bitfield for
  19783. + * the next expiring timer.
  19784. + */
  19785. + if ((long)(jiffies - base->clk) > 2) {
  19786. + unsigned long next = __next_timer_interrupt(base);
  19787. +
  19788. + /*
  18789. + * If the next timer is ahead of time, forward to current
  19790. + * jiffies, otherwise forward to the next expiry time.
  19791. + */
  19792. + if (time_after(next, jiffies)) {
  19793. + /* The call site will increment clock! */
  19794. + base->clk = jiffies - 1;
  19795. + return 0;
  19796. + }
  19797. + base->clk = next;
  19798. + }
  19799. + return __collect_expired_timers(base, heads);
  19800. +}
  19801. +#else
  19802. +static inline int collect_expired_timers(struct timer_base *base,
  19803. + struct hlist_head *heads)
  19804. +{
  19805. + return __collect_expired_timers(base, heads);
  19806. +}
  19807. #endif
  19808. /*
  19809. @@ -1416,25 +1639,54 @@ void update_process_times(int user_tick)
  19810. /* Note: this timer irq context must be accounted for as well. */
  19811. account_process_tick(p, user_tick);
  19812. + scheduler_tick();
  19813. run_local_timers();
  19814. rcu_check_callbacks(user_tick);
  19815. -#ifdef CONFIG_IRQ_WORK
  19816. +#if defined(CONFIG_IRQ_WORK)
  19817. if (in_irq())
  19818. irq_work_tick();
  19819. #endif
  19820. - scheduler_tick();
  19821. run_posix_cpu_timers(p);
  19822. }
  19823. +/**
  19824. + * __run_timers - run all expired timers (if any) on this CPU.
  19825. + * @base: the timer vector to be processed.
  19826. + */
  19827. +static inline void __run_timers(struct timer_base *base)
  19828. +{
  19829. + struct hlist_head heads[LVL_DEPTH];
  19830. + int levels;
  19831. +
  19832. + if (!time_after_eq(jiffies, base->clk))
  19833. + return;
  19834. +
  19835. + raw_spin_lock_irq(&base->lock);
  19836. +
  19837. + while (time_after_eq(jiffies, base->clk)) {
  19838. +
  19839. + levels = collect_expired_timers(base, heads);
  19840. + base->clk++;
  19841. +
  19842. + while (levels--)
  19843. + expire_timers(base, heads + levels);
  19844. + }
  19845. + raw_spin_unlock_irq(&base->lock);
  19846. + wakeup_timer_waiters(base);
  19847. +}
  19848. +
  19849. /*
  19850. * This function runs timers and the timer-tq in bottom half context.
  19851. */
  19852. static void run_timer_softirq(struct softirq_action *h)
  19853. {
  19854. - struct tvec_base *base = this_cpu_ptr(&tvec_bases);
  19855. + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
  19856. - if (time_after_eq(jiffies, base->timer_jiffies))
  19857. - __run_timers(base);
  19858. + irq_work_tick_soft();
  19859. +
  19860. + __run_timers(base);
  19861. + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
  19862. + __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
  19863. }
  19864. /*
  19865. @@ -1442,7 +1694,18 @@ static void run_timer_softirq(struct softirq_action *h)
  19866. */
  19867. void run_local_timers(void)
  19868. {
  19869. + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
  19870. +
  19871. hrtimer_run_queues();
  19872. + /* Raise the softirq only if required. */
  19873. + if (time_before(jiffies, base->clk)) {
  19874. + if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
  19875. + return;
  19876. + /* CPU is awake, so check the deferrable base. */
  19877. + base++;
  19878. + if (time_before(jiffies, base->clk))
  19879. + return;
  19880. + }
  19881. raise_softirq(TIMER_SOFTIRQ);
  19882. }
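A compilable sketch of the "raise the softirq only if required" decision above; base_due(), need_timer_softirq() and the two-element array are illustrative stand-ins for the per-CPU timer_bases[] lookup and time_before(), not kernel APIs.

#include <stdbool.h>
#include <stdio.h>

struct base {
        unsigned long clk;      /* next expiry the base knows about */
        bool nohz_active;
};

/* Wrap-safe "now has reached clk", i.e. !time_before(now, clk). */
static bool base_due(const struct base *b, unsigned long now)
{
        return (long)(now - b->clk) >= 0;
}

static bool need_timer_softirq(const struct base bases[2], unsigned long now,
                               bool nohz_common)
{
        if (base_due(&bases[0], now))           /* BASE_STD has expired work */
                return true;
        if (!nohz_common || !bases[0].nohz_active)
                return false;                   /* no deferrable base to check */
        return base_due(&bases[1], now);        /* BASE_DEF */
}

int main(void)
{
        struct base bases[2] = {
                { .clk = 110, .nohz_active = true },
                { .clk = 100, .nohz_active = true },
        };

        printf("%d\n", need_timer_softirq(bases, 105, true));  /* 1: BASE_DEF due */
        printf("%d\n", need_timer_softirq(bases, 105, false)); /* 0 */
        return 0;
}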
  19883. @@ -1527,7 +1790,7 @@ signed long __sched schedule_timeout(signed long timeout)
  19884. expire = timeout + jiffies;
  19885. setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
  19886. - __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
  19887. + __mod_timer(&timer, expire, false);
  19888. schedule();
  19889. del_singleshot_timer_sync(&timer);
  19890. @@ -1578,14 +1841,13 @@ signed long __sched schedule_timeout_idle(signed long timeout)
  19891. EXPORT_SYMBOL(schedule_timeout_idle);
  19892. #ifdef CONFIG_HOTPLUG_CPU
  19893. -static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head)
  19894. +static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *head)
  19895. {
  19896. struct timer_list *timer;
  19897. int cpu = new_base->cpu;
  19898. while (!hlist_empty(head)) {
  19899. timer = hlist_entry(head->first, struct timer_list, entry);
  19900. - /* We ignore the accounting on the dying cpu */
  19901. detach_timer(timer, false);
  19902. timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
  19903. internal_add_timer(new_base, timer);
  19904. @@ -1594,37 +1856,31 @@ static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *he
  19905. static void migrate_timers(int cpu)
  19906. {
  19907. - struct tvec_base *old_base;
  19908. - struct tvec_base *new_base;
  19909. - int i;
  19910. + struct timer_base *old_base;
  19911. + struct timer_base *new_base;
  19912. + int b, i;
  19913. BUG_ON(cpu_online(cpu));
  19914. - old_base = per_cpu_ptr(&tvec_bases, cpu);
  19915. - new_base = get_cpu_ptr(&tvec_bases);
  19916. - /*
  19917. - * The caller is globally serialized and nobody else
  19918. - * takes two locks at once, deadlock is not possible.
  19919. - */
  19920. - spin_lock_irq(&new_base->lock);
  19921. - spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
  19922. - BUG_ON(old_base->running_timer);
  19923. + for (b = 0; b < NR_BASES; b++) {
  19924. + old_base = per_cpu_ptr(&timer_bases[b], cpu);
  19925. + new_base = get_cpu_ptr(&timer_bases[b]);
  19926. + /*
  19927. + * The caller is globally serialized and nobody else
  19928. + * takes two locks at once, deadlock is not possible.
  19929. + */
  19930. + raw_spin_lock_irq(&new_base->lock);
  19931. + raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
  19932. - for (i = 0; i < TVR_SIZE; i++)
  19933. - migrate_timer_list(new_base, old_base->tv1.vec + i);
  19934. - for (i = 0; i < TVN_SIZE; i++) {
  19935. - migrate_timer_list(new_base, old_base->tv2.vec + i);
  19936. - migrate_timer_list(new_base, old_base->tv3.vec + i);
  19937. - migrate_timer_list(new_base, old_base->tv4.vec + i);
  19938. - migrate_timer_list(new_base, old_base->tv5.vec + i);
  19939. + BUG_ON(old_base->running_timer);
  19940. +
  19941. + for (i = 0; i < WHEEL_SIZE; i++)
  19942. + migrate_timer_list(new_base, old_base->vectors + i);
  19943. +
  19944. + raw_spin_unlock(&old_base->lock);
  19945. + raw_spin_unlock_irq(&new_base->lock);
  19946. + put_cpu_ptr(&timer_bases);
  19947. }
  19948. -
  19949. - old_base->active_timers = 0;
  19950. - old_base->all_timers = 0;
  19951. -
  19952. - spin_unlock(&old_base->lock);
  19953. - spin_unlock_irq(&new_base->lock);
  19954. - put_cpu_ptr(&tvec_bases);
  19955. }
  19956. static int timer_cpu_notify(struct notifier_block *self,
  19957. @@ -1652,13 +1908,18 @@ static inline void timer_register_cpu_notifier(void) { }
  19958. static void __init init_timer_cpu(int cpu)
  19959. {
  19960. - struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
  19961. + struct timer_base *base;
  19962. + int i;
  19963. - base->cpu = cpu;
  19964. - spin_lock_init(&base->lock);
  19965. -
  19966. - base->timer_jiffies = jiffies;
  19967. - base->next_timer = base->timer_jiffies;
  19968. + for (i = 0; i < NR_BASES; i++) {
  19969. + base = per_cpu_ptr(&timer_bases[i], cpu);
  19970. + base->cpu = cpu;
  19971. + raw_spin_lock_init(&base->lock);
  19972. + base->clk = jiffies;
  19973. +#ifdef CONFIG_PREEMPT_RT_FULL
  19974. + init_swait_queue_head(&base->wait_for_running_timer);
  19975. +#endif
  19976. + }
  19977. }
  19978. static void __init init_timer_cpus(void)
  19979. diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
  19980. index e45db6b0d878..364ccd0eb57b 100644
  19981. --- a/kernel/trace/Kconfig
  19982. +++ b/kernel/trace/Kconfig
  19983. @@ -187,6 +187,24 @@ config IRQSOFF_TRACER
  19984. enabled. This option and the preempt-off timing option can be
  19985. used together or separately.)
  19986. +config INTERRUPT_OFF_HIST
  19987. + bool "Interrupts-off Latency Histogram"
  19988. + depends on IRQSOFF_TRACER
  19989. + help
  19990. + This option generates continuously updated histograms (one per cpu)
  19991. + of the duration of time periods with interrupts disabled. The
  19992. + histograms are disabled by default. To enable them, write a non-zero
  19993. + number to
  19994. +
  19995. + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
  19996. +
  19997. + If PREEMPT_OFF_HIST is also selected, additional histograms (one
  19998. + per cpu) are generated that accumulate the duration of time periods
  19999. + when both interrupts and preemption are disabled. The histogram data
  20000. + will be located in the debug file system at
  20001. +
  20002. + /sys/kernel/debug/tracing/latency_hist/irqsoff
  20003. +
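As a usage illustration, a small program (assuming debugfs is mounted at /sys/kernel/debug and that a file named CPU0 exists, per the CPU%d naming used by the histogram code) that enables the irqs-off histograms and dumps one of them:

#include <stdio.h>

int main(void)
{
        const char *enable =
                "/sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff";
        const char *hist =
                "/sys/kernel/debug/tracing/latency_hist/irqsoff/CPU0";
        char line[128];
        FILE *f;

        f = fopen(enable, "w");
        if (!f)
                return 1;
        fputs("1\n", f);                /* any non-zero number enables logging */
        fclose(f);

        f = fopen(hist, "r");
        if (!f)
                return 1;
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);    /* header lines plus "usecs samples" rows */
        fclose(f);
        return 0;
}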
  20004. config PREEMPT_TRACER
  20005. bool "Preemption-off Latency Tracer"
  20006. default n
  20007. @@ -211,6 +229,24 @@ config PREEMPT_TRACER
  20008. enabled. This option and the irqs-off timing option can be
  20009. used together or separately.)
  20010. +config PREEMPT_OFF_HIST
  20011. + bool "Preemption-off Latency Histogram"
  20012. + depends on PREEMPT_TRACER
  20013. + help
  20014. + This option generates continuously updated histograms (one per cpu)
  20015. + of the duration of time periods with preemption disabled. The
  20016. + histograms are disabled by default. To enable them, write a non-zero
  20017. + number to
  20018. +
  20019. + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
  20020. +
  20021. + If INTERRUPT_OFF_HIST is also selected, additional histograms (one
  20022. + per cpu) are generated that accumulate the duration of time periods
  20023. + when both interrupts and preemption are disabled. The histogram data
  20024. + will be located in the debug file system at
  20025. +
  20026. + /sys/kernel/debug/tracing/latency_hist/preemptoff
  20027. +
  20028. config SCHED_TRACER
  20029. bool "Scheduling Latency Tracer"
  20030. select GENERIC_TRACER
  20031. @@ -221,6 +257,74 @@ config SCHED_TRACER
  20032. This tracer tracks the latency of the highest priority task
  20033. to be scheduled in, starting from the point it has woken up.
  20034. +config WAKEUP_LATENCY_HIST
  20035. + bool "Scheduling Latency Histogram"
  20036. + depends on SCHED_TRACER
  20037. + help
  20038. + This option generates continuously updated histograms (one per cpu)
  20039. + of the scheduling latency of the highest priority task.
  20040. + The histograms are disabled by default. To enable them, write a
  20041. + non-zero number to
  20042. +
  20043. + /sys/kernel/debug/tracing/latency_hist/enable/wakeup
  20044. +
  20045. + Two different algorithms are used, one to determine the latency of
  20046. + processes that exclusively use the highest priority of the system and
  20047. + another one to determine the latency of processes that share the
  20048. + highest system priority with other processes. The former is used to
  20049. + improve hardware and system software, the latter to optimize the
  20050. + priority design of a given system. The histogram data will be
  20051. + located in the debug file system at
  20052. +
  20053. + /sys/kernel/debug/tracing/latency_hist/wakeup
  20054. +
  20055. + and
  20056. +
  20057. + /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio
  20058. +
  20059. + If both Scheduling Latency Histogram and Missed Timer Offsets
  20060. + Histogram are selected, additional histogram data will be collected
20061. + that contains, in addition to the wakeup latency, the timer latency in
  20062. + case the wakeup was triggered by an expired timer. These histograms
  20063. + are available in the
  20064. +
  20065. + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
  20066. +
  20067. + directory. They reflect the apparent interrupt and scheduling latency
20068. + and are best suited to determining the worst-case latency of a given
  20069. + system. To enable these histograms, write a non-zero number to
  20070. +
  20071. + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
  20072. +
  20073. +config MISSED_TIMER_OFFSETS_HIST
  20074. + depends on HIGH_RES_TIMERS
  20075. + select GENERIC_TRACER
  20076. + bool "Missed Timer Offsets Histogram"
  20077. + help
  20078. + Generate a histogram of missed timer offsets in microseconds. The
  20079. + histograms are disabled by default. To enable them, write a non-zero
  20080. + number to
  20081. +
  20082. + /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets
  20083. +
  20084. + The histogram data will be located in the debug file system at
  20085. +
  20086. + /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets
  20087. +
  20088. + If both Scheduling Latency Histogram and Missed Timer Offsets
  20089. + Histogram are selected, additional histogram data will be collected
20090. + that contains, in addition to the wakeup latency, the timer latency in
  20091. + case the wakeup was triggered by an expired timer. These histograms
  20092. + are available in the
  20093. +
  20094. + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
  20095. +
  20096. + directory. They reflect the apparent interrupt and scheduling latency
20097. + and are best suited to determining the worst-case latency of a given
  20098. + system. To enable these histograms, write a non-zero number to
  20099. +
  20100. + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
  20101. +
  20102. config ENABLE_DEFAULT_TRACERS
  20103. bool "Trace process context switches and events"
  20104. depends on !GENERIC_TRACER
  20105. diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
  20106. index 9b1044e936a6..3bbaea06824a 100644
  20107. --- a/kernel/trace/Makefile
  20108. +++ b/kernel/trace/Makefile
  20109. @@ -36,6 +36,10 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
  20110. obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
  20111. obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
  20112. obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
  20113. +obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o
  20114. +obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o
  20115. +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o
  20116. +obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o
  20117. obj-$(CONFIG_NOP_TRACER) += trace_nop.o
  20118. obj-$(CONFIG_STACK_TRACER) += trace_stack.o
  20119. obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
  20120. diff --git a/kernel/trace/latency_hist.c b/kernel/trace/latency_hist.c
  20121. new file mode 100644
  20122. index 000000000000..7f6ee70dea41
  20123. --- /dev/null
  20124. +++ b/kernel/trace/latency_hist.c
  20125. @@ -0,0 +1,1178 @@
  20126. +/*
  20127. + * kernel/trace/latency_hist.c
  20128. + *
  20129. + * Add support for histograms of preemption-off latency and
20130. + * interrupt-off latency and wakeup latency; it depends on
  20131. + * Real-Time Preemption Support.
  20132. + *
  20133. + * Copyright (C) 2005 MontaVista Software, Inc.
  20134. + * Yi Yang <yyang@ch.mvista.com>
  20135. + *
  20136. + * Converted to work with the new latency tracer.
  20137. + * Copyright (C) 2008 Red Hat, Inc.
  20138. + * Steven Rostedt <srostedt@redhat.com>
  20139. + *
  20140. + */
  20141. +#include <linux/module.h>
  20142. +#include <linux/debugfs.h>
  20143. +#include <linux/seq_file.h>
  20144. +#include <linux/percpu.h>
  20145. +#include <linux/kallsyms.h>
  20146. +#include <linux/uaccess.h>
  20147. +#include <linux/sched.h>
  20148. +#include <linux/sched/rt.h>
  20149. +#include <linux/slab.h>
  20150. +#include <linux/atomic.h>
  20151. +#include <asm/div64.h>
  20152. +
  20153. +#include "trace.h"
  20154. +#include <trace/events/sched.h>
  20155. +
  20156. +#define NSECS_PER_USECS 1000L
  20157. +
  20158. +#define CREATE_TRACE_POINTS
  20159. +#include <trace/events/hist.h>
  20160. +
  20161. +enum {
  20162. + IRQSOFF_LATENCY = 0,
  20163. + PREEMPTOFF_LATENCY,
  20164. + PREEMPTIRQSOFF_LATENCY,
  20165. + WAKEUP_LATENCY,
  20166. + WAKEUP_LATENCY_SHAREDPRIO,
  20167. + MISSED_TIMER_OFFSETS,
  20168. + TIMERANDWAKEUP_LATENCY,
  20169. + MAX_LATENCY_TYPE,
  20170. +};
  20171. +
  20172. +#define MAX_ENTRY_NUM 10240
  20173. +
  20174. +struct hist_data {
20175. + atomic_t hist_mode; /* 0 don't log, 1 log */
  20176. + long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */
  20177. + long min_lat;
  20178. + long max_lat;
  20179. + unsigned long long below_hist_bound_samples;
  20180. + unsigned long long above_hist_bound_samples;
  20181. + long long accumulate_lat;
  20182. + unsigned long long total_samples;
  20183. + unsigned long long hist_array[MAX_ENTRY_NUM];
  20184. +};
  20185. +
  20186. +struct enable_data {
  20187. + int latency_type;
  20188. + int enabled;
  20189. +};
  20190. +
  20191. +static char *latency_hist_dir_root = "latency_hist";
  20192. +
  20193. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  20194. +static DEFINE_PER_CPU(struct hist_data, irqsoff_hist);
  20195. +static char *irqsoff_hist_dir = "irqsoff";
  20196. +static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start);
  20197. +static DEFINE_PER_CPU(int, hist_irqsoff_counting);
  20198. +#endif
  20199. +
  20200. +#ifdef CONFIG_PREEMPT_OFF_HIST
  20201. +static DEFINE_PER_CPU(struct hist_data, preemptoff_hist);
  20202. +static char *preemptoff_hist_dir = "preemptoff";
  20203. +static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start);
  20204. +static DEFINE_PER_CPU(int, hist_preemptoff_counting);
  20205. +#endif
  20206. +
  20207. +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
  20208. +static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist);
  20209. +static char *preemptirqsoff_hist_dir = "preemptirqsoff";
  20210. +static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start);
  20211. +static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting);
  20212. +#endif
  20213. +
  20214. +#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST)
  20215. +static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start);
  20216. +static struct enable_data preemptirqsoff_enabled_data = {
  20217. + .latency_type = PREEMPTIRQSOFF_LATENCY,
  20218. + .enabled = 0,
  20219. +};
  20220. +#endif
  20221. +
  20222. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20223. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20224. +struct maxlatproc_data {
  20225. + char comm[FIELD_SIZEOF(struct task_struct, comm)];
  20226. + char current_comm[FIELD_SIZEOF(struct task_struct, comm)];
  20227. + int pid;
  20228. + int current_pid;
  20229. + int prio;
  20230. + int current_prio;
  20231. + long latency;
  20232. + long timeroffset;
  20233. + cycle_t timestamp;
  20234. +};
  20235. +#endif
  20236. +
  20237. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  20238. +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist);
  20239. +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio);
  20240. +static char *wakeup_latency_hist_dir = "wakeup";
  20241. +static char *wakeup_latency_hist_dir_sharedprio = "sharedprio";
  20242. +static notrace void probe_wakeup_latency_hist_start(void *v,
  20243. + struct task_struct *p);
  20244. +static notrace void probe_wakeup_latency_hist_stop(void *v,
  20245. + bool preempt, struct task_struct *prev, struct task_struct *next);
  20246. +static notrace void probe_sched_migrate_task(void *,
  20247. + struct task_struct *task, int cpu);
  20248. +static struct enable_data wakeup_latency_enabled_data = {
  20249. + .latency_type = WAKEUP_LATENCY,
  20250. + .enabled = 0,
  20251. +};
  20252. +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc);
  20253. +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio);
  20254. +static DEFINE_PER_CPU(struct task_struct *, wakeup_task);
  20255. +static DEFINE_PER_CPU(int, wakeup_sharedprio);
  20256. +static unsigned long wakeup_pid;
  20257. +#endif
  20258. +
  20259. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  20260. +static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets);
  20261. +static char *missed_timer_offsets_dir = "missed_timer_offsets";
  20262. +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
  20263. + long long offset, struct task_struct *curr, struct task_struct *task);
  20264. +static struct enable_data missed_timer_offsets_enabled_data = {
  20265. + .latency_type = MISSED_TIMER_OFFSETS,
  20266. + .enabled = 0,
  20267. +};
  20268. +static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc);
  20269. +static unsigned long missed_timer_offsets_pid;
  20270. +#endif
  20271. +
  20272. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  20273. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20274. +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
  20275. +static char *timerandwakeup_latency_hist_dir = "timerandwakeup";
  20276. +static struct enable_data timerandwakeup_enabled_data = {
  20277. + .latency_type = TIMERANDWAKEUP_LATENCY,
  20278. + .enabled = 0,
  20279. +};
  20280. +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
  20281. +#endif
  20282. +
  20283. +void notrace latency_hist(int latency_type, int cpu, long latency,
  20284. + long timeroffset, cycle_t stop,
  20285. + struct task_struct *p)
  20286. +{
  20287. + struct hist_data *my_hist;
  20288. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20289. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20290. + struct maxlatproc_data *mp = NULL;
  20291. +#endif
  20292. +
  20293. + if (!cpu_possible(cpu) || latency_type < 0 ||
  20294. + latency_type >= MAX_LATENCY_TYPE)
  20295. + return;
  20296. +
  20297. + switch (latency_type) {
  20298. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  20299. + case IRQSOFF_LATENCY:
  20300. + my_hist = &per_cpu(irqsoff_hist, cpu);
  20301. + break;
  20302. +#endif
  20303. +#ifdef CONFIG_PREEMPT_OFF_HIST
  20304. + case PREEMPTOFF_LATENCY:
  20305. + my_hist = &per_cpu(preemptoff_hist, cpu);
  20306. + break;
  20307. +#endif
  20308. +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
  20309. + case PREEMPTIRQSOFF_LATENCY:
  20310. + my_hist = &per_cpu(preemptirqsoff_hist, cpu);
  20311. + break;
  20312. +#endif
  20313. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  20314. + case WAKEUP_LATENCY:
  20315. + my_hist = &per_cpu(wakeup_latency_hist, cpu);
  20316. + mp = &per_cpu(wakeup_maxlatproc, cpu);
  20317. + break;
  20318. + case WAKEUP_LATENCY_SHAREDPRIO:
  20319. + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
  20320. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
  20321. + break;
  20322. +#endif
  20323. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  20324. + case MISSED_TIMER_OFFSETS:
  20325. + my_hist = &per_cpu(missed_timer_offsets, cpu);
  20326. + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
  20327. + break;
  20328. +#endif
  20329. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  20330. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20331. + case TIMERANDWAKEUP_LATENCY:
  20332. + my_hist = &per_cpu(timerandwakeup_latency_hist, cpu);
  20333. + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
  20334. + break;
  20335. +#endif
  20336. +
  20337. + default:
  20338. + return;
  20339. + }
  20340. +
  20341. + latency += my_hist->offset;
  20342. +
  20343. + if (atomic_read(&my_hist->hist_mode) == 0)
  20344. + return;
  20345. +
  20346. + if (latency < 0 || latency >= MAX_ENTRY_NUM) {
  20347. + if (latency < 0)
  20348. + my_hist->below_hist_bound_samples++;
  20349. + else
  20350. + my_hist->above_hist_bound_samples++;
  20351. + } else
  20352. + my_hist->hist_array[latency]++;
  20353. +
  20354. + if (unlikely(latency > my_hist->max_lat ||
  20355. + my_hist->min_lat == LONG_MAX)) {
  20356. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20357. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20358. + if (latency_type == WAKEUP_LATENCY ||
  20359. + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
  20360. + latency_type == MISSED_TIMER_OFFSETS ||
  20361. + latency_type == TIMERANDWAKEUP_LATENCY) {
  20362. + strncpy(mp->comm, p->comm, sizeof(mp->comm));
  20363. + strncpy(mp->current_comm, current->comm,
  20364. + sizeof(mp->current_comm));
  20365. + mp->pid = task_pid_nr(p);
  20366. + mp->current_pid = task_pid_nr(current);
  20367. + mp->prio = p->prio;
  20368. + mp->current_prio = current->prio;
  20369. + mp->latency = latency;
  20370. + mp->timeroffset = timeroffset;
  20371. + mp->timestamp = stop;
  20372. + }
  20373. +#endif
  20374. + my_hist->max_lat = latency;
  20375. + }
  20376. + if (unlikely(latency < my_hist->min_lat))
  20377. + my_hist->min_lat = latency;
  20378. + my_hist->total_samples++;
  20379. + my_hist->accumulate_lat += latency;
  20380. +}
  20381. +
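A standalone sketch of the bucket accounting done by latency_hist() above: the latency in microseconds is shifted by the per-histogram offset and then counted either in hist_array[] or in one of the out-of-range counters. struct hist and account() are illustrative names, not the kernel structures.

#include <stdio.h>

#define MAX_ENTRY_NUM 10240

struct hist {
        long offset;            /* MAX_ENTRY_NUM/2 for a bipolar scale */
        unsigned long long below, above;
        unsigned long long array[MAX_ENTRY_NUM];
};

static void account(struct hist *h, long latency_us)
{
        long idx = latency_us + h->offset;

        if (idx < 0)
                h->below++;                     /* below the histogram range */
        else if (idx >= MAX_ENTRY_NUM)
                h->above++;                     /* above the histogram range */
        else
                h->array[idx]++;                /* one more sample in this bucket */
}

int main(void)
{
        static struct hist h = { .offset = MAX_ENTRY_NUM / 2 };

        account(&h, -3);        /* e.g. a timer that fired early */
        account(&h, 42);
        printf("%llu %llu\n", h.array[MAX_ENTRY_NUM / 2 - 3],
               h.array[MAX_ENTRY_NUM / 2 + 42]);
        return 0;
}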
  20382. +static void *l_start(struct seq_file *m, loff_t *pos)
  20383. +{
  20384. + loff_t *index_ptr = NULL;
  20385. + loff_t index = *pos;
  20386. + struct hist_data *my_hist = m->private;
  20387. +
  20388. + if (index == 0) {
  20389. + char minstr[32], avgstr[32], maxstr[32];
  20390. +
  20391. + atomic_dec(&my_hist->hist_mode);
  20392. +
  20393. + if (likely(my_hist->total_samples)) {
  20394. + long avg = (long) div64_s64(my_hist->accumulate_lat,
  20395. + my_hist->total_samples);
  20396. + snprintf(minstr, sizeof(minstr), "%ld",
  20397. + my_hist->min_lat - my_hist->offset);
  20398. + snprintf(avgstr, sizeof(avgstr), "%ld",
  20399. + avg - my_hist->offset);
  20400. + snprintf(maxstr, sizeof(maxstr), "%ld",
  20401. + my_hist->max_lat - my_hist->offset);
  20402. + } else {
  20403. + strcpy(minstr, "<undef>");
  20404. + strcpy(avgstr, minstr);
  20405. + strcpy(maxstr, minstr);
  20406. + }
  20407. +
  20408. + seq_printf(m, "#Minimum latency: %s microseconds\n"
  20409. + "#Average latency: %s microseconds\n"
  20410. + "#Maximum latency: %s microseconds\n"
  20411. + "#Total samples: %llu\n"
  20412. + "#There are %llu samples lower than %ld"
  20413. + " microseconds.\n"
  20414. + "#There are %llu samples greater or equal"
  20415. + " than %ld microseconds.\n"
  20416. + "#usecs\t%16s\n",
  20417. + minstr, avgstr, maxstr,
  20418. + my_hist->total_samples,
  20419. + my_hist->below_hist_bound_samples,
  20420. + -my_hist->offset,
  20421. + my_hist->above_hist_bound_samples,
  20422. + MAX_ENTRY_NUM - my_hist->offset,
  20423. + "samples");
  20424. + }
  20425. + if (index < MAX_ENTRY_NUM) {
  20426. + index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
  20427. + if (index_ptr)
  20428. + *index_ptr = index;
  20429. + }
  20430. +
  20431. + return index_ptr;
  20432. +}
  20433. +
  20434. +static void *l_next(struct seq_file *m, void *p, loff_t *pos)
  20435. +{
  20436. + loff_t *index_ptr = p;
  20437. + struct hist_data *my_hist = m->private;
  20438. +
  20439. + if (++*pos >= MAX_ENTRY_NUM) {
  20440. + atomic_inc(&my_hist->hist_mode);
  20441. + return NULL;
  20442. + }
  20443. + *index_ptr = *pos;
  20444. + return index_ptr;
  20445. +}
  20446. +
  20447. +static void l_stop(struct seq_file *m, void *p)
  20448. +{
  20449. + kfree(p);
  20450. +}
  20451. +
  20452. +static int l_show(struct seq_file *m, void *p)
  20453. +{
  20454. + int index = *(loff_t *) p;
  20455. + struct hist_data *my_hist = m->private;
  20456. +
  20457. + seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset,
  20458. + my_hist->hist_array[index]);
  20459. + return 0;
  20460. +}
  20461. +
  20462. +static const struct seq_operations latency_hist_seq_op = {
  20463. + .start = l_start,
  20464. + .next = l_next,
  20465. + .stop = l_stop,
  20466. + .show = l_show
  20467. +};
  20468. +
  20469. +static int latency_hist_open(struct inode *inode, struct file *file)
  20470. +{
  20471. + int ret;
  20472. +
  20473. + ret = seq_open(file, &latency_hist_seq_op);
  20474. + if (!ret) {
  20475. + struct seq_file *seq = file->private_data;
  20476. + seq->private = inode->i_private;
  20477. + }
  20478. + return ret;
  20479. +}
  20480. +
  20481. +static const struct file_operations latency_hist_fops = {
  20482. + .open = latency_hist_open,
  20483. + .read = seq_read,
  20484. + .llseek = seq_lseek,
  20485. + .release = seq_release,
  20486. +};
  20487. +
  20488. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20489. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20490. +static void clear_maxlatprocdata(struct maxlatproc_data *mp)
  20491. +{
  20492. + mp->comm[0] = mp->current_comm[0] = '\0';
  20493. + mp->prio = mp->current_prio = mp->pid = mp->current_pid =
  20494. + mp->latency = mp->timeroffset = -1;
  20495. + mp->timestamp = 0;
  20496. +}
  20497. +#endif
  20498. +
  20499. +static void hist_reset(struct hist_data *hist)
  20500. +{
  20501. + atomic_dec(&hist->hist_mode);
  20502. +
  20503. + memset(hist->hist_array, 0, sizeof(hist->hist_array));
  20504. + hist->below_hist_bound_samples = 0ULL;
  20505. + hist->above_hist_bound_samples = 0ULL;
  20506. + hist->min_lat = LONG_MAX;
  20507. + hist->max_lat = LONG_MIN;
  20508. + hist->total_samples = 0ULL;
  20509. + hist->accumulate_lat = 0LL;
  20510. +
  20511. + atomic_inc(&hist->hist_mode);
  20512. +}
  20513. +
  20514. +static ssize_t
  20515. +latency_hist_reset(struct file *file, const char __user *a,
  20516. + size_t size, loff_t *off)
  20517. +{
  20518. + int cpu;
  20519. + struct hist_data *hist = NULL;
  20520. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20521. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20522. + struct maxlatproc_data *mp = NULL;
  20523. +#endif
  20524. + off_t latency_type = (off_t) file->private_data;
  20525. +
  20526. + for_each_online_cpu(cpu) {
  20527. +
  20528. + switch (latency_type) {
  20529. +#ifdef CONFIG_PREEMPT_OFF_HIST
  20530. + case PREEMPTOFF_LATENCY:
  20531. + hist = &per_cpu(preemptoff_hist, cpu);
  20532. + break;
  20533. +#endif
  20534. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  20535. + case IRQSOFF_LATENCY:
  20536. + hist = &per_cpu(irqsoff_hist, cpu);
  20537. + break;
  20538. +#endif
  20539. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  20540. + case PREEMPTIRQSOFF_LATENCY:
  20541. + hist = &per_cpu(preemptirqsoff_hist, cpu);
  20542. + break;
  20543. +#endif
  20544. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  20545. + case WAKEUP_LATENCY:
  20546. + hist = &per_cpu(wakeup_latency_hist, cpu);
  20547. + mp = &per_cpu(wakeup_maxlatproc, cpu);
  20548. + break;
  20549. + case WAKEUP_LATENCY_SHAREDPRIO:
  20550. + hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
  20551. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
  20552. + break;
  20553. +#endif
  20554. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  20555. + case MISSED_TIMER_OFFSETS:
  20556. + hist = &per_cpu(missed_timer_offsets, cpu);
  20557. + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
  20558. + break;
  20559. +#endif
  20560. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  20561. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20562. + case TIMERANDWAKEUP_LATENCY:
  20563. + hist = &per_cpu(timerandwakeup_latency_hist, cpu);
  20564. + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
  20565. + break;
  20566. +#endif
  20567. + }
  20568. +
  20569. + hist_reset(hist);
  20570. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20571. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20572. + if (latency_type == WAKEUP_LATENCY ||
  20573. + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
  20574. + latency_type == MISSED_TIMER_OFFSETS ||
  20575. + latency_type == TIMERANDWAKEUP_LATENCY)
  20576. + clear_maxlatprocdata(mp);
  20577. +#endif
  20578. + }
  20579. +
  20580. + return size;
  20581. +}
  20582. +
  20583. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20584. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20585. +static ssize_t
  20586. +show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  20587. +{
  20588. + char buf[64];
  20589. + int r;
  20590. + unsigned long *this_pid = file->private_data;
  20591. +
  20592. + r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid);
  20593. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  20594. +}
  20595. +
  20596. +static ssize_t do_pid(struct file *file, const char __user *ubuf,
  20597. + size_t cnt, loff_t *ppos)
  20598. +{
  20599. + char buf[64];
  20600. + unsigned long pid;
  20601. + unsigned long *this_pid = file->private_data;
  20602. +
  20603. + if (cnt >= sizeof(buf))
  20604. + return -EINVAL;
  20605. +
  20606. + if (copy_from_user(&buf, ubuf, cnt))
  20607. + return -EFAULT;
  20608. +
  20609. + buf[cnt] = '\0';
  20610. +
  20611. + if (kstrtoul(buf, 10, &pid))
  20612. + return -EINVAL;
  20613. +
  20614. + *this_pid = pid;
  20615. +
  20616. + return cnt;
  20617. +}
  20618. +#endif
  20619. +
  20620. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20621. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20622. +static ssize_t
  20623. +show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  20624. +{
  20625. + int r;
  20626. + struct maxlatproc_data *mp = file->private_data;
  20627. + int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
  20628. + unsigned long long t;
  20629. + unsigned long usecs, secs;
  20630. + char *buf;
  20631. +
  20632. + if (mp->pid == -1 || mp->current_pid == -1) {
  20633. + buf = "(none)\n";
  20634. + return simple_read_from_buffer(ubuf, cnt, ppos, buf,
  20635. + strlen(buf));
  20636. + }
  20637. +
  20638. + buf = kmalloc(strmaxlen, GFP_KERNEL);
  20639. + if (buf == NULL)
  20640. + return -ENOMEM;
  20641. +
  20642. + t = ns2usecs(mp->timestamp);
  20643. + usecs = do_div(t, USEC_PER_SEC);
  20644. + secs = (unsigned long) t;
  20645. + r = snprintf(buf, strmaxlen,
  20646. + "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
  20647. + MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
  20648. + mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
  20649. + secs, usecs);
  20650. + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  20651. + kfree(buf);
  20652. + return r;
  20653. +}
  20654. +#endif
  20655. +
  20656. +static ssize_t
  20657. +show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  20658. +{
  20659. + char buf[64];
  20660. + struct enable_data *ed = file->private_data;
  20661. + int r;
  20662. +
  20663. + r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled);
  20664. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  20665. +}
  20666. +
  20667. +static ssize_t
  20668. +do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos)
  20669. +{
  20670. + char buf[64];
  20671. + long enable;
  20672. + struct enable_data *ed = file->private_data;
  20673. +
  20674. + if (cnt >= sizeof(buf))
  20675. + return -EINVAL;
  20676. +
  20677. + if (copy_from_user(&buf, ubuf, cnt))
  20678. + return -EFAULT;
  20679. +
  20680. + buf[cnt] = 0;
  20681. +
  20682. + if (kstrtoul(buf, 10, &enable))
  20683. + return -EINVAL;
  20684. +
  20685. + if ((enable && ed->enabled) || (!enable && !ed->enabled))
  20686. + return cnt;
  20687. +
  20688. + if (enable) {
  20689. + int ret;
  20690. +
  20691. + switch (ed->latency_type) {
  20692. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  20693. + case PREEMPTIRQSOFF_LATENCY:
  20694. + ret = register_trace_preemptirqsoff_hist(
  20695. + probe_preemptirqsoff_hist, NULL);
  20696. + if (ret) {
  20697. + pr_info("wakeup trace: Couldn't assign "
  20698. + "probe_preemptirqsoff_hist "
  20699. + "to trace_preemptirqsoff_hist\n");
  20700. + return ret;
  20701. + }
  20702. + break;
  20703. +#endif
  20704. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  20705. + case WAKEUP_LATENCY:
  20706. + ret = register_trace_sched_wakeup(
  20707. + probe_wakeup_latency_hist_start, NULL);
  20708. + if (ret) {
  20709. + pr_info("wakeup trace: Couldn't assign "
  20710. + "probe_wakeup_latency_hist_start "
  20711. + "to trace_sched_wakeup\n");
  20712. + return ret;
  20713. + }
  20714. + ret = register_trace_sched_wakeup_new(
  20715. + probe_wakeup_latency_hist_start, NULL);
  20716. + if (ret) {
  20717. + pr_info("wakeup trace: Couldn't assign "
  20718. + "probe_wakeup_latency_hist_start "
  20719. + "to trace_sched_wakeup_new\n");
  20720. + unregister_trace_sched_wakeup(
  20721. + probe_wakeup_latency_hist_start, NULL);
  20722. + return ret;
  20723. + }
  20724. + ret = register_trace_sched_switch(
  20725. + probe_wakeup_latency_hist_stop, NULL);
  20726. + if (ret) {
  20727. + pr_info("wakeup trace: Couldn't assign "
  20728. + "probe_wakeup_latency_hist_stop "
  20729. + "to trace_sched_switch\n");
  20730. + unregister_trace_sched_wakeup(
  20731. + probe_wakeup_latency_hist_start, NULL);
  20732. + unregister_trace_sched_wakeup_new(
  20733. + probe_wakeup_latency_hist_start, NULL);
  20734. + return ret;
  20735. + }
  20736. + ret = register_trace_sched_migrate_task(
  20737. + probe_sched_migrate_task, NULL);
  20738. + if (ret) {
  20739. + pr_info("wakeup trace: Couldn't assign "
  20740. + "probe_sched_migrate_task "
  20741. + "to trace_sched_migrate_task\n");
  20742. + unregister_trace_sched_wakeup(
  20743. + probe_wakeup_latency_hist_start, NULL);
  20744. + unregister_trace_sched_wakeup_new(
  20745. + probe_wakeup_latency_hist_start, NULL);
  20746. + unregister_trace_sched_switch(
  20747. + probe_wakeup_latency_hist_stop, NULL);
  20748. + return ret;
  20749. + }
  20750. + break;
  20751. +#endif
  20752. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  20753. + case MISSED_TIMER_OFFSETS:
  20754. + ret = register_trace_hrtimer_interrupt(
  20755. + probe_hrtimer_interrupt, NULL);
  20756. + if (ret) {
  20757. + pr_info("wakeup trace: Couldn't assign "
  20758. + "probe_hrtimer_interrupt "
  20759. + "to trace_hrtimer_interrupt\n");
  20760. + return ret;
  20761. + }
  20762. + break;
  20763. +#endif
  20764. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  20765. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20766. + case TIMERANDWAKEUP_LATENCY:
  20767. + if (!wakeup_latency_enabled_data.enabled ||
  20768. + !missed_timer_offsets_enabled_data.enabled)
  20769. + return -EINVAL;
  20770. + break;
  20771. +#endif
  20772. + default:
  20773. + break;
  20774. + }
  20775. + } else {
  20776. + switch (ed->latency_type) {
  20777. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  20778. + case PREEMPTIRQSOFF_LATENCY:
  20779. + {
  20780. + int cpu;
  20781. +
  20782. + unregister_trace_preemptirqsoff_hist(
  20783. + probe_preemptirqsoff_hist, NULL);
  20784. + for_each_online_cpu(cpu) {
  20785. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  20786. + per_cpu(hist_irqsoff_counting,
  20787. + cpu) = 0;
  20788. +#endif
  20789. +#ifdef CONFIG_PREEMPT_OFF_HIST
  20790. + per_cpu(hist_preemptoff_counting,
  20791. + cpu) = 0;
  20792. +#endif
  20793. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  20794. + per_cpu(hist_preemptirqsoff_counting,
  20795. + cpu) = 0;
  20796. +#endif
  20797. + }
  20798. + }
  20799. + break;
  20800. +#endif
  20801. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  20802. + case WAKEUP_LATENCY:
  20803. + {
  20804. + int cpu;
  20805. +
  20806. + unregister_trace_sched_wakeup(
  20807. + probe_wakeup_latency_hist_start, NULL);
  20808. + unregister_trace_sched_wakeup_new(
  20809. + probe_wakeup_latency_hist_start, NULL);
  20810. + unregister_trace_sched_switch(
  20811. + probe_wakeup_latency_hist_stop, NULL);
  20812. + unregister_trace_sched_migrate_task(
  20813. + probe_sched_migrate_task, NULL);
  20814. +
  20815. + for_each_online_cpu(cpu) {
  20816. + per_cpu(wakeup_task, cpu) = NULL;
  20817. + per_cpu(wakeup_sharedprio, cpu) = 0;
  20818. + }
  20819. + }
  20820. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  20821. + timerandwakeup_enabled_data.enabled = 0;
  20822. +#endif
  20823. + break;
  20824. +#endif
  20825. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  20826. + case MISSED_TIMER_OFFSETS:
  20827. + unregister_trace_hrtimer_interrupt(
  20828. + probe_hrtimer_interrupt, NULL);
  20829. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  20830. + timerandwakeup_enabled_data.enabled = 0;
  20831. +#endif
  20832. + break;
  20833. +#endif
  20834. + default:
  20835. + break;
  20836. + }
  20837. + }
  20838. + ed->enabled = enable;
  20839. + return cnt;
  20840. +}
  20841. +
  20842. +static const struct file_operations latency_hist_reset_fops = {
  20843. + .open = tracing_open_generic,
  20844. + .write = latency_hist_reset,
  20845. +};
  20846. +
  20847. +static const struct file_operations enable_fops = {
  20848. + .open = tracing_open_generic,
  20849. + .read = show_enable,
  20850. + .write = do_enable,
  20851. +};
  20852. +
  20853. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  20854. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  20855. +static const struct file_operations pid_fops = {
  20856. + .open = tracing_open_generic,
  20857. + .read = show_pid,
  20858. + .write = do_pid,
  20859. +};
  20860. +
  20861. +static const struct file_operations maxlatproc_fops = {
  20862. + .open = tracing_open_generic,
  20863. + .read = show_maxlatproc,
  20864. +};
  20865. +#endif
  20866. +
  20867. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  20868. +static notrace void probe_preemptirqsoff_hist(void *v, int reason,
  20869. + int starthist)
  20870. +{
  20871. + int cpu = raw_smp_processor_id();
  20872. + int time_set = 0;
  20873. +
  20874. + if (starthist) {
  20875. + cycle_t uninitialized_var(start);
  20876. +
  20877. + if (!preempt_count() && !irqs_disabled())
  20878. + return;
  20879. +
  20880. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  20881. + if ((reason == IRQS_OFF || reason == TRACE_START) &&
  20882. + !per_cpu(hist_irqsoff_counting, cpu)) {
  20883. + per_cpu(hist_irqsoff_counting, cpu) = 1;
  20884. + start = ftrace_now(cpu);
  20885. + time_set++;
  20886. + per_cpu(hist_irqsoff_start, cpu) = start;
  20887. + }
  20888. +#endif
  20889. +
  20890. +#ifdef CONFIG_PREEMPT_OFF_HIST
  20891. + if ((reason == PREEMPT_OFF || reason == TRACE_START) &&
  20892. + !per_cpu(hist_preemptoff_counting, cpu)) {
  20893. + per_cpu(hist_preemptoff_counting, cpu) = 1;
  20894. + if (!(time_set++))
  20895. + start = ftrace_now(cpu);
  20896. + per_cpu(hist_preemptoff_start, cpu) = start;
  20897. + }
  20898. +#endif
  20899. +
  20900. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  20901. + if (per_cpu(hist_irqsoff_counting, cpu) &&
  20902. + per_cpu(hist_preemptoff_counting, cpu) &&
  20903. + !per_cpu(hist_preemptirqsoff_counting, cpu)) {
  20904. + per_cpu(hist_preemptirqsoff_counting, cpu) = 1;
  20905. + if (!time_set)
  20906. + start = ftrace_now(cpu);
  20907. + per_cpu(hist_preemptirqsoff_start, cpu) = start;
  20908. + }
  20909. +#endif
  20910. + } else {
  20911. + cycle_t uninitialized_var(stop);
  20912. +
  20913. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  20914. + if ((reason == IRQS_ON || reason == TRACE_STOP) &&
  20915. + per_cpu(hist_irqsoff_counting, cpu)) {
  20916. + cycle_t start = per_cpu(hist_irqsoff_start, cpu);
  20917. +
  20918. + stop = ftrace_now(cpu);
  20919. + time_set++;
  20920. + if (start) {
  20921. + long latency = ((long) (stop - start)) /
  20922. + NSECS_PER_USECS;
  20923. +
  20924. + latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
  20925. + stop, NULL);
  20926. + }
  20927. + per_cpu(hist_irqsoff_counting, cpu) = 0;
  20928. + }
  20929. +#endif
  20930. +
  20931. +#ifdef CONFIG_PREEMPT_OFF_HIST
  20932. + if ((reason == PREEMPT_ON || reason == TRACE_STOP) &&
  20933. + per_cpu(hist_preemptoff_counting, cpu)) {
  20934. + cycle_t start = per_cpu(hist_preemptoff_start, cpu);
  20935. +
  20936. + if (!(time_set++))
  20937. + stop = ftrace_now(cpu);
  20938. + if (start) {
  20939. + long latency = ((long) (stop - start)) /
  20940. + NSECS_PER_USECS;
  20941. +
  20942. + latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
  20943. + 0, stop, NULL);
  20944. + }
  20945. + per_cpu(hist_preemptoff_counting, cpu) = 0;
  20946. + }
  20947. +#endif
  20948. +
  20949. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  20950. + if ((!per_cpu(hist_irqsoff_counting, cpu) ||
  20951. + !per_cpu(hist_preemptoff_counting, cpu)) &&
  20952. + per_cpu(hist_preemptirqsoff_counting, cpu)) {
  20953. + cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu);
  20954. +
  20955. + if (!time_set)
  20956. + stop = ftrace_now(cpu);
  20957. + if (start) {
  20958. + long latency = ((long) (stop - start)) /
  20959. + NSECS_PER_USECS;
  20960. +
  20961. + latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
  20962. + latency, 0, stop, NULL);
  20963. + }
  20964. + per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
  20965. + }
  20966. +#endif
  20967. + }
  20968. +}
  20969. +#endif
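A minimal sketch of the start/stop pairing used by probe_preemptirqsoff_hist() above: a "counting" flag (per-CPU in the real code) records only the outermost off-event, and the latency is the timestamp delta in nanoseconds scaled to microseconds. section_enter()/section_exit() and the single global state are illustrative stand-ins.

#include <stdio.h>

#define NSECS_PER_USECS 1000L

static int counting;                    /* per-CPU flag in the real code */
static unsigned long long start_ns;

static void section_enter(unsigned long long now_ns)
{
        if (!counting) {                /* outermost "off" event only */
                counting = 1;
                start_ns = now_ns;
        }
}

/* Returns the section length in microseconds, or -1 if unpaired. */
static long section_exit(unsigned long long now_ns)
{
        long latency = -1;

        if (counting) {
                latency = (long)(now_ns - start_ns) / NSECS_PER_USECS;
                counting = 0;
        }
        return latency;
}

int main(void)
{
        section_enter(1000000ULL);
        printf("%ld\n", section_exit(1042000ULL));      /* prints 42 */
        return 0;
}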
  20970. +
  20971. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  20972. +static DEFINE_RAW_SPINLOCK(wakeup_lock);
  20973. +static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
  20974. + int cpu)
  20975. +{
  20976. + int old_cpu = task_cpu(task);
  20977. +
  20978. + if (cpu != old_cpu) {
  20979. + unsigned long flags;
  20980. + struct task_struct *cpu_wakeup_task;
  20981. +
  20982. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  20983. +
  20984. + cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
  20985. + if (task == cpu_wakeup_task) {
  20986. + put_task_struct(cpu_wakeup_task);
  20987. + per_cpu(wakeup_task, old_cpu) = NULL;
  20988. + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
  20989. + get_task_struct(cpu_wakeup_task);
  20990. + }
  20991. +
  20992. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  20993. + }
  20994. +}
  20995. +
  20996. +static notrace void probe_wakeup_latency_hist_start(void *v,
  20997. + struct task_struct *p)
  20998. +{
  20999. + unsigned long flags;
  21000. + struct task_struct *curr = current;
  21001. + int cpu = task_cpu(p);
  21002. + struct task_struct *cpu_wakeup_task;
  21003. +
  21004. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  21005. +
  21006. + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
  21007. +
  21008. + if (wakeup_pid) {
  21009. + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
  21010. + p->prio == curr->prio)
  21011. + per_cpu(wakeup_sharedprio, cpu) = 1;
  21012. + if (likely(wakeup_pid != task_pid_nr(p)))
  21013. + goto out;
  21014. + } else {
  21015. + if (likely(!rt_task(p)) ||
  21016. + (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) ||
  21017. + p->prio > curr->prio)
  21018. + goto out;
  21019. + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
  21020. + p->prio == curr->prio)
  21021. + per_cpu(wakeup_sharedprio, cpu) = 1;
  21022. + }
  21023. +
  21024. + if (cpu_wakeup_task)
  21025. + put_task_struct(cpu_wakeup_task);
  21026. + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p;
  21027. + get_task_struct(cpu_wakeup_task);
  21028. + cpu_wakeup_task->preempt_timestamp_hist =
  21029. + ftrace_now(raw_smp_processor_id());
  21030. +out:
  21031. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  21032. +}
  21033. +
  21034. +static notrace void probe_wakeup_latency_hist_stop(void *v,
  21035. + bool preempt, struct task_struct *prev, struct task_struct *next)
  21036. +{
  21037. + unsigned long flags;
  21038. + int cpu = task_cpu(next);
  21039. + long latency;
  21040. + cycle_t stop;
  21041. + struct task_struct *cpu_wakeup_task;
  21042. +
  21043. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  21044. +
  21045. + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
  21046. +
  21047. + if (cpu_wakeup_task == NULL)
  21048. + goto out;
  21049. +
  21050. + /* Already running? */
  21051. + if (unlikely(current == cpu_wakeup_task))
  21052. + goto out_reset;
  21053. +
  21054. + if (next != cpu_wakeup_task) {
  21055. + if (next->prio < cpu_wakeup_task->prio)
  21056. + goto out_reset;
  21057. +
  21058. + if (next->prio == cpu_wakeup_task->prio)
  21059. + per_cpu(wakeup_sharedprio, cpu) = 1;
  21060. +
  21061. + goto out;
  21062. + }
  21063. +
  21064. + if (current->prio == cpu_wakeup_task->prio)
  21065. + per_cpu(wakeup_sharedprio, cpu) = 1;
  21066. +
  21067. + /*
  21068. + * The task we are waiting for is about to be switched to.
  21069. + * Calculate latency and store it in histogram.
  21070. + */
  21071. + stop = ftrace_now(raw_smp_processor_id());
  21072. +
  21073. + latency = ((long) (stop - next->preempt_timestamp_hist)) /
  21074. + NSECS_PER_USECS;
  21075. +
  21076. + if (per_cpu(wakeup_sharedprio, cpu)) {
  21077. + latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
  21078. + next);
  21079. + per_cpu(wakeup_sharedprio, cpu) = 0;
  21080. + } else {
  21081. + latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
  21082. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  21083. + if (timerandwakeup_enabled_data.enabled) {
  21084. + latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
  21085. + next->timer_offset + latency, next->timer_offset,
  21086. + stop, next);
  21087. + }
  21088. +#endif
  21089. + }
  21090. +
  21091. +out_reset:
  21092. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  21093. + next->timer_offset = 0;
  21094. +#endif
  21095. + put_task_struct(cpu_wakeup_task);
  21096. + per_cpu(wakeup_task, cpu) = NULL;
  21097. +out:
  21098. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  21099. +}
  21100. +#endif
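A standalone sketch of what the two wakeup probes above measure: stamp the highest-priority woken task at sched_wakeup time, then take the delta when that task is switched in. wake(), switch_in() and the single-slot state are illustrative stand-ins for the per-CPU wakeup_task handling; reference counting, shared-priority bookkeeping and locking are omitted.

#include <stdio.h>

#define NSECS_PER_USECS 1000L

struct fake_task {
        int prio;                               /* lower value == higher priority */
        unsigned long long preempt_timestamp_hist;
};

static struct fake_task *wakeup_task;           /* per-CPU pointer in the real code */

static void wake(struct fake_task *p, unsigned long long now_ns)
{
        /* Keep tracking the current task if it has equal or higher priority. */
        if (wakeup_task && wakeup_task->prio <= p->prio)
                return;
        p->preempt_timestamp_hist = now_ns;
        wakeup_task = p;
}

/* Returns the wakeup latency in microseconds, or -1 if not the tracked task. */
static long switch_in(struct fake_task *next, unsigned long long now_ns)
{
        long latency = -1;

        if (wakeup_task && next == wakeup_task) {
                latency = (long)(now_ns - next->preempt_timestamp_hist) /
                          NSECS_PER_USECS;
                wakeup_task = NULL;
        }
        return latency;
}

int main(void)
{
        struct fake_task t = { .prio = 10 };

        wake(&t, 5000000ULL);
        printf("%ld\n", switch_in(&t, 5123000ULL));     /* prints 123 */
        return 0;
}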
  21101. +
  21102. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  21103. +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
  21104. + long long latency_ns, struct task_struct *curr,
  21105. + struct task_struct *task)
  21106. +{
  21107. + if (latency_ns <= 0 && task != NULL && rt_task(task) &&
  21108. + (task->prio < curr->prio ||
  21109. + (task->prio == curr->prio &&
  21110. + !cpumask_test_cpu(cpu, &task->cpus_allowed)))) {
  21111. + long latency;
  21112. + cycle_t now;
  21113. +
  21114. + if (missed_timer_offsets_pid) {
  21115. + if (likely(missed_timer_offsets_pid !=
  21116. + task_pid_nr(task)))
  21117. + return;
  21118. + }
  21119. +
  21120. + now = ftrace_now(cpu);
  21121. + latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
  21122. + latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
  21123. + task);
  21124. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  21125. + task->timer_offset = latency;
  21126. +#endif
  21127. + }
  21128. +}
  21129. +#endif
  21130. +
  21131. +static __init int latency_hist_init(void)
  21132. +{
  21133. + struct dentry *latency_hist_root = NULL;
  21134. + struct dentry *dentry;
  21135. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  21136. + struct dentry *dentry_sharedprio;
  21137. +#endif
  21138. + struct dentry *entry;
  21139. + struct dentry *enable_root;
  21140. + int i = 0;
  21141. + struct hist_data *my_hist;
  21142. + char name[64];
  21143. + char *cpufmt = "CPU%d";
  21144. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  21145. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  21146. + char *cpufmt_maxlatproc = "max_latency-CPU%d";
  21147. + struct maxlatproc_data *mp = NULL;
  21148. +#endif
  21149. +
  21150. + dentry = tracing_init_dentry();
  21151. + latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry);
  21152. + enable_root = debugfs_create_dir("enable", latency_hist_root);
  21153. +
  21154. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  21155. + dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root);
  21156. + for_each_possible_cpu(i) {
  21157. + sprintf(name, cpufmt, i);
  21158. + entry = debugfs_create_file(name, 0444, dentry,
  21159. + &per_cpu(irqsoff_hist, i), &latency_hist_fops);
  21160. + my_hist = &per_cpu(irqsoff_hist, i);
  21161. + atomic_set(&my_hist->hist_mode, 1);
  21162. + my_hist->min_lat = LONG_MAX;
  21163. + }
  21164. + entry = debugfs_create_file("reset", 0644, dentry,
  21165. + (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops);
  21166. +#endif
  21167. +
  21168. +#ifdef CONFIG_PREEMPT_OFF_HIST
  21169. + dentry = debugfs_create_dir(preemptoff_hist_dir,
  21170. + latency_hist_root);
  21171. + for_each_possible_cpu(i) {
  21172. + sprintf(name, cpufmt, i);
  21173. + entry = debugfs_create_file(name, 0444, dentry,
  21174. + &per_cpu(preemptoff_hist, i), &latency_hist_fops);
  21175. + my_hist = &per_cpu(preemptoff_hist, i);
  21176. + atomic_set(&my_hist->hist_mode, 1);
  21177. + my_hist->min_lat = LONG_MAX;
  21178. + }
  21179. + entry = debugfs_create_file("reset", 0644, dentry,
  21180. + (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops);
  21181. +#endif
  21182. +
  21183. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  21184. + dentry = debugfs_create_dir(preemptirqsoff_hist_dir,
  21185. + latency_hist_root);
  21186. + for_each_possible_cpu(i) {
  21187. + sprintf(name, cpufmt, i);
  21188. + entry = debugfs_create_file(name, 0444, dentry,
  21189. + &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops);
  21190. + my_hist = &per_cpu(preemptirqsoff_hist, i);
  21191. + atomic_set(&my_hist->hist_mode, 1);
  21192. + my_hist->min_lat = LONG_MAX;
  21193. + }
  21194. + entry = debugfs_create_file("reset", 0644, dentry,
  21195. + (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops);
  21196. +#endif
  21197. +
  21198. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  21199. + entry = debugfs_create_file("preemptirqsoff", 0644,
  21200. + enable_root, (void *)&preemptirqsoff_enabled_data,
  21201. + &enable_fops);
  21202. +#endif
  21203. +
  21204. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  21205. + dentry = debugfs_create_dir(wakeup_latency_hist_dir,
  21206. + latency_hist_root);
  21207. + dentry_sharedprio = debugfs_create_dir(
  21208. + wakeup_latency_hist_dir_sharedprio, dentry);
  21209. + for_each_possible_cpu(i) {
  21210. + sprintf(name, cpufmt, i);
  21211. +
  21212. + entry = debugfs_create_file(name, 0444, dentry,
  21213. + &per_cpu(wakeup_latency_hist, i),
  21214. + &latency_hist_fops);
  21215. + my_hist = &per_cpu(wakeup_latency_hist, i);
  21216. + atomic_set(&my_hist->hist_mode, 1);
  21217. + my_hist->min_lat = LONG_MAX;
  21218. +
  21219. + entry = debugfs_create_file(name, 0444, dentry_sharedprio,
  21220. + &per_cpu(wakeup_latency_hist_sharedprio, i),
  21221. + &latency_hist_fops);
  21222. + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i);
  21223. + atomic_set(&my_hist->hist_mode, 1);
  21224. + my_hist->min_lat = LONG_MAX;
  21225. +
  21226. + sprintf(name, cpufmt_maxlatproc, i);
  21227. +
  21228. + mp = &per_cpu(wakeup_maxlatproc, i);
  21229. + entry = debugfs_create_file(name, 0444, dentry, mp,
  21230. + &maxlatproc_fops);
  21231. + clear_maxlatprocdata(mp);
  21232. +
  21233. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, i);
  21234. + entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp,
  21235. + &maxlatproc_fops);
  21236. + clear_maxlatprocdata(mp);
  21237. + }
  21238. + entry = debugfs_create_file("pid", 0644, dentry,
  21239. + (void *)&wakeup_pid, &pid_fops);
  21240. + entry = debugfs_create_file("reset", 0644, dentry,
  21241. + (void *)WAKEUP_LATENCY, &latency_hist_reset_fops);
  21242. + entry = debugfs_create_file("reset", 0644, dentry_sharedprio,
  21243. + (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops);
  21244. + entry = debugfs_create_file("wakeup", 0644,
  21245. + enable_root, (void *)&wakeup_latency_enabled_data,
  21246. + &enable_fops);
  21247. +#endif
  21248. +
  21249. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  21250. + dentry = debugfs_create_dir(missed_timer_offsets_dir,
  21251. + latency_hist_root);
  21252. + for_each_possible_cpu(i) {
  21253. + sprintf(name, cpufmt, i);
  21254. + entry = debugfs_create_file(name, 0444, dentry,
  21255. + &per_cpu(missed_timer_offsets, i), &latency_hist_fops);
  21256. + my_hist = &per_cpu(missed_timer_offsets, i);
  21257. + atomic_set(&my_hist->hist_mode, 1);
  21258. + my_hist->min_lat = LONG_MAX;
  21259. +
  21260. + sprintf(name, cpufmt_maxlatproc, i);
  21261. + mp = &per_cpu(missed_timer_offsets_maxlatproc, i);
  21262. + entry = debugfs_create_file(name, 0444, dentry, mp,
  21263. + &maxlatproc_fops);
  21264. + clear_maxlatprocdata(mp);
  21265. + }
  21266. + entry = debugfs_create_file("pid", 0644, dentry,
  21267. + (void *)&missed_timer_offsets_pid, &pid_fops);
  21268. + entry = debugfs_create_file("reset", 0644, dentry,
  21269. + (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops);
  21270. + entry = debugfs_create_file("missed_timer_offsets", 0644,
  21271. + enable_root, (void *)&missed_timer_offsets_enabled_data,
  21272. + &enable_fops);
  21273. +#endif
  21274. +
  21275. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  21276. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  21277. + dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
  21278. + latency_hist_root);
  21279. + for_each_possible_cpu(i) {
  21280. + sprintf(name, cpufmt, i);
  21281. + entry = debugfs_create_file(name, 0444, dentry,
  21282. + &per_cpu(timerandwakeup_latency_hist, i),
  21283. + &latency_hist_fops);
  21284. + my_hist = &per_cpu(timerandwakeup_latency_hist, i);
  21285. + atomic_set(&my_hist->hist_mode, 1);
  21286. + my_hist->min_lat = LONG_MAX;
  21287. +
  21288. + sprintf(name, cpufmt_maxlatproc, i);
  21289. + mp = &per_cpu(timerandwakeup_maxlatproc, i);
  21290. + entry = debugfs_create_file(name, 0444, dentry, mp,
  21291. + &maxlatproc_fops);
  21292. + clear_maxlatprocdata(mp);
  21293. + }
  21294. + entry = debugfs_create_file("reset", 0644, dentry,
  21295. + (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops);
  21296. + entry = debugfs_create_file("timerandwakeup", 0644,
  21297. + enable_root, (void *)&timerandwakeup_enabled_data,
  21298. + &enable_fops);
  21299. +#endif
  21300. + return 0;
  21301. +}
  21302. +
  21303. +device_initcall(latency_hist_init);
  21304. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
  21305. index a2f0b9f33e9b..3ffea48d795c 100644
  21306. --- a/kernel/trace/trace.c
  21307. +++ b/kernel/trace/trace.c
  21308. @@ -1657,6 +1657,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
  21309. struct task_struct *tsk = current;
  21310. entry->preempt_count = pc & 0xff;
  21311. + entry->preempt_lazy_count = preempt_lazy_count();
  21312. entry->pid = (tsk) ? tsk->pid : 0;
  21313. entry->flags =
  21314. #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
  21315. @@ -1667,8 +1668,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
  21316. ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
  21317. ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
  21318. ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
  21319. - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
  21320. + (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) |
  21321. + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) |
  21322. (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
  21323. +
  21324. + entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
  21325. }
  21326. EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
  21327. @@ -2561,14 +2565,17 @@ get_total_entries(struct trace_buffer *buf,
  21328. static void print_lat_help_header(struct seq_file *m)
  21329. {
  21330. - seq_puts(m, "# _------=> CPU# \n"
  21331. - "# / _-----=> irqs-off \n"
  21332. - "# | / _----=> need-resched \n"
  21333. - "# || / _---=> hardirq/softirq \n"
  21334. - "# ||| / _--=> preempt-depth \n"
  21335. - "# |||| / delay \n"
  21336. - "# cmd pid ||||| time | caller \n"
  21337. - "# \\ / ||||| \\ | / \n");
  21338. + seq_puts(m, "# _--------=> CPU# \n"
  21339. + "# / _-------=> irqs-off \n"
  21340. + "# | / _------=> need-resched \n"
  21341. + "# || / _-----=> need-resched_lazy \n"
  21342. + "# ||| / _----=> hardirq/softirq \n"
  21343. + "# |||| / _---=> preempt-depth \n"
  21344. + "# ||||| / _--=> preempt-lazy-depth\n"
  21345. + "# |||||| / _-=> migrate-disable \n"
  21346. + "# ||||||| / delay \n"
  21347. + "# cmd pid |||||||| time | caller \n"
  21348. + "# \\ / |||||||| \\ | / \n");
  21349. }
  21350. static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
  21351. @@ -2594,11 +2601,14 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file
  21352. print_event_info(buf, m);
  21353. seq_puts(m, "# _-----=> irqs-off\n"
  21354. "# / _----=> need-resched\n"
  21355. - "# | / _---=> hardirq/softirq\n"
  21356. - "# || / _--=> preempt-depth\n"
  21357. - "# ||| / delay\n"
  21358. - "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"
  21359. - "# | | | |||| | |\n");
  21360. + "# |/ _-----=> need-resched_lazy\n"
  21361. + "# || / _---=> hardirq/softirq\n"
  21362. + "# ||| / _--=> preempt-depth\n"
  21363. + "# |||| /_--=> preempt-lazy-depth\n"
  21364. + "# ||||| _-=> migrate-disable \n"
  21365. + "# ||||| / delay\n"
  21366. + "# TASK-PID CPU# |||||| TIMESTAMP FUNCTION\n"
  21367. + "# | | | |||||| | |\n");
  21368. }
  21369. void
  21370. diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
  21371. index 3fff4adfd431..acb00bc2b0e0 100644
  21372. --- a/kernel/trace/trace.h
  21373. +++ b/kernel/trace/trace.h
  21374. @@ -117,6 +117,7 @@ struct kretprobe_trace_entry_head {
  21375. * NEED_RESCHED - reschedule is requested
  21376. * HARDIRQ - inside an interrupt handler
  21377. * SOFTIRQ - inside a softirq handler
  21378. + * NEED_RESCHED_LAZY - lazy reschedule is requested
  21379. */
  21380. enum trace_flag_type {
  21381. TRACE_FLAG_IRQS_OFF = 0x01,
  21382. @@ -126,6 +127,7 @@ enum trace_flag_type {
  21383. TRACE_FLAG_SOFTIRQ = 0x10,
  21384. TRACE_FLAG_PREEMPT_RESCHED = 0x20,
  21385. TRACE_FLAG_NMI = 0x40,
  21386. + TRACE_FLAG_NEED_RESCHED_LAZY = 0x80,
  21387. };
  21388. #define TRACE_BUF_SIZE 1024
  21389. diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
  21390. index 6f965864cc02..72a4cf8dbef5 100644
  21391. --- a/kernel/trace/trace_events.c
  21392. +++ b/kernel/trace/trace_events.c
  21393. @@ -188,6 +188,8 @@ static int trace_define_common_fields(void)
  21394. __common_field(unsigned char, flags);
  21395. __common_field(unsigned char, preempt_count);
  21396. __common_field(int, pid);
  21397. + __common_field(unsigned short, migrate_disable);
  21398. + __common_field(unsigned short, padding);
  21399. return ret;
  21400. }
  21401. @@ -244,6 +246,14 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
  21402. local_save_flags(fbuffer->flags);
  21403. fbuffer->pc = preempt_count();
  21404. + /*
  21405. + * If CONFIG_PREEMPT is enabled, then the tracepoint itself disables
  21406. + * preemption (adding one to the preempt_count). Since we are
  21407. + * interested in the preempt_count at the time the tracepoint was
  21408. + * hit, we need to subtract one to offset the increment.
  21409. + */
  21410. + if (IS_ENABLED(CONFIG_PREEMPT))
  21411. + fbuffer->pc--;
  21412. fbuffer->trace_file = trace_file;
  21413. fbuffer->event =
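Note on the trace_events.c hunk above: IS_ENABLED(CONFIG_PREEMPT) expands to a compile-time constant, so on non-preemptible kernels the compiler removes the decrement entirely. A minimal preprocessor equivalent, shown only as an illustration:

#ifdef CONFIG_PREEMPT
        /* undo the preempt_count increment added by the tracepoint itself */
        fbuffer->pc--;
#endif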
  21414. diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
  21415. index 03cdff84d026..940bd10b4406 100644
  21416. --- a/kernel/trace/trace_irqsoff.c
  21417. +++ b/kernel/trace/trace_irqsoff.c
  21418. @@ -13,6 +13,7 @@
  21419. #include <linux/uaccess.h>
  21420. #include <linux/module.h>
  21421. #include <linux/ftrace.h>
  21422. +#include <trace/events/hist.h>
  21423. #include "trace.h"
  21424. @@ -424,11 +425,13 @@ void start_critical_timings(void)
  21425. {
  21426. if (preempt_trace() || irq_trace())
  21427. start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  21428. + trace_preemptirqsoff_hist_rcuidle(TRACE_START, 1);
  21429. }
  21430. EXPORT_SYMBOL_GPL(start_critical_timings);
  21431. void stop_critical_timings(void)
  21432. {
  21433. + trace_preemptirqsoff_hist_rcuidle(TRACE_STOP, 0);
  21434. if (preempt_trace() || irq_trace())
  21435. stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  21436. }
  21437. @@ -438,6 +441,7 @@ EXPORT_SYMBOL_GPL(stop_critical_timings);
  21438. #ifdef CONFIG_PROVE_LOCKING
  21439. void time_hardirqs_on(unsigned long a0, unsigned long a1)
  21440. {
  21441. + trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0);
  21442. if (!preempt_trace() && irq_trace())
  21443. stop_critical_timing(a0, a1);
  21444. }
  21445. @@ -446,6 +450,7 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
  21446. {
  21447. if (!preempt_trace() && irq_trace())
  21448. start_critical_timing(a0, a1);
  21449. + trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1);
  21450. }
  21451. #else /* !CONFIG_PROVE_LOCKING */
  21452. @@ -471,6 +476,7 @@ inline void print_irqtrace_events(struct task_struct *curr)
  21453. */
  21454. void trace_hardirqs_on(void)
  21455. {
  21456. + trace_preemptirqsoff_hist(IRQS_ON, 0);
  21457. if (!preempt_trace() && irq_trace())
  21458. stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  21459. }
  21460. @@ -480,11 +486,13 @@ void trace_hardirqs_off(void)
  21461. {
  21462. if (!preempt_trace() && irq_trace())
  21463. start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  21464. + trace_preemptirqsoff_hist(IRQS_OFF, 1);
  21465. }
  21466. EXPORT_SYMBOL(trace_hardirqs_off);
  21467. __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
  21468. {
  21469. + trace_preemptirqsoff_hist(IRQS_ON, 0);
  21470. if (!preempt_trace() && irq_trace())
  21471. stop_critical_timing(CALLER_ADDR0, caller_addr);
  21472. }
  21473. @@ -494,6 +502,7 @@ __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
  21474. {
  21475. if (!preempt_trace() && irq_trace())
  21476. start_critical_timing(CALLER_ADDR0, caller_addr);
  21477. + trace_preemptirqsoff_hist(IRQS_OFF, 1);
  21478. }
  21479. EXPORT_SYMBOL(trace_hardirqs_off_caller);
  21480. @@ -503,12 +512,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
  21481. #ifdef CONFIG_PREEMPT_TRACER
  21482. void trace_preempt_on(unsigned long a0, unsigned long a1)
  21483. {
  21484. + trace_preemptirqsoff_hist(PREEMPT_ON, 0);
  21485. if (preempt_trace() && !irq_trace())
  21486. stop_critical_timing(a0, a1);
  21487. }
  21488. void trace_preempt_off(unsigned long a0, unsigned long a1)
  21489. {
  21490. + trace_preemptirqsoff_hist(PREEMPT_ON, 1);
  21491. if (preempt_trace() && !irq_trace())
  21492. start_critical_timing(a0, a1);
  21493. }
  21494. diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
  21495. index 0bb9cf2d53e6..455a7464772f 100644
  21496. --- a/kernel/trace/trace_output.c
  21497. +++ b/kernel/trace/trace_output.c
  21498. @@ -386,6 +386,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
  21499. {
  21500. char hardsoft_irq;
  21501. char need_resched;
  21502. + char need_resched_lazy;
  21503. char irqs_off;
  21504. int hardirq;
  21505. int softirq;
  21506. @@ -416,6 +417,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
  21507. break;
  21508. }
  21509. + need_resched_lazy =
  21510. + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
  21511. +
  21512. hardsoft_irq =
  21513. (nmi && hardirq) ? 'Z' :
  21514. nmi ? 'z' :
  21515. @@ -424,14 +428,25 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
  21516. softirq ? 's' :
  21517. '.' ;
  21518. - trace_seq_printf(s, "%c%c%c",
  21519. - irqs_off, need_resched, hardsoft_irq);
  21520. + trace_seq_printf(s, "%c%c%c%c",
  21521. + irqs_off, need_resched, need_resched_lazy,
  21522. + hardsoft_irq);
  21523. if (entry->preempt_count)
  21524. trace_seq_printf(s, "%x", entry->preempt_count);
  21525. else
  21526. trace_seq_putc(s, '.');
  21527. + if (entry->preempt_lazy_count)
  21528. + trace_seq_printf(s, "%x", entry->preempt_lazy_count);
  21529. + else
  21530. + trace_seq_putc(s, '.');
  21531. +
  21532. + if (entry->migrate_disable)
  21533. + trace_seq_printf(s, "%x", entry->migrate_disable);
  21534. + else
  21535. + trace_seq_putc(s, '.');
  21536. +
  21537. return !trace_seq_has_overflowed(s);
  21538. }
  21539. diff --git a/kernel/user.c b/kernel/user.c
  21540. index b069ccbfb0b0..1a2e88e98b5e 100644
  21541. --- a/kernel/user.c
  21542. +++ b/kernel/user.c
  21543. @@ -161,11 +161,11 @@ void free_uid(struct user_struct *up)
  21544. if (!up)
  21545. return;
  21546. - local_irq_save(flags);
  21547. + local_irq_save_nort(flags);
  21548. if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
  21549. free_user(up, flags);
  21550. else
  21551. - local_irq_restore(flags);
  21552. + local_irq_restore_nort(flags);
  21553. }
  21554. struct user_struct *alloc_uid(kuid_t uid)
  21555. diff --git a/kernel/watchdog.c b/kernel/watchdog.c
  21556. index 9acb29f280ec..caba62080411 100644
  21557. --- a/kernel/watchdog.c
  21558. +++ b/kernel/watchdog.c
  21559. @@ -315,6 +315,8 @@ static int is_softlockup(unsigned long touch_ts)
  21560. #ifdef CONFIG_HARDLOCKUP_DETECTOR
  21561. +static DEFINE_RAW_SPINLOCK(watchdog_output_lock);
  21562. +
  21563. static struct perf_event_attr wd_hw_attr = {
  21564. .type = PERF_TYPE_HARDWARE,
  21565. .config = PERF_COUNT_HW_CPU_CYCLES,
  21566. @@ -349,6 +351,13 @@ static void watchdog_overflow_callback(struct perf_event *event,
  21567. /* only print hardlockups once */
  21568. if (__this_cpu_read(hard_watchdog_warn) == true)
  21569. return;
  21570. + /*
  21571. + * If early-printk is enabled then make sure we do not
  21572. + * lock up in printk() and kill console logging:
  21573. + */
  21574. + printk_kill();
  21575. +
  21576. + raw_spin_lock(&watchdog_output_lock);
  21577. pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
  21578. print_modules();
  21579. @@ -366,6 +375,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
  21580. !test_and_set_bit(0, &hardlockup_allcpu_dumped))
  21581. trigger_allbutself_cpu_backtrace();
  21582. + raw_spin_unlock(&watchdog_output_lock);
  21583. if (hardlockup_panic)
  21584. nmi_panic(regs, "Hard LOCKUP");
  21585. @@ -513,6 +523,7 @@ static void watchdog_enable(unsigned int cpu)
  21586. /* kick off the timer for the hardlockup detector */
  21587. hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  21588. hrtimer->function = watchdog_timer_fn;
  21589. + hrtimer->irqsafe = 1;
  21590. /* Enable the perf event */
  21591. watchdog_nmi_enable(cpu);
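The hardlockup hunk above serializes concurrent per-CPU NMI reports with a raw spinlock, which stays a real spinning lock even on PREEMPT_RT (a plain spinlock_t would become a sleeping lock there and must not be taken from NMI context). A minimal sketch of the same pattern, with illustrative names:

#include <linux/spinlock.h>
#include <linux/printk.h>

static DEFINE_RAW_SPINLOCK(example_output_lock);        /* illustrative */

static void example_nmi_report(int cpu)
{
        /* raw_spinlock_t never turns into a sleeping lock on RT */
        raw_spin_lock(&example_output_lock);
        pr_emerg("lockup report for CPU %d\n", cpu);
        raw_spin_unlock(&example_output_lock);
}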
  21592. diff --git a/kernel/workqueue.c b/kernel/workqueue.c
  21593. index 5f5068e94003..54fd5dfb05b1 100644
  21594. --- a/kernel/workqueue.c
  21595. +++ b/kernel/workqueue.c
  21596. @@ -48,6 +48,8 @@
  21597. #include <linux/nodemask.h>
  21598. #include <linux/moduleparam.h>
  21599. #include <linux/uaccess.h>
  21600. +#include <linux/locallock.h>
  21601. +#include <linux/delay.h>
  21602. #include "workqueue_internal.h"
  21603. @@ -121,11 +123,16 @@ enum {
  21604. * cpu or grabbing pool->lock is enough for read access. If
  21605. * POOL_DISASSOCIATED is set, it's identical to L.
  21606. *
  21607. + * On RT we need the extra protection via rt_lock_idle_list() for
  21608. + * the list manipulations against read access from
  21609. + * wq_worker_sleeping(). All other places are nicely serialized via
  21610. + * pool->lock.
  21611. + *
  21612. * A: pool->attach_mutex protected.
  21613. *
  21614. * PL: wq_pool_mutex protected.
  21615. *
  21616. - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
  21617. + * PR: wq_pool_mutex protected for writes. RCU protected for reads.
  21618. *
  21619. * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
  21620. *
  21621. @@ -134,7 +141,7 @@ enum {
  21622. *
  21623. * WQ: wq->mutex protected.
  21624. *
  21625. - * WR: wq->mutex protected for writes. Sched-RCU protected for reads.
  21626. + * WR: wq->mutex protected for writes. RCU protected for reads.
  21627. *
  21628. * MD: wq_mayday_lock protected.
  21629. */
  21630. @@ -185,7 +192,7 @@ struct worker_pool {
  21631. atomic_t nr_running ____cacheline_aligned_in_smp;
  21632. /*
  21633. - * Destruction of pool is sched-RCU protected to allow dereferences
  21634. + * Destruction of pool is RCU protected to allow dereferences
  21635. * from get_work_pool().
  21636. */
  21637. struct rcu_head rcu;
  21638. @@ -214,7 +221,7 @@ struct pool_workqueue {
  21639. /*
  21640. * Release of unbound pwq is punted to system_wq. See put_pwq()
  21641. * and pwq_unbound_release_workfn() for details. pool_workqueue
  21642. - * itself is also sched-RCU protected so that the first pwq can be
  21643. + * itself is also RCU protected so that the first pwq can be
  21644. * determined without grabbing wq->mutex.
  21645. */
  21646. struct work_struct unbound_release_work;
  21647. @@ -348,6 +355,8 @@ EXPORT_SYMBOL_GPL(system_power_efficient_wq);
  21648. struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
  21649. EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
  21650. +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock);
  21651. +
  21652. static int worker_thread(void *__worker);
  21653. static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  21654. @@ -355,20 +364,20 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  21655. #include <trace/events/workqueue.h>
  21656. #define assert_rcu_or_pool_mutex() \
  21657. - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
  21658. + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
  21659. !lockdep_is_held(&wq_pool_mutex), \
  21660. - "sched RCU or wq_pool_mutex should be held")
  21661. + "RCU or wq_pool_mutex should be held")
  21662. #define assert_rcu_or_wq_mutex(wq) \
  21663. - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
  21664. + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
  21665. !lockdep_is_held(&wq->mutex), \
  21666. - "sched RCU or wq->mutex should be held")
  21667. + "RCU or wq->mutex should be held")
  21668. #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
  21669. - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
  21670. + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
  21671. !lockdep_is_held(&wq->mutex) && \
  21672. !lockdep_is_held(&wq_pool_mutex), \
  21673. - "sched RCU, wq->mutex or wq_pool_mutex should be held")
  21674. + "RCU, wq->mutex or wq_pool_mutex should be held")
  21675. #define for_each_cpu_worker_pool(pool, cpu) \
  21676. for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
  21677. @@ -380,7 +389,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  21678. * @pool: iteration cursor
  21679. * @pi: integer used for iteration
  21680. *
  21681. - * This must be called either with wq_pool_mutex held or sched RCU read
  21682. + * This must be called either with wq_pool_mutex held or RCU read
  21683. * locked. If the pool needs to be used beyond the locking in effect, the
  21684. * caller is responsible for guaranteeing that the pool stays online.
  21685. *
  21686. @@ -412,7 +421,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  21687. * @pwq: iteration cursor
  21688. * @wq: the target workqueue
  21689. *
  21690. - * This must be called either with wq->mutex held or sched RCU read locked.
  21691. + * This must be called either with wq->mutex held or RCU read locked.
  21692. * If the pwq needs to be used beyond the locking in effect, the caller is
  21693. * responsible for guaranteeing that the pwq stays online.
  21694. *
  21695. @@ -424,6 +433,31 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  21696. if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
  21697. else
  21698. +#ifdef CONFIG_PREEMPT_RT_BASE
  21699. +static inline void rt_lock_idle_list(struct worker_pool *pool)
  21700. +{
  21701. + preempt_disable();
  21702. +}
  21703. +static inline void rt_unlock_idle_list(struct worker_pool *pool)
  21704. +{
  21705. + preempt_enable();
  21706. +}
  21707. +static inline void sched_lock_idle_list(struct worker_pool *pool) { }
  21708. +static inline void sched_unlock_idle_list(struct worker_pool *pool) { }
  21709. +#else
  21710. +static inline void rt_lock_idle_list(struct worker_pool *pool) { }
  21711. +static inline void rt_unlock_idle_list(struct worker_pool *pool) { }
  21712. +static inline void sched_lock_idle_list(struct worker_pool *pool)
  21713. +{
  21714. + spin_lock_irq(&pool->lock);
  21715. +}
  21716. +static inline void sched_unlock_idle_list(struct worker_pool *pool)
  21717. +{
  21718. + spin_unlock_irq(&pool->lock);
  21719. +}
  21720. +#endif
  21721. +
  21722. +
  21723. #ifdef CONFIG_DEBUG_OBJECTS_WORK
  21724. static struct debug_obj_descr work_debug_descr;
  21725. @@ -574,7 +608,7 @@ static int worker_pool_assign_id(struct worker_pool *pool)
  21726. * @wq: the target workqueue
  21727. * @node: the node ID
  21728. *
  21729. - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
  21730. + * This must be called with any of wq_pool_mutex, wq->mutex or RCU
  21731. * read locked.
  21732. * If the pwq needs to be used beyond the locking in effect, the caller is
  21733. * responsible for guaranteeing that the pwq stays online.
  21734. @@ -718,8 +752,8 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work)
  21735. * @work: the work item of interest
  21736. *
  21737. * Pools are created and destroyed under wq_pool_mutex, and allows read
  21738. - * access under sched-RCU read lock. As such, this function should be
  21739. - * called under wq_pool_mutex or with preemption disabled.
  21740. + * access under RCU read lock. As such, this function should be
  21741. + * called under wq_pool_mutex or inside of a rcu_read_lock() region.
  21742. *
  21743. * All fields of the returned pool are accessible as long as the above
  21744. * mentioned locking is in effect. If the returned pool needs to be used
  21745. @@ -856,50 +890,45 @@ static struct worker *first_idle_worker(struct worker_pool *pool)
  21746. */
  21747. static void wake_up_worker(struct worker_pool *pool)
  21748. {
  21749. - struct worker *worker = first_idle_worker(pool);
  21750. + struct worker *worker;
  21751. +
  21752. + rt_lock_idle_list(pool);
  21753. +
  21754. + worker = first_idle_worker(pool);
  21755. if (likely(worker))
  21756. wake_up_process(worker->task);
  21757. +
  21758. + rt_unlock_idle_list(pool);
  21759. }
  21760. /**
  21761. - * wq_worker_waking_up - a worker is waking up
  21762. - * @task: task waking up
  21763. + * wq_worker_running - a worker is running again
  21764. * @cpu: CPU @task is waking up to
  21765. *
  21766. - * This function is called during try_to_wake_up() when a worker is
  21767. - * being awoken.
  21768. - *
  21769. - * CONTEXT:
  21770. - * spin_lock_irq(rq->lock)
  21771. + * This function is called when a worker returns from schedule()
  21772. */
  21773. -void wq_worker_waking_up(struct task_struct *task, int cpu)
  21774. +void wq_worker_running(struct task_struct *task)
  21775. {
  21776. struct worker *worker = kthread_data(task);
  21777. - if (!(worker->flags & WORKER_NOT_RUNNING)) {
  21778. - WARN_ON_ONCE(worker->pool->cpu != cpu);
  21779. + if (!worker->sleeping)
  21780. + return;
  21781. + if (!(worker->flags & WORKER_NOT_RUNNING))
  21782. atomic_inc(&worker->pool->nr_running);
  21783. - }
  21784. + worker->sleeping = 0;
  21785. }
  21786. /**
  21787. * wq_worker_sleeping - a worker is going to sleep
  21788. * @task: task going to sleep
  21789. *
  21790. - * This function is called during schedule() when a busy worker is
  21791. - * going to sleep. Worker on the same cpu can be woken up by
  21792. - * returning pointer to its task.
  21793. - *
  21794. - * CONTEXT:
  21795. - * spin_lock_irq(rq->lock)
  21796. - *
  21797. - * Return:
  21798. - * Worker task on @cpu to wake up, %NULL if none.
  21799. + * This function is called from schedule() when a busy worker is
  21800. + * going to sleep.
  21801. */
  21802. -struct task_struct *wq_worker_sleeping(struct task_struct *task)
  21803. +void wq_worker_sleeping(struct task_struct *task)
  21804. {
  21805. - struct worker *worker = kthread_data(task), *to_wakeup = NULL;
  21806. + struct worker *worker = kthread_data(task);
  21807. struct worker_pool *pool;
  21808. /*
  21809. @@ -908,29 +937,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
  21810. * checking NOT_RUNNING.
  21811. */
  21812. if (worker->flags & WORKER_NOT_RUNNING)
  21813. - return NULL;
  21814. + return;
  21815. pool = worker->pool;
  21816. - /* this can only happen on the local cpu */
  21817. - if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
  21818. - return NULL;
  21819. + if (WARN_ON_ONCE(worker->sleeping))
  21820. + return;
  21821. +
  21822. + worker->sleeping = 1;
  21823. /*
  21824. * The counterpart of the following dec_and_test, implied mb,
  21825. * worklist not empty test sequence is in insert_work().
  21826. * Please read comment there.
  21827. - *
  21828. - * NOT_RUNNING is clear. This means that we're bound to and
  21829. - * running on the local cpu w/ rq lock held and preemption
  21830. - * disabled, which in turn means that none else could be
  21831. - * manipulating idle_list, so dereferencing idle_list without pool
  21832. - * lock is safe.
  21833. */
  21834. if (atomic_dec_and_test(&pool->nr_running) &&
  21835. - !list_empty(&pool->worklist))
  21836. - to_wakeup = first_idle_worker(pool);
  21837. - return to_wakeup ? to_wakeup->task : NULL;
  21838. + !list_empty(&pool->worklist)) {
  21839. + sched_lock_idle_list(pool);
  21840. + wake_up_worker(pool);
  21841. + sched_unlock_idle_list(pool);
  21842. + }
  21843. }
  21844. /**
  21845. @@ -1124,12 +1150,12 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
  21846. {
  21847. if (pwq) {
  21848. /*
  21849. - * As both pwqs and pools are sched-RCU protected, the
  21850. + * As both pwqs and pools are RCU protected, the
  21851. * following lock operations are safe.
  21852. */
  21853. - spin_lock_irq(&pwq->pool->lock);
  21854. + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock);
  21855. put_pwq(pwq);
  21856. - spin_unlock_irq(&pwq->pool->lock);
  21857. + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock);
  21858. }
  21859. }
  21860. @@ -1233,7 +1259,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
  21861. struct worker_pool *pool;
  21862. struct pool_workqueue *pwq;
  21863. - local_irq_save(*flags);
  21864. + local_lock_irqsave(pendingb_lock, *flags);
  21865. /* try to steal the timer if it exists */
  21866. if (is_dwork) {
  21867. @@ -1252,6 +1278,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
  21868. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
  21869. return 0;
  21870. + rcu_read_lock();
  21871. /*
  21872. * The queueing is in progress, or it is already queued. Try to
  21873. * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
  21874. @@ -1290,14 +1317,16 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
  21875. set_work_pool_and_keep_pending(work, pool->id);
  21876. spin_unlock(&pool->lock);
  21877. + rcu_read_unlock();
  21878. return 1;
  21879. }
  21880. spin_unlock(&pool->lock);
  21881. fail:
  21882. - local_irq_restore(*flags);
  21883. + rcu_read_unlock();
  21884. + local_unlock_irqrestore(pendingb_lock, *flags);
  21885. if (work_is_canceling(work))
  21886. return -ENOENT;
  21887. - cpu_relax();
  21888. + cpu_chill();
  21889. return -EAGAIN;
  21890. }
  21891. @@ -1399,7 +1428,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
  21892. * queued or lose PENDING. Grabbing PENDING and queueing should
  21893. * happen with IRQ disabled.
  21894. */
  21895. - WARN_ON_ONCE(!irqs_disabled());
  21896. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  21897. debug_work_activate(work);
  21898. @@ -1407,6 +1436,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
  21899. if (unlikely(wq->flags & __WQ_DRAINING) &&
  21900. WARN_ON_ONCE(!is_chained_work(wq)))
  21901. return;
  21902. + rcu_read_lock();
  21903. retry:
  21904. if (req_cpu == WORK_CPU_UNBOUND)
  21905. cpu = wq_select_unbound_cpu(raw_smp_processor_id());
  21906. @@ -1463,10 +1493,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
  21907. /* pwq determined, queue */
  21908. trace_workqueue_queue_work(req_cpu, pwq, work);
  21909. - if (WARN_ON(!list_empty(&work->entry))) {
  21910. - spin_unlock(&pwq->pool->lock);
  21911. - return;
  21912. - }
  21913. + if (WARN_ON(!list_empty(&work->entry)))
  21914. + goto out;
  21915. pwq->nr_in_flight[pwq->work_color]++;
  21916. work_flags = work_color_to_flags(pwq->work_color);
  21917. @@ -1484,7 +1512,9 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
  21918. insert_work(pwq, work, worklist, work_flags);
  21919. +out:
  21920. spin_unlock(&pwq->pool->lock);
  21921. + rcu_read_unlock();
  21922. }
  21923. /**
  21924. @@ -1504,14 +1534,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
  21925. bool ret = false;
  21926. unsigned long flags;
  21927. - local_irq_save(flags);
  21928. + local_lock_irqsave(pendingb_lock,flags);
  21929. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
  21930. __queue_work(cpu, wq, work);
  21931. ret = true;
  21932. }
  21933. - local_irq_restore(flags);
  21934. + local_unlock_irqrestore(pendingb_lock, flags);
  21935. return ret;
  21936. }
  21937. EXPORT_SYMBOL(queue_work_on);
  21938. @@ -1578,14 +1608,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
  21939. unsigned long flags;
  21940. /* read the comment in __queue_work() */
  21941. - local_irq_save(flags);
  21942. + local_lock_irqsave(pendingb_lock, flags);
  21943. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
  21944. __queue_delayed_work(cpu, wq, dwork, delay);
  21945. ret = true;
  21946. }
  21947. - local_irq_restore(flags);
  21948. + local_unlock_irqrestore(pendingb_lock, flags);
  21949. return ret;
  21950. }
  21951. EXPORT_SYMBOL(queue_delayed_work_on);
  21952. @@ -1620,7 +1650,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
  21953. if (likely(ret >= 0)) {
  21954. __queue_delayed_work(cpu, wq, dwork, delay);
  21955. - local_irq_restore(flags);
  21956. + local_unlock_irqrestore(pendingb_lock, flags);
  21957. }
  21958. /* -ENOENT from try_to_grab_pending() becomes %true */
  21959. @@ -1653,7 +1683,9 @@ static void worker_enter_idle(struct worker *worker)
  21960. worker->last_active = jiffies;
  21961. /* idle_list is LIFO */
  21962. + rt_lock_idle_list(pool);
  21963. list_add(&worker->entry, &pool->idle_list);
  21964. + rt_unlock_idle_list(pool);
  21965. if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
  21966. mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
  21967. @@ -1686,7 +1718,9 @@ static void worker_leave_idle(struct worker *worker)
  21968. return;
  21969. worker_clr_flags(worker, WORKER_IDLE);
  21970. pool->nr_idle--;
  21971. + rt_lock_idle_list(pool);
  21972. list_del_init(&worker->entry);
  21973. + rt_unlock_idle_list(pool);
  21974. }
  21975. static struct worker *alloc_worker(int node)
  21976. @@ -1852,7 +1886,9 @@ static void destroy_worker(struct worker *worker)
  21977. pool->nr_workers--;
  21978. pool->nr_idle--;
  21979. + rt_lock_idle_list(pool);
  21980. list_del_init(&worker->entry);
  21981. + rt_unlock_idle_list(pool);
  21982. worker->flags |= WORKER_DIE;
  21983. wake_up_process(worker->task);
  21984. }
  21985. @@ -2811,14 +2847,14 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
  21986. might_sleep();
  21987. - local_irq_disable();
  21988. + rcu_read_lock();
  21989. pool = get_work_pool(work);
  21990. if (!pool) {
  21991. - local_irq_enable();
  21992. + rcu_read_unlock();
  21993. return false;
  21994. }
  21995. - spin_lock(&pool->lock);
  21996. + spin_lock_irq(&pool->lock);
  21997. /* see the comment in try_to_grab_pending() with the same code */
  21998. pwq = get_work_pwq(work);
  21999. if (pwq) {
  22000. @@ -2847,10 +2883,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
  22001. else
  22002. lock_map_acquire_read(&pwq->wq->lockdep_map);
  22003. lock_map_release(&pwq->wq->lockdep_map);
  22004. -
  22005. + rcu_read_unlock();
  22006. return true;
  22007. already_gone:
  22008. spin_unlock_irq(&pool->lock);
  22009. + rcu_read_unlock();
  22010. return false;
  22011. }
  22012. @@ -2937,7 +2974,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
  22013. /* tell other tasks trying to grab @work to back off */
  22014. mark_work_canceling(work);
  22015. - local_irq_restore(flags);
  22016. + local_unlock_irqrestore(pendingb_lock, flags);
  22017. flush_work(work);
  22018. clear_work_data(work);
  22019. @@ -2992,10 +3029,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync);
  22020. */
  22021. bool flush_delayed_work(struct delayed_work *dwork)
  22022. {
  22023. - local_irq_disable();
  22024. + local_lock_irq(pendingb_lock);
  22025. if (del_timer_sync(&dwork->timer))
  22026. __queue_work(dwork->cpu, dwork->wq, &dwork->work);
  22027. - local_irq_enable();
  22028. + local_unlock_irq(pendingb_lock);
  22029. return flush_work(&dwork->work);
  22030. }
  22031. EXPORT_SYMBOL(flush_delayed_work);
  22032. @@ -3030,7 +3067,7 @@ bool cancel_delayed_work(struct delayed_work *dwork)
  22033. set_work_pool_and_clear_pending(&dwork->work,
  22034. get_work_pool_id(&dwork->work));
  22035. - local_irq_restore(flags);
  22036. + local_unlock_irqrestore(pendingb_lock, flags);
  22037. return ret;
  22038. }
  22039. EXPORT_SYMBOL(cancel_delayed_work);
  22040. @@ -3259,7 +3296,7 @@ static void rcu_free_pool(struct rcu_head *rcu)
  22041. * put_unbound_pool - put a worker_pool
  22042. * @pool: worker_pool to put
  22043. *
  22044. - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU
  22045. + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
  22046. * safe manner. get_unbound_pool() calls this function on its failure path
  22047. * and this function should be able to release pools which went through,
  22048. * successfully or not, init_worker_pool().
  22049. @@ -3313,8 +3350,8 @@ static void put_unbound_pool(struct worker_pool *pool)
  22050. del_timer_sync(&pool->idle_timer);
  22051. del_timer_sync(&pool->mayday_timer);
  22052. - /* sched-RCU protected to allow dereferences from get_work_pool() */
  22053. - call_rcu_sched(&pool->rcu, rcu_free_pool);
  22054. + /* RCU protected to allow dereferences from get_work_pool() */
  22055. + call_rcu(&pool->rcu, rcu_free_pool);
  22056. }
  22057. /**
  22058. @@ -3421,14 +3458,14 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
  22059. put_unbound_pool(pool);
  22060. mutex_unlock(&wq_pool_mutex);
  22061. - call_rcu_sched(&pwq->rcu, rcu_free_pwq);
  22062. + call_rcu(&pwq->rcu, rcu_free_pwq);
  22063. /*
  22064. * If we're the last pwq going away, @wq is already dead and no one
  22065. * is gonna access it anymore. Schedule RCU free.
  22066. */
  22067. if (is_last)
  22068. - call_rcu_sched(&wq->rcu, rcu_free_wq);
  22069. + call_rcu(&wq->rcu, rcu_free_wq);
  22070. }
  22071. /**
  22072. @@ -4078,7 +4115,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
  22073. * The base ref is never dropped on per-cpu pwqs. Directly
  22074. * schedule RCU free.
  22075. */
  22076. - call_rcu_sched(&wq->rcu, rcu_free_wq);
  22077. + call_rcu(&wq->rcu, rcu_free_wq);
  22078. } else {
  22079. /*
  22080. * We're the sole accessor of @wq at this point. Directly
  22081. @@ -4171,7 +4208,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
  22082. struct pool_workqueue *pwq;
  22083. bool ret;
  22084. - rcu_read_lock_sched();
  22085. + rcu_read_lock();
  22086. + preempt_disable();
  22087. if (cpu == WORK_CPU_UNBOUND)
  22088. cpu = smp_processor_id();
  22089. @@ -4182,7 +4220,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
  22090. pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
  22091. ret = !list_empty(&pwq->delayed_works);
  22092. - rcu_read_unlock_sched();
  22093. + preempt_enable();
  22094. + rcu_read_unlock();
  22095. return ret;
  22096. }
  22097. @@ -4208,15 +4247,15 @@ unsigned int work_busy(struct work_struct *work)
  22098. if (work_pending(work))
  22099. ret |= WORK_BUSY_PENDING;
  22100. - local_irq_save(flags);
  22101. + rcu_read_lock();
  22102. pool = get_work_pool(work);
  22103. if (pool) {
  22104. - spin_lock(&pool->lock);
  22105. + spin_lock_irqsave(&pool->lock, flags);
  22106. if (find_worker_executing_work(pool, work))
  22107. ret |= WORK_BUSY_RUNNING;
  22108. - spin_unlock(&pool->lock);
  22109. + spin_unlock_irqrestore(&pool->lock, flags);
  22110. }
  22111. - local_irq_restore(flags);
  22112. + rcu_read_unlock();
  22113. return ret;
  22114. }
  22115. @@ -4405,7 +4444,7 @@ void show_workqueue_state(void)
  22116. unsigned long flags;
  22117. int pi;
  22118. - rcu_read_lock_sched();
  22119. + rcu_read_lock();
  22120. pr_info("Showing busy workqueues and worker pools:\n");
  22121. @@ -4458,7 +4497,7 @@ void show_workqueue_state(void)
  22122. spin_unlock_irqrestore(&pool->lock, flags);
  22123. }
  22124. - rcu_read_unlock_sched();
  22125. + rcu_read_unlock();
  22126. }
  22127. /*
  22128. @@ -4819,16 +4858,16 @@ bool freeze_workqueues_busy(void)
  22129. * nr_active is monotonically decreasing. It's safe
  22130. * to peek without lock.
  22131. */
  22132. - rcu_read_lock_sched();
  22133. + rcu_read_lock();
  22134. for_each_pwq(pwq, wq) {
  22135. WARN_ON_ONCE(pwq->nr_active < 0);
  22136. if (pwq->nr_active) {
  22137. busy = true;
  22138. - rcu_read_unlock_sched();
  22139. + rcu_read_unlock();
  22140. goto out_unlock;
  22141. }
  22142. }
  22143. - rcu_read_unlock_sched();
  22144. + rcu_read_unlock();
  22145. }
  22146. out_unlock:
  22147. mutex_unlock(&wq_pool_mutex);
  22148. @@ -5018,7 +5057,8 @@ static ssize_t wq_pool_ids_show(struct device *dev,
  22149. const char *delim = "";
  22150. int node, written = 0;
  22151. - rcu_read_lock_sched();
  22152. + get_online_cpus();
  22153. + rcu_read_lock();
  22154. for_each_node(node) {
  22155. written += scnprintf(buf + written, PAGE_SIZE - written,
  22156. "%s%d:%d", delim, node,
  22157. @@ -5026,7 +5066,8 @@ static ssize_t wq_pool_ids_show(struct device *dev,
  22158. delim = " ";
  22159. }
  22160. written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
  22161. - rcu_read_unlock_sched();
  22162. + rcu_read_unlock();
  22163. + put_online_cpus();
  22164. return written;
  22165. }
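The recurring change in the workqueue diff above is replacing local_irq_save()/local_irq_restore() with a named local lock (pendingb_lock): on non-RT kernels this still just disables interrupts, while on PREEMPT_RT_FULL it becomes a per-CPU sleeping lock and the section stays preemptible. A minimal sketch of that pattern, using the locallock API exactly as it appears above (lock and function names are illustrative):

#include <linux/locallock.h>

static DEFINE_LOCAL_IRQ_LOCK(example_lock);     /* illustrative */

static void example_update(void)
{
        unsigned long flags;

        /* !RT: local_irq_save(); RT: per-CPU lock, section stays preemptible */
        local_lock_irqsave(example_lock, flags);
        /* ... touch state that previously relied on IRQs being off ... */
        local_unlock_irqrestore(example_lock, flags);
}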
  22166. diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
  22167. index 8635417c587b..f000c4d6917e 100644
  22168. --- a/kernel/workqueue_internal.h
  22169. +++ b/kernel/workqueue_internal.h
  22170. @@ -43,6 +43,7 @@ struct worker {
  22171. unsigned long last_active; /* L: last active timestamp */
  22172. unsigned int flags; /* X: flags */
  22173. int id; /* I: worker id */
  22174. + int sleeping; /* None */
  22175. /*
  22176. * Opaque string set with work_set_desc(). Printed out with task
  22177. @@ -68,7 +69,7 @@ static inline struct worker *current_wq_worker(void)
  22178. * Scheduler hooks for concurrency managed workqueue. Only to be used from
  22179. * sched/core.c and workqueue.c.
  22180. */
  22181. -void wq_worker_waking_up(struct task_struct *task, int cpu);
  22182. -struct task_struct *wq_worker_sleeping(struct task_struct *task);
  22183. +void wq_worker_running(struct task_struct *task);
  22184. +void wq_worker_sleeping(struct task_struct *task);
  22185. #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
  22186. diff --git a/lib/Kconfig b/lib/Kconfig
  22187. index 3cca1222578e..b89fc373331f 100644
  22188. --- a/lib/Kconfig
  22189. +++ b/lib/Kconfig
  22190. @@ -397,6 +397,7 @@ config CHECK_SIGNATURE
  22191. config CPUMASK_OFFSTACK
  22192. bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
  22193. + depends on !PREEMPT_RT_FULL
  22194. help
  22195. Use dynamic allocation for cpumask_var_t, instead of putting
  22196. them on the stack. This is a bit more expensive, but avoids
  22197. diff --git a/lib/debugobjects.c b/lib/debugobjects.c
  22198. index 519b5a10fd70..5970701e8f1e 100644
  22199. --- a/lib/debugobjects.c
  22200. +++ b/lib/debugobjects.c
  22201. @@ -309,7 +309,10 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
  22202. struct debug_obj *obj;
  22203. unsigned long flags;
  22204. - fill_pool();
  22205. +#ifdef CONFIG_PREEMPT_RT_FULL
  22206. + if (preempt_count() == 0 && !irqs_disabled())
  22207. +#endif
  22208. + fill_pool();
  22209. db = get_bucket((unsigned long) addr);
  22210. diff --git a/lib/idr.c b/lib/idr.c
  22211. index 6098336df267..9decbe914595 100644
  22212. --- a/lib/idr.c
  22213. +++ b/lib/idr.c
  22214. @@ -30,6 +30,7 @@
  22215. #include <linux/idr.h>
  22216. #include <linux/spinlock.h>
  22217. #include <linux/percpu.h>
  22218. +#include <linux/locallock.h>
  22219. #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1)
  22220. #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT)
  22221. @@ -45,6 +46,37 @@ static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head);
  22222. static DEFINE_PER_CPU(int, idr_preload_cnt);
  22223. static DEFINE_SPINLOCK(simple_ida_lock);
  22224. +#ifdef CONFIG_PREEMPT_RT_FULL
  22225. +static DEFINE_LOCAL_IRQ_LOCK(idr_lock);
  22226. +
  22227. +static inline void idr_preload_lock(void)
  22228. +{
  22229. + local_lock(idr_lock);
  22230. +}
  22231. +
  22232. +static inline void idr_preload_unlock(void)
  22233. +{
  22234. + local_unlock(idr_lock);
  22235. +}
  22236. +
  22237. +void idr_preload_end(void)
  22238. +{
  22239. + idr_preload_unlock();
  22240. +}
  22241. +EXPORT_SYMBOL(idr_preload_end);
  22242. +#else
  22243. +static inline void idr_preload_lock(void)
  22244. +{
  22245. + preempt_disable();
  22246. +}
  22247. +
  22248. +static inline void idr_preload_unlock(void)
  22249. +{
  22250. + preempt_enable();
  22251. +}
  22252. +#endif
  22253. +
  22254. +
  22255. /* the maximum ID which can be allocated given idr->layers */
  22256. static int idr_max(int layers)
  22257. {
  22258. @@ -115,14 +147,14 @@ static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr)
  22259. * context. See idr_preload() for details.
  22260. */
  22261. if (!in_interrupt()) {
  22262. - preempt_disable();
  22263. + idr_preload_lock();
  22264. new = __this_cpu_read(idr_preload_head);
  22265. if (new) {
  22266. __this_cpu_write(idr_preload_head, new->ary[0]);
  22267. __this_cpu_dec(idr_preload_cnt);
  22268. new->ary[0] = NULL;
  22269. }
  22270. - preempt_enable();
  22271. + idr_preload_unlock();
  22272. if (new)
  22273. return new;
  22274. }
  22275. @@ -366,7 +398,6 @@ static void idr_fill_slot(struct idr *idr, void *ptr, int id,
  22276. idr_mark_full(pa, id);
  22277. }
  22278. -
  22279. /**
  22280. * idr_preload - preload for idr_alloc()
  22281. * @gfp_mask: allocation mask to use for preloading
  22282. @@ -401,7 +432,7 @@ void idr_preload(gfp_t gfp_mask)
  22283. WARN_ON_ONCE(in_interrupt());
  22284. might_sleep_if(gfpflags_allow_blocking(gfp_mask));
  22285. - preempt_disable();
  22286. + idr_preload_lock();
  22287. /*
  22288. * idr_alloc() is likely to succeed w/o full idr_layer buffer and
  22289. @@ -413,9 +444,9 @@ void idr_preload(gfp_t gfp_mask)
  22290. while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) {
  22291. struct idr_layer *new;
  22292. - preempt_enable();
  22293. + idr_preload_unlock();
  22294. new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
  22295. - preempt_disable();
  22296. + idr_preload_lock();
  22297. if (!new)
  22298. break;
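Callers of the preload API are unaffected by the idr change above; only the implementation of the preload section differs, with idr_preload_end() mapping to local_unlock() on RT instead of preempt_enable(). A typical caller, shown as a hedged sketch with illustrative names:

#include <linux/idr.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>

static DEFINE_IDR(example_idr);                 /* illustrative */
static DEFINE_SPINLOCK(example_idr_lock);

int example_assign_id(void *ptr)
{
        int id;

        idr_preload(GFP_KERNEL);        /* may sleep, refills the per-CPU node cache */
        spin_lock(&example_idr_lock);
        /* GFP_NOWAIT allocation falls back to the preloaded nodes */
        id = idr_alloc(&example_idr, ptr, 0, 0, GFP_NOWAIT);
        spin_unlock(&example_idr_lock);
        idr_preload_end();              /* preempt_enable(), or local_unlock() on RT */

        return id;
}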
  22299. diff --git a/lib/irq_poll.c b/lib/irq_poll.c
  22300. index 836f7db4e548..709d4eed1df9 100644
  22301. --- a/lib/irq_poll.c
  22302. +++ b/lib/irq_poll.c
  22303. @@ -36,6 +36,7 @@ void irq_poll_sched(struct irq_poll *iop)
  22304. list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
  22305. __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
  22306. local_irq_restore(flags);
  22307. + preempt_check_resched_rt();
  22308. }
  22309. EXPORT_SYMBOL(irq_poll_sched);
  22310. @@ -71,6 +72,7 @@ void irq_poll_complete(struct irq_poll *iop)
  22311. local_irq_save(flags);
  22312. __irq_poll_complete(iop);
  22313. local_irq_restore(flags);
  22314. + preempt_check_resched_rt();
  22315. }
  22316. EXPORT_SYMBOL(irq_poll_complete);
  22317. @@ -95,6 +97,7 @@ static void irq_poll_softirq(struct softirq_action *h)
  22318. }
  22319. local_irq_enable();
  22320. + preempt_check_resched_rt();
  22321. /* Even though interrupts have been re-enabled, this
  22322. * access is safe because interrupts can only add new
  22323. @@ -132,6 +135,7 @@ static void irq_poll_softirq(struct softirq_action *h)
  22324. __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
  22325. local_irq_enable();
  22326. + preempt_check_resched_rt();
  22327. }
  22328. /**
  22329. @@ -199,6 +203,7 @@ static int irq_poll_cpu_notify(struct notifier_block *self,
  22330. this_cpu_ptr(&blk_cpu_iopoll));
  22331. __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
  22332. local_irq_enable();
  22333. + preempt_check_resched_rt();
  22334. }
  22335. return NOTIFY_OK;
  22336. diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
  22337. index 872a15a2a637..b93a6103fa4d 100644
  22338. --- a/lib/locking-selftest.c
  22339. +++ b/lib/locking-selftest.c
  22340. @@ -590,6 +590,8 @@ GENERATE_TESTCASE(init_held_rsem)
  22341. #include "locking-selftest-spin-hardirq.h"
  22342. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin)
  22343. +#ifndef CONFIG_PREEMPT_RT_FULL
  22344. +
  22345. #include "locking-selftest-rlock-hardirq.h"
  22346. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
  22347. @@ -605,9 +607,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock)
  22348. #include "locking-selftest-wlock-softirq.h"
  22349. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
  22350. +#endif
  22351. +
  22352. #undef E1
  22353. #undef E2
  22354. +#ifndef CONFIG_PREEMPT_RT_FULL
  22355. /*
  22356. * Enabling hardirqs with a softirq-safe lock held:
  22357. */
  22358. @@ -640,6 +645,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
  22359. #undef E1
  22360. #undef E2
  22361. +#endif
  22362. +
  22363. /*
  22364. * Enabling irqs with an irq-safe lock held:
  22365. */
  22366. @@ -663,6 +670,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
  22367. #include "locking-selftest-spin-hardirq.h"
  22368. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin)
  22369. +#ifndef CONFIG_PREEMPT_RT_FULL
  22370. +
  22371. #include "locking-selftest-rlock-hardirq.h"
  22372. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
  22373. @@ -678,6 +687,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock)
  22374. #include "locking-selftest-wlock-softirq.h"
  22375. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
  22376. +#endif
  22377. +
  22378. #undef E1
  22379. #undef E2
  22380. @@ -709,6 +720,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
  22381. #include "locking-selftest-spin-hardirq.h"
  22382. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin)
  22383. +#ifndef CONFIG_PREEMPT_RT_FULL
  22384. +
  22385. #include "locking-selftest-rlock-hardirq.h"
  22386. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
  22387. @@ -724,6 +737,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock)
  22388. #include "locking-selftest-wlock-softirq.h"
  22389. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
  22390. +#endif
  22391. +
  22392. #undef E1
  22393. #undef E2
  22394. #undef E3
  22395. @@ -757,6 +772,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
  22396. #include "locking-selftest-spin-hardirq.h"
  22397. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin)
  22398. +#ifndef CONFIG_PREEMPT_RT_FULL
  22399. +
  22400. #include "locking-selftest-rlock-hardirq.h"
  22401. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
  22402. @@ -772,10 +789,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock)
  22403. #include "locking-selftest-wlock-softirq.h"
  22404. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
  22405. +#endif
  22406. +
  22407. #undef E1
  22408. #undef E2
  22409. #undef E3
  22410. +#ifndef CONFIG_PREEMPT_RT_FULL
  22411. +
  22412. /*
  22413. * read-lock / write-lock irq inversion.
  22414. *
  22415. @@ -838,6 +859,10 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock)
  22416. #undef E2
  22417. #undef E3
  22418. +#endif
  22419. +
  22420. +#ifndef CONFIG_PREEMPT_RT_FULL
  22421. +
  22422. /*
  22423. * read-lock / write-lock recursion that is actually safe.
  22424. */
  22425. @@ -876,6 +901,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
  22426. #undef E2
  22427. #undef E3
  22428. +#endif
  22429. +
  22430. /*
  22431. * read-lock / write-lock recursion that is unsafe.
  22432. */
  22433. @@ -1858,6 +1885,7 @@ void locking_selftest(void)
  22434. printk(" --------------------------------------------------------------------------\n");
  22435. +#ifndef CONFIG_PREEMPT_RT_FULL
  22436. /*
  22437. * irq-context testcases:
  22438. */
  22439. @@ -1870,6 +1898,28 @@ void locking_selftest(void)
  22440. DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
  22441. // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
  22442. +#else
  22443. + /* On -rt, we only do hardirq context test for raw spinlock */
  22444. + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12);
  22445. + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21);
  22446. +
  22447. + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12);
  22448. + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21);
  22449. +
  22450. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123);
  22451. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132);
  22452. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213);
  22453. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231);
  22454. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312);
  22455. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321);
  22456. +
  22457. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123);
  22458. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132);
  22459. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213);
  22460. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231);
  22461. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312);
  22462. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321);
  22463. +#endif
  22464. ww_tests();
  22465. diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
  22466. index 6d40944960de..822a2c027e72 100644
  22467. --- a/lib/percpu_ida.c
  22468. +++ b/lib/percpu_ida.c
  22469. @@ -26,6 +26,9 @@
  22470. #include <linux/string.h>
  22471. #include <linux/spinlock.h>
  22472. #include <linux/percpu_ida.h>
  22473. +#include <linux/locallock.h>
  22474. +
  22475. +static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock);
  22476. struct percpu_ida_cpu {
  22477. /*
  22478. @@ -148,13 +151,13 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
  22479. unsigned long flags;
  22480. int tag;
  22481. - local_irq_save(flags);
  22482. + local_lock_irqsave(irq_off_lock, flags);
  22483. tags = this_cpu_ptr(pool->tag_cpu);
  22484. /* Fastpath */
  22485. tag = alloc_local_tag(tags);
  22486. if (likely(tag >= 0)) {
  22487. - local_irq_restore(flags);
  22488. + local_unlock_irqrestore(irq_off_lock, flags);
  22489. return tag;
  22490. }
  22491. @@ -173,6 +176,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
  22492. if (!tags->nr_free)
  22493. alloc_global_tags(pool, tags);
  22494. +
  22495. if (!tags->nr_free)
  22496. steal_tags(pool, tags);
  22497. @@ -184,7 +188,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
  22498. }
  22499. spin_unlock(&pool->lock);
  22500. - local_irq_restore(flags);
  22501. + local_unlock_irqrestore(irq_off_lock, flags);
  22502. if (tag >= 0 || state == TASK_RUNNING)
  22503. break;
  22504. @@ -196,7 +200,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
  22505. schedule();
  22506. - local_irq_save(flags);
  22507. + local_lock_irqsave(irq_off_lock, flags);
  22508. tags = this_cpu_ptr(pool->tag_cpu);
  22509. }
  22510. if (state != TASK_RUNNING)
  22511. @@ -221,7 +225,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
  22512. BUG_ON(tag >= pool->nr_tags);
  22513. - local_irq_save(flags);
  22514. + local_lock_irqsave(irq_off_lock, flags);
  22515. tags = this_cpu_ptr(pool->tag_cpu);
  22516. spin_lock(&tags->lock);
  22517. @@ -253,7 +257,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
  22518. spin_unlock(&pool->lock);
  22519. }
  22520. - local_irq_restore(flags);
  22521. + local_unlock_irqrestore(irq_off_lock, flags);
  22522. }
  22523. EXPORT_SYMBOL_GPL(percpu_ida_free);
  22524. @@ -345,7 +349,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
  22525. struct percpu_ida_cpu *remote;
  22526. unsigned cpu, i, err = 0;
  22527. - local_irq_save(flags);
  22528. + local_lock_irqsave(irq_off_lock, flags);
  22529. for_each_possible_cpu(cpu) {
  22530. remote = per_cpu_ptr(pool->tag_cpu, cpu);
  22531. spin_lock(&remote->lock);
  22532. @@ -367,7 +371,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
  22533. }
  22534. spin_unlock(&pool->lock);
  22535. out:
  22536. - local_irq_restore(flags);
  22537. + local_unlock_irqrestore(irq_off_lock, flags);
  22538. return err;
  22539. }
  22540. EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
  22541. diff --git a/lib/radix-tree.c b/lib/radix-tree.c
  22542. index 1624c4117961..5f53e83b2859 100644
  22543. --- a/lib/radix-tree.c
  22544. +++ b/lib/radix-tree.c
  22545. @@ -240,13 +240,14 @@ radix_tree_node_alloc(struct radix_tree_root *root)
  22546. * succeed in getting a node here (and never reach
  22547. * kmem_cache_alloc)
  22548. */
  22549. - rtp = this_cpu_ptr(&radix_tree_preloads);
  22550. + rtp = &get_cpu_var(radix_tree_preloads);
  22551. if (rtp->nr) {
  22552. ret = rtp->nodes;
  22553. rtp->nodes = ret->private_data;
  22554. ret->private_data = NULL;
  22555. rtp->nr--;
  22556. }
  22557. + put_cpu_var(radix_tree_preloads);
  22558. /*
  22559. * Update the allocation stack trace as this is more useful
  22560. * for debugging.
  22561. @@ -287,6 +288,7 @@ radix_tree_node_free(struct radix_tree_node *node)
  22562. call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
  22563. }
  22564. +#ifndef CONFIG_PREEMPT_RT_FULL
  22565. /*
  22566. * Load up this CPU's radix_tree_node buffer with sufficient objects to
  22567. * ensure that the addition of a single element in the tree cannot fail. On
  22568. @@ -355,6 +357,7 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
  22569. return 0;
  22570. }
  22571. EXPORT_SYMBOL(radix_tree_maybe_preload);
  22572. +#endif
  22573. /*
  22574. * Return the maximum key which can be store into a
  22575. diff --git a/lib/random32.c b/lib/random32.c
  22576. index 510d1ce7d4d2..69ed593aab07 100644
  22577. --- a/lib/random32.c
  22578. +++ b/lib/random32.c
  22579. @@ -233,7 +233,6 @@ static void __prandom_timer(unsigned long dontcare)
  22580. static void __init __prandom_start_seed_timer(void)
  22581. {
  22582. - set_timer_slack(&seed_timer, HZ);
  22583. seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC);
  22584. add_timer(&seed_timer);
  22585. }
  22586. diff --git a/lib/rbtree.c b/lib/rbtree.c
  22587. index 1356454e36de..d15d6c4327f1 100644
  22588. --- a/lib/rbtree.c
  22589. +++ b/lib/rbtree.c
  22590. @@ -23,6 +23,7 @@
  22591. #include <linux/rbtree_augmented.h>
  22592. #include <linux/export.h>
  22593. +#include <linux/rcupdate.h>
  22594. /*
  22595. * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree
  22596. @@ -590,3 +591,13 @@ struct rb_node *rb_first_postorder(const struct rb_root *root)
  22597. return rb_left_deepest_node(root->rb_node);
  22598. }
  22599. EXPORT_SYMBOL(rb_first_postorder);
  22600. +
  22601. +void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
  22602. + struct rb_node **rb_link)
  22603. +{
  22604. + node->__rb_parent_color = (unsigned long)parent;
  22605. + node->rb_left = node->rb_right = NULL;
  22606. +
  22607. + rcu_assign_pointer(*rb_link, node);
  22608. +}
  22609. +EXPORT_SYMBOL(rb_link_node_rcu);
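rb_link_node_rcu() added above is the RCU-publishing variant of rb_link_node(): the parent/colour word and child pointers are initialized before rcu_assign_pointer() makes the node visible, so a lockless reader under rcu_read_lock() never observes a half-initialized node. It does not by itself hide rebalancing from readers, so it is meant for schemes that tolerate or retry around concurrent rotations. A writer-side sketch with illustrative struct and field names:

#include <linux/rbtree.h>
#include <linux/rcupdate.h>

struct example_node {                   /* illustrative */
        struct rb_node rb;
        unsigned long key;
};

/* Caller holds the update-side lock; readers traverse under rcu_read_lock(). */
static void example_insert(struct rb_root *root, struct example_node *new)
{
        struct rb_node **link = &root->rb_node, *parent = NULL;

        while (*link) {
                struct example_node *cur = rb_entry(*link, struct example_node, rb);

                parent = *link;
                link = cur->key < new->key ? &(*link)->rb_right : &(*link)->rb_left;
        }
        rb_link_node_rcu(&new->rb, parent, link);       /* publishes via rcu_assign_pointer() */
        rb_insert_color(&new->rb, root);
}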
  22610. diff --git a/lib/scatterlist.c b/lib/scatterlist.c
  22611. index 004fc70fc56a..ccc46992a517 100644
  22612. --- a/lib/scatterlist.c
  22613. +++ b/lib/scatterlist.c
  22614. @@ -620,7 +620,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
  22615. flush_kernel_dcache_page(miter->page);
  22616. if (miter->__flags & SG_MITER_ATOMIC) {
  22617. - WARN_ON_ONCE(preemptible());
  22618. + WARN_ON_ONCE(!pagefault_disabled());
  22619. kunmap_atomic(miter->addr);
  22620. } else
  22621. kunmap(miter->page);
  22622. @@ -664,7 +664,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
  22623. if (!sg_miter_skip(&miter, skip))
  22624. return false;
  22625. - local_irq_save(flags);
  22626. + local_irq_save_nort(flags);
  22627. while (sg_miter_next(&miter) && offset < buflen) {
  22628. unsigned int len;
  22629. @@ -681,7 +681,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
  22630. sg_miter_stop(&miter);
  22631. - local_irq_restore(flags);
  22632. + local_irq_restore_nort(flags);
  22633. return offset;
  22634. }
  22635. EXPORT_SYMBOL(sg_copy_buffer);
  22636. diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
  22637. index 1afec32de6f2..11fa431046a8 100644
  22638. --- a/lib/smp_processor_id.c
  22639. +++ b/lib/smp_processor_id.c
  22640. @@ -39,8 +39,9 @@ notrace static unsigned int check_preemption_disabled(const char *what1,
  22641. if (!printk_ratelimit())
  22642. goto out_enable;
  22643. - printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n",
  22644. - what1, what2, preempt_count() - 1, current->comm, current->pid);
  22645. + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x %08x] code: %s/%d\n",
  22646. + what1, what2, preempt_count() - 1, __migrate_disabled(current),
  22647. + current->comm, current->pid);
  22648. print_symbol("caller is %s\n", (long)__builtin_return_address(0));
  22649. dump_stack();
  22650. diff --git a/mm/Kconfig b/mm/Kconfig
  22651. index 989f8f3d77e0..1df53d6c7ec5 100644
  22652. --- a/mm/Kconfig
  22653. +++ b/mm/Kconfig
  22654. @@ -391,7 +391,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
  22655. config TRANSPARENT_HUGEPAGE
  22656. bool "Transparent Hugepage Support"
  22657. - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
  22658. + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
  22659. select COMPACTION
  22660. help
  22661. Transparent Hugepages allows the kernel to use huge pages and
  22662. diff --git a/mm/backing-dev.c b/mm/backing-dev.c
  22663. index 0c6317b7db38..1e6ab5fbadb4 100644
  22664. --- a/mm/backing-dev.c
  22665. +++ b/mm/backing-dev.c
  22666. @@ -457,9 +457,9 @@ void wb_congested_put(struct bdi_writeback_congested *congested)
  22667. {
  22668. unsigned long flags;
  22669. - local_irq_save(flags);
  22670. + local_irq_save_nort(flags);
  22671. if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
  22672. - local_irq_restore(flags);
  22673. + local_irq_restore_nort(flags);
  22674. return;
  22675. }
  22676. diff --git a/mm/compaction.c b/mm/compaction.c
  22677. index f8e925eb479b..14cfc4714e57 100644
  22678. --- a/mm/compaction.c
  22679. +++ b/mm/compaction.c
  22680. @@ -1414,10 +1414,12 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
  22681. cc->migrate_pfn & ~((1UL << cc->order) - 1);
  22682. if (cc->last_migrated_pfn < current_block_start) {
  22683. - cpu = get_cpu();
  22684. + cpu = get_cpu_light();
  22685. + local_lock_irq(swapvec_lock);
  22686. lru_add_drain_cpu(cpu);
  22687. + local_unlock_irq(swapvec_lock);
  22688. drain_local_pages(zone);
  22689. - put_cpu();
  22690. + put_cpu_light();
  22691. /* No more flushing until we migrate again */
  22692. cc->last_migrated_pfn = 0;
  22693. }
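get_cpu_light()/put_cpu_light() come from the smp.h change earlier in this series: on mainline they are plain get_cpu()/put_cpu(), while on PREEMPT_RT they only pin the task to its CPU with migrate_disable(), keeping the section preemptible. That is why the drain above additionally takes swapvec_lock explicitly instead of relying on preemption being off. Rough shape (simplified restatement, not the exact definition):

#ifdef CONFIG_PREEMPT_RT_FULL
# define get_cpu_light()        ({ migrate_disable(); smp_processor_id(); })
# define put_cpu_light()        migrate_enable()
#else
# define get_cpu_light()        get_cpu()
# define put_cpu_light()        put_cpu()
#endif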
  22694. diff --git a/mm/filemap.c b/mm/filemap.c
  22695. index f2479af09da9..a87b65c2e402 100644
  22696. --- a/mm/filemap.c
  22697. +++ b/mm/filemap.c
  22698. @@ -169,7 +169,9 @@ static void page_cache_tree_delete(struct address_space *mapping,
  22699. if (!workingset_node_pages(node) &&
  22700. list_empty(&node->private_list)) {
  22701. node->private_data = mapping;
  22702. - list_lru_add(&workingset_shadow_nodes, &node->private_list);
  22703. + local_lock(workingset_shadow_lock);
  22704. + list_lru_add(&__workingset_shadow_nodes, &node->private_list);
  22705. + local_unlock(workingset_shadow_lock);
  22706. }
  22707. }
  22708. @@ -618,9 +620,12 @@ static int page_cache_tree_insert(struct address_space *mapping,
  22709. * node->private_list is protected by
  22710. * mapping->tree_lock.
  22711. */
  22712. - if (!list_empty(&node->private_list))
  22713. - list_lru_del(&workingset_shadow_nodes,
  22714. + if (!list_empty(&node->private_list)) {
  22715. + local_lock(workingset_shadow_lock);
  22716. + list_lru_del(&__workingset_shadow_nodes,
  22717. &node->private_list);
  22718. + local_unlock(workingset_shadow_lock);
  22719. + }
  22720. }
  22721. return 0;
  22722. }
  22723. diff --git a/mm/highmem.c b/mm/highmem.c
  22724. index 123bcd3ed4f2..16e8cf26d38a 100644
  22725. --- a/mm/highmem.c
  22726. +++ b/mm/highmem.c
  22727. @@ -29,10 +29,11 @@
  22728. #include <linux/kgdb.h>
  22729. #include <asm/tlbflush.h>
  22730. -
  22731. +#ifndef CONFIG_PREEMPT_RT_FULL
  22732. #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
  22733. DEFINE_PER_CPU(int, __kmap_atomic_idx);
  22734. #endif
  22735. +#endif
  22736. /*
  22737. * Virtual_count is not a pure "count".
  22738. @@ -107,8 +108,9 @@ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color)
  22739. unsigned long totalhigh_pages __read_mostly;
  22740. EXPORT_SYMBOL(totalhigh_pages);
  22741. -
  22742. +#ifndef CONFIG_PREEMPT_RT_FULL
  22743. EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
  22744. +#endif
  22745. unsigned int nr_free_highpages (void)
  22746. {
  22747. diff --git a/mm/memcontrol.c b/mm/memcontrol.c
  22748. index a2e79b83920f..9764ec471f4d 100644
  22749. --- a/mm/memcontrol.c
  22750. +++ b/mm/memcontrol.c
  22751. @@ -67,6 +67,7 @@
  22752. #include <net/sock.h>
  22753. #include <net/ip.h>
  22754. #include "slab.h"
  22755. +#include <linux/locallock.h>
  22756. #include <asm/uaccess.h>
  22757. @@ -92,6 +93,8 @@ int do_swap_account __read_mostly;
  22758. #define do_swap_account 0
  22759. #endif
  22760. +static DEFINE_LOCAL_IRQ_LOCK(event_lock);
  22761. +
  22762. /* Whether legacy memory+swap accounting is active */
  22763. static bool do_memsw_account(void)
  22764. {
  22765. @@ -1825,14 +1828,17 @@ static void drain_local_stock(struct work_struct *dummy)
  22766. */
  22767. static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
  22768. {
  22769. - struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
  22770. + struct memcg_stock_pcp *stock;
  22771. + int cpu = get_cpu_light();
  22772. +
  22773. + stock = &per_cpu(memcg_stock, cpu);
  22774. if (stock->cached != memcg) { /* reset if necessary */
  22775. drain_stock(stock);
  22776. stock->cached = memcg;
  22777. }
  22778. stock->nr_pages += nr_pages;
  22779. - put_cpu_var(memcg_stock);
  22780. + put_cpu_light();
  22781. }
  22782. /*
  22783. @@ -1848,7 +1854,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
  22784. return;
  22785. /* Notify other cpus that system-wide "drain" is running */
  22786. get_online_cpus();
  22787. - curcpu = get_cpu();
  22788. + curcpu = get_cpu_light();
  22789. for_each_online_cpu(cpu) {
  22790. struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
  22791. struct mem_cgroup *memcg;
  22792. @@ -1865,7 +1871,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
  22793. schedule_work_on(cpu, &stock->work);
  22794. }
  22795. }
  22796. - put_cpu();
  22797. + put_cpu_light();
  22798. put_online_cpus();
  22799. mutex_unlock(&percpu_charge_mutex);
  22800. }
  22801. @@ -4487,12 +4493,12 @@ static int mem_cgroup_move_account(struct page *page,
  22802. ret = 0;
  22803. - local_irq_disable();
  22804. + local_lock_irq(event_lock);
  22805. mem_cgroup_charge_statistics(to, page, compound, nr_pages);
  22806. memcg_check_events(to, page);
  22807. mem_cgroup_charge_statistics(from, page, compound, -nr_pages);
  22808. memcg_check_events(from, page);
  22809. - local_irq_enable();
  22810. + local_unlock_irq(event_lock);
  22811. out_unlock:
  22812. unlock_page(page);
  22813. out:
  22814. @@ -5342,10 +5348,10 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
  22815. commit_charge(page, memcg, lrucare);
  22816. - local_irq_disable();
  22817. + local_lock_irq(event_lock);
  22818. mem_cgroup_charge_statistics(memcg, page, compound, nr_pages);
  22819. memcg_check_events(memcg, page);
  22820. - local_irq_enable();
  22821. + local_unlock_irq(event_lock);
  22822. if (do_memsw_account() && PageSwapCache(page)) {
  22823. swp_entry_t entry = { .val = page_private(page) };
  22824. @@ -5397,14 +5403,14 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
  22825. memcg_oom_recover(memcg);
  22826. }
  22827. - local_irq_save(flags);
  22828. + local_lock_irqsave(event_lock, flags);
  22829. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
  22830. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
  22831. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
  22832. __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
  22833. __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
  22834. memcg_check_events(memcg, dummy_page);
  22835. - local_irq_restore(flags);
  22836. + local_unlock_irqrestore(event_lock, flags);
  22837. if (!mem_cgroup_is_root(memcg))
  22838. css_put_many(&memcg->css, nr_pages);
  22839. @@ -5554,10 +5560,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
  22840. commit_charge(newpage, memcg, false);
  22841. - local_irq_disable();
  22842. + local_lock_irq(event_lock);
  22843. mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages);
  22844. memcg_check_events(memcg, newpage);
  22845. - local_irq_enable();
  22846. + local_unlock_irq(event_lock);
  22847. }
  22848. DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
  22849. @@ -5722,6 +5728,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
  22850. {
  22851. struct mem_cgroup *memcg;
  22852. unsigned short oldid;
  22853. + unsigned long flags;
  22854. VM_BUG_ON_PAGE(PageLRU(page), page);
  22855. VM_BUG_ON_PAGE(page_count(page), page);
  22856. @@ -5750,9 +5757,13 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
  22857. * important here to have the interrupts disabled because it is the
22858. * only synchronisation we have for updating the per-CPU variables.
  22859. */
  22860. + local_lock_irqsave(event_lock, flags);
  22861. +#ifndef CONFIG_PREEMPT_RT_BASE
  22862. VM_BUG_ON(!irqs_disabled());
  22863. +#endif
  22864. mem_cgroup_charge_statistics(memcg, page, false, -1);
  22865. memcg_check_events(memcg, page);
  22866. + local_unlock_irqrestore(event_lock, flags);
  22867. }
  22868. /*
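event_lock is a local lock, the per-CPU locking primitive this series adds in include/linux/locallock.h (included above): on non-RT kernels local_lock_irq*() fall back to plain interrupt disabling, on RT they take a per-CPU sleeping lock, so the statistics updates stay preemptible yet remain serialised per CPU and against the swapout path. The usage pattern, shown with an illustrative function name, is simply:

#include <linux/locallock.h>

static DEFINE_LOCAL_IRQ_LOCK(event_lock);

static void demo_update_events(void)            /* illustrative only */
{
        unsigned long flags;

        local_lock_irqsave(event_lock, flags);
        /* per-CPU charge statistics and event checks go here; on non-RT
         * this region runs with IRQs off, on RT it holds the per-CPU
         * lock instead */
        local_unlock_irqrestore(event_lock, flags);
}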
  22869. diff --git a/mm/mmu_context.c b/mm/mmu_context.c
  22870. index f802c2d216a7..b1b6f238e42d 100644
  22871. --- a/mm/mmu_context.c
  22872. +++ b/mm/mmu_context.c
  22873. @@ -23,6 +23,7 @@ void use_mm(struct mm_struct *mm)
  22874. struct task_struct *tsk = current;
  22875. task_lock(tsk);
  22876. + preempt_disable_rt();
  22877. active_mm = tsk->active_mm;
  22878. if (active_mm != mm) {
  22879. atomic_inc(&mm->mm_count);
  22880. @@ -30,6 +31,7 @@ void use_mm(struct mm_struct *mm)
  22881. }
  22882. tsk->mm = mm;
  22883. switch_mm(active_mm, mm, tsk);
  22884. + preempt_enable_rt();
  22885. task_unlock(tsk);
  22886. #ifdef finish_arch_post_lock_switch
  22887. finish_arch_post_lock_switch();
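preempt_disable_rt()/preempt_enable_rt() act only on PREEMPT_RT kernels; mainline behaviour is unchanged because the region is already safe there. Conceptually (the series defines the real helpers next to the other preempt_*() macros; this is a simplified restatement):

#ifdef CONFIG_PREEMPT_RT_BASE
# define preempt_disable_rt()   preempt_disable()
# define preempt_enable_rt()    preempt_enable()
#else
# define preempt_disable_rt()   do { } while (0)
# define preempt_enable_rt()    do { } while (0)
#endif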
  22888. diff --git a/mm/page_alloc.c b/mm/page_alloc.c
  22889. index 898fe3f909f9..301140302ff8 100644
  22890. --- a/mm/page_alloc.c
  22891. +++ b/mm/page_alloc.c
  22892. @@ -61,6 +61,7 @@
  22893. #include <linux/page_ext.h>
  22894. #include <linux/hugetlb.h>
  22895. #include <linux/sched/rt.h>
  22896. +#include <linux/locallock.h>
  22897. #include <linux/page_owner.h>
  22898. #include <linux/kthread.h>
  22899. @@ -275,6 +276,18 @@ EXPORT_SYMBOL(nr_node_ids);
  22900. EXPORT_SYMBOL(nr_online_nodes);
  22901. #endif
  22902. +static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
  22903. +
  22904. +#ifdef CONFIG_PREEMPT_RT_BASE
  22905. +# define cpu_lock_irqsave(cpu, flags) \
  22906. + local_lock_irqsave_on(pa_lock, flags, cpu)
  22907. +# define cpu_unlock_irqrestore(cpu, flags) \
  22908. + local_unlock_irqrestore_on(pa_lock, flags, cpu)
  22909. +#else
  22910. +# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
  22911. +# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
  22912. +#endif
  22913. +
  22914. int page_group_by_mobility_disabled __read_mostly;
  22915. #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
  22916. @@ -814,7 +827,7 @@ static inline int free_pages_check(struct page *page)
  22917. }
  22918. /*
  22919. - * Frees a number of pages from the PCP lists
  22920. + * Frees a number of pages which have been collected from the pcp lists.
  22921. * Assumes all pages on list are in same zone, and of same order.
  22922. * count is the number of pages to free.
  22923. *
  22924. @@ -825,18 +838,53 @@ static inline int free_pages_check(struct page *page)
  22925. * pinned" detection logic.
  22926. */
  22927. static void free_pcppages_bulk(struct zone *zone, int count,
  22928. - struct per_cpu_pages *pcp)
  22929. + struct list_head *list)
  22930. {
  22931. - int migratetype = 0;
  22932. - int batch_free = 0;
  22933. int to_free = count;
  22934. unsigned long nr_scanned;
  22935. + unsigned long flags;
  22936. +
  22937. + spin_lock_irqsave(&zone->lock, flags);
  22938. - spin_lock(&zone->lock);
  22939. nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
  22940. if (nr_scanned)
  22941. __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
  22942. + while (!list_empty(list)) {
  22943. + struct page *page = list_first_entry(list, struct page, lru);
  22944. + int mt; /* migratetype of the to-be-freed page */
  22945. +
  22946. + /* must delete as __free_one_page list manipulates */
  22947. + list_del(&page->lru);
  22948. +
  22949. + mt = get_pcppage_migratetype(page);
  22950. + /* MIGRATE_ISOLATE page should not go to pcplists */
  22951. + VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
  22952. + /* Pageblock could have been isolated meanwhile */
  22953. + if (unlikely(has_isolate_pageblock(zone)))
  22954. + mt = get_pageblock_migratetype(page);
  22955. +
  22956. + __free_one_page(page, page_to_pfn(page), zone, 0, mt);
  22957. + trace_mm_page_pcpu_drain(page, 0, mt);
  22958. + to_free--;
  22959. + }
  22960. + WARN_ON(to_free != 0);
  22961. + spin_unlock_irqrestore(&zone->lock, flags);
  22962. +}
  22963. +
  22964. +/*
  22965. + * Moves a number of pages from the PCP lists to free list which
  22966. + * is freed outside of the locked region.
  22967. + *
  22968. + * Assumes all pages on list are in same zone, and of same order.
  22969. + * count is the number of pages to free.
  22970. + */
  22971. +static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src,
  22972. + struct list_head *dst)
  22973. +{
  22974. + int migratetype = 0;
  22975. + int batch_free = 0;
  22976. +
  22977. while (to_free) {
  22978. struct page *page;
  22979. struct list_head *list;
  22980. @@ -852,7 +900,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
  22981. batch_free++;
  22982. if (++migratetype == MIGRATE_PCPTYPES)
  22983. migratetype = 0;
  22984. - list = &pcp->lists[migratetype];
  22985. + list = &src->lists[migratetype];
  22986. } while (list_empty(list));
  22987. /* This is the only non-empty list. Free them all. */
  22988. @@ -860,24 +908,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
  22989. batch_free = to_free;
  22990. do {
  22991. - int mt; /* migratetype of the to-be-freed page */
  22992. -
  22993. page = list_last_entry(list, struct page, lru);
  22994. - /* must delete as __free_one_page list manipulates */
  22995. list_del(&page->lru);
  22996. - mt = get_pcppage_migratetype(page);
  22997. - /* MIGRATE_ISOLATE page should not go to pcplists */
  22998. - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
  22999. - /* Pageblock could have been isolated meanwhile */
  23000. - if (unlikely(has_isolate_pageblock(zone)))
  23001. - mt = get_pageblock_migratetype(page);
  23002. -
  23003. - __free_one_page(page, page_to_pfn(page), zone, 0, mt);
  23004. - trace_mm_page_pcpu_drain(page, 0, mt);
  23005. + list_add(&page->lru, dst);
  23006. } while (--to_free && --batch_free && !list_empty(list));
  23007. }
  23008. - spin_unlock(&zone->lock);
  23009. }
  23010. static void free_one_page(struct zone *zone,
  23011. @@ -886,7 +922,9 @@ static void free_one_page(struct zone *zone,
  23012. int migratetype)
  23013. {
  23014. unsigned long nr_scanned;
  23015. - spin_lock(&zone->lock);
  23016. + unsigned long flags;
  23017. +
  23018. + spin_lock_irqsave(&zone->lock, flags);
  23019. nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
  23020. if (nr_scanned)
  23021. __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
  23022. @@ -896,7 +934,7 @@ static void free_one_page(struct zone *zone,
  23023. migratetype = get_pfnblock_migratetype(page, pfn);
  23024. }
  23025. __free_one_page(page, pfn, zone, order, migratetype);
  23026. - spin_unlock(&zone->lock);
  23027. + spin_unlock_irqrestore(&zone->lock, flags);
  23028. }
  23029. static int free_tail_pages_check(struct page *head_page, struct page *page)
  23030. @@ -1070,10 +1108,10 @@ static void __free_pages_ok(struct page *page, unsigned int order)
  23031. return;
  23032. migratetype = get_pfnblock_migratetype(page, pfn);
  23033. - local_irq_save(flags);
  23034. + local_lock_irqsave(pa_lock, flags);
  23035. __count_vm_events(PGFREE, 1 << order);
  23036. free_one_page(page_zone(page), page, pfn, order, migratetype);
  23037. - local_irq_restore(flags);
  23038. + local_unlock_irqrestore(pa_lock, flags);
  23039. }
  23040. static void __init __free_pages_boot_core(struct page *page,
  23041. @@ -2015,16 +2053,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
  23042. void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
  23043. {
  23044. unsigned long flags;
  23045. + LIST_HEAD(dst);
  23046. int to_drain, batch;
  23047. - local_irq_save(flags);
  23048. + local_lock_irqsave(pa_lock, flags);
  23049. batch = READ_ONCE(pcp->batch);
  23050. to_drain = min(pcp->count, batch);
  23051. if (to_drain > 0) {
  23052. - free_pcppages_bulk(zone, to_drain, pcp);
  23053. + isolate_pcp_pages(to_drain, pcp, &dst);
  23054. pcp->count -= to_drain;
  23055. }
  23056. - local_irq_restore(flags);
  23057. + local_unlock_irqrestore(pa_lock, flags);
  23058. + free_pcppages_bulk(zone, to_drain, &dst);
  23059. }
  23060. #endif
  23061. @@ -2040,16 +2080,21 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
  23062. unsigned long flags;
  23063. struct per_cpu_pageset *pset;
  23064. struct per_cpu_pages *pcp;
  23065. + LIST_HEAD(dst);
  23066. + int count;
  23067. - local_irq_save(flags);
  23068. + cpu_lock_irqsave(cpu, flags);
  23069. pset = per_cpu_ptr(zone->pageset, cpu);
  23070. pcp = &pset->pcp;
  23071. - if (pcp->count) {
  23072. - free_pcppages_bulk(zone, pcp->count, pcp);
  23073. + count = pcp->count;
  23074. + if (count) {
  23075. + isolate_pcp_pages(count, pcp, &dst);
  23076. pcp->count = 0;
  23077. }
  23078. - local_irq_restore(flags);
  23079. + cpu_unlock_irqrestore(cpu, flags);
  23080. + if (count)
  23081. + free_pcppages_bulk(zone, count, &dst);
  23082. }
  23083. /*
  23084. @@ -2135,8 +2180,17 @@ void drain_all_pages(struct zone *zone)
  23085. else
  23086. cpumask_clear_cpu(cpu, &cpus_with_pcps);
  23087. }
  23088. +#ifndef CONFIG_PREEMPT_RT_BASE
  23089. on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
  23090. zone, 1);
  23091. +#else
  23092. + for_each_cpu(cpu, &cpus_with_pcps) {
  23093. + if (zone)
  23094. + drain_pages_zone(cpu, zone);
  23095. + else
  23096. + drain_pages(cpu);
  23097. + }
  23098. +#endif
  23099. }
  23100. #ifdef CONFIG_HIBERNATION
  23101. @@ -2192,7 +2246,7 @@ void free_hot_cold_page(struct page *page, bool cold)
  23102. migratetype = get_pfnblock_migratetype(page, pfn);
  23103. set_pcppage_migratetype(page, migratetype);
  23104. - local_irq_save(flags);
  23105. + local_lock_irqsave(pa_lock, flags);
  23106. __count_vm_event(PGFREE);
  23107. /*
  23108. @@ -2218,12 +2272,17 @@ void free_hot_cold_page(struct page *page, bool cold)
  23109. pcp->count++;
  23110. if (pcp->count >= pcp->high) {
  23111. unsigned long batch = READ_ONCE(pcp->batch);
  23112. - free_pcppages_bulk(zone, batch, pcp);
  23113. + LIST_HEAD(dst);
  23114. +
  23115. + isolate_pcp_pages(batch, pcp, &dst);
  23116. pcp->count -= batch;
  23117. + local_unlock_irqrestore(pa_lock, flags);
  23118. + free_pcppages_bulk(zone, batch, &dst);
  23119. + return;
  23120. }
  23121. out:
  23122. - local_irq_restore(flags);
  23123. + local_unlock_irqrestore(pa_lock, flags);
  23124. }
  23125. /*
  23126. @@ -2358,7 +2417,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
  23127. struct per_cpu_pages *pcp;
  23128. struct list_head *list;
  23129. - local_irq_save(flags);
  23130. + local_lock_irqsave(pa_lock, flags);
  23131. pcp = &this_cpu_ptr(zone->pageset)->pcp;
  23132. list = &pcp->lists[migratetype];
  23133. if (list_empty(list)) {
  23134. @@ -2382,7 +2441,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
  23135. * allocate greater than order-1 page units with __GFP_NOFAIL.
  23136. */
  23137. WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
  23138. - spin_lock_irqsave(&zone->lock, flags);
  23139. + local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
  23140. page = NULL;
  23141. if (alloc_flags & ALLOC_HARDER) {
  23142. @@ -2392,11 +2451,13 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
  23143. }
  23144. if (!page)
  23145. page = __rmqueue(zone, order, migratetype);
  23146. - spin_unlock(&zone->lock);
  23147. - if (!page)
  23148. + if (!page) {
  23149. + spin_unlock(&zone->lock);
  23150. goto failed;
  23151. + }
  23152. __mod_zone_freepage_state(zone, -(1 << order),
  23153. get_pcppage_migratetype(page));
  23154. + spin_unlock(&zone->lock);
  23155. }
  23156. __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
  23157. @@ -2406,13 +2467,13 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
  23158. __count_zone_vm_events(PGALLOC, zone, 1 << order);
  23159. zone_statistics(preferred_zone, zone, gfp_flags);
  23160. - local_irq_restore(flags);
  23161. + local_unlock_irqrestore(pa_lock, flags);
  23162. VM_BUG_ON_PAGE(bad_range(zone, page), page);
  23163. return page;
  23164. failed:
  23165. - local_irq_restore(flags);
  23166. + local_unlock_irqrestore(pa_lock, flags);
  23167. return NULL;
  23168. }
  23169. @@ -6213,7 +6274,9 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
  23170. int cpu = (unsigned long)hcpu;
  23171. if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
  23172. + local_lock_irq_on(swapvec_lock, cpu);
  23173. lru_add_drain_cpu(cpu);
  23174. + local_unlock_irq_on(swapvec_lock, cpu);
  23175. drain_pages(cpu);
  23176. /*
  23177. @@ -6239,6 +6302,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
  23178. void __init page_alloc_init(void)
  23179. {
  23180. hotcpu_notifier(page_alloc_cpu_notify, 0);
  23181. + local_irq_lock_init(pa_lock);
  23182. }
  23183. /*
  23184. @@ -7163,7 +7227,7 @@ void zone_pcp_reset(struct zone *zone)
  23185. struct per_cpu_pageset *pset;
  23186. /* avoid races with drain_pages() */
  23187. - local_irq_save(flags);
  23188. + local_lock_irqsave(pa_lock, flags);
  23189. if (zone->pageset != &boot_pageset) {
  23190. for_each_online_cpu(cpu) {
  23191. pset = per_cpu_ptr(zone->pageset, cpu);
  23192. @@ -7172,7 +7236,7 @@ void zone_pcp_reset(struct zone *zone)
  23193. free_percpu(zone->pageset);
  23194. zone->pageset = &boot_pageset;
  23195. }
  23196. - local_irq_restore(flags);
  23197. + local_unlock_irqrestore(pa_lock, flags);
  23198. }
  23199. #ifdef CONFIG_MEMORY_HOTREMOVE
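The page_alloc.c rework above follows a single pattern: pages are first unlinked from the per-CPU pcp lists into a private list under pa_lock, and only afterwards returned to the buddy allocator, with free_pcppages_bulk() now taking zone->lock (irq-safe) itself. That keeps each raw-locked section short and bounded, which is the point on RT. Stripped to its shape (a sketch combining the drain_pages_zone()/free_hot_cold_page() logic above, not additional patch code):

static void demo_drain_pcp(struct zone *zone, struct per_cpu_pages *pcp)
{
        LIST_HEAD(dst);
        unsigned long flags;
        int count;

        local_lock_irqsave(pa_lock, flags);     /* short critical section */
        count = pcp->count;
        if (count) {
                isolate_pcp_pages(count, pcp, &dst);    /* unlink only */
                pcp->count = 0;
        }
        local_unlock_irqrestore(pa_lock, flags);

        if (count)
                free_pcppages_bulk(zone, count, &dst);  /* takes zone->lock itself */
}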
  23200. diff --git a/mm/slab.h b/mm/slab.h
  23201. index 5969769fbee6..b85c60f6dba8 100644
  23202. --- a/mm/slab.h
  23203. +++ b/mm/slab.h
  23204. @@ -415,7 +415,11 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
  23205. * The slab lists for all objects.
  23206. */
  23207. struct kmem_cache_node {
  23208. +#ifdef CONFIG_SLUB
  23209. + raw_spinlock_t list_lock;
  23210. +#else
  23211. spinlock_t list_lock;
  23212. +#endif
  23213. #ifdef CONFIG_SLAB
  23214. struct list_head slabs_partial; /* partial list first, better asm code */
  23215. diff --git a/mm/slub.c b/mm/slub.c
  23216. index 4dbb109eb8cd..2d10cc519150 100644
  23217. --- a/mm/slub.c
  23218. +++ b/mm/slub.c
  23219. @@ -1143,7 +1143,7 @@ static noinline int free_debug_processing(
  23220. unsigned long uninitialized_var(flags);
  23221. int ret = 0;
  23222. - spin_lock_irqsave(&n->list_lock, flags);
  23223. + raw_spin_lock_irqsave(&n->list_lock, flags);
  23224. slab_lock(page);
  23225. if (s->flags & SLAB_CONSISTENCY_CHECKS) {
  23226. @@ -1178,7 +1178,7 @@ static noinline int free_debug_processing(
  23227. bulk_cnt, cnt);
  23228. slab_unlock(page);
  23229. - spin_unlock_irqrestore(&n->list_lock, flags);
  23230. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  23231. if (!ret)
  23232. slab_fix(s, "Object at 0x%p not freed", object);
  23233. return ret;
  23234. @@ -1306,6 +1306,12 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node,
  23235. #endif /* CONFIG_SLUB_DEBUG */
  23236. +struct slub_free_list {
  23237. + raw_spinlock_t lock;
  23238. + struct list_head list;
  23239. +};
  23240. +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
  23241. +
  23242. /*
  23243. * Hooks for other subsystems that check memory allocations. In a typical
  23244. * production configuration these hooks all should produce no code at all.
  23245. @@ -1412,10 +1418,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
  23246. gfp_t alloc_gfp;
  23247. void *start, *p;
  23248. int idx, order;
  23249. + bool enableirqs = false;
  23250. flags &= gfp_allowed_mask;
  23251. if (gfpflags_allow_blocking(flags))
  23252. + enableirqs = true;
  23253. +#ifdef CONFIG_PREEMPT_RT_FULL
  23254. + if (system_state == SYSTEM_RUNNING)
  23255. + enableirqs = true;
  23256. +#endif
  23257. + if (enableirqs)
  23258. local_irq_enable();
  23259. flags |= s->allocflags;
  23260. @@ -1486,7 +1499,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
  23261. page->frozen = 1;
  23262. out:
  23263. - if (gfpflags_allow_blocking(flags))
  23264. + if (enableirqs)
  23265. local_irq_disable();
  23266. if (!page)
  23267. return NULL;
  23268. @@ -1543,6 +1556,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
  23269. __free_pages(page, order);
  23270. }
  23271. +static void free_delayed(struct list_head *h)
  23272. +{
  23273. + while(!list_empty(h)) {
  23274. + struct page *page = list_first_entry(h, struct page, lru);
  23275. +
  23276. + list_del(&page->lru);
  23277. + __free_slab(page->slab_cache, page);
  23278. + }
  23279. +}
  23280. +
  23281. #define need_reserve_slab_rcu \
  23282. (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
  23283. @@ -1574,6 +1597,12 @@ static void free_slab(struct kmem_cache *s, struct page *page)
  23284. }
  23285. call_rcu(head, rcu_free_slab);
  23286. + } else if (irqs_disabled()) {
  23287. + struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
  23288. +
  23289. + raw_spin_lock(&f->lock);
  23290. + list_add(&page->lru, &f->list);
  23291. + raw_spin_unlock(&f->lock);
  23292. } else
  23293. __free_slab(s, page);
  23294. }
  23295. @@ -1681,7 +1710,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
  23296. if (!n || !n->nr_partial)
  23297. return NULL;
  23298. - spin_lock(&n->list_lock);
  23299. + raw_spin_lock(&n->list_lock);
  23300. list_for_each_entry_safe(page, page2, &n->partial, lru) {
  23301. void *t;
  23302. @@ -1706,7 +1735,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
  23303. break;
  23304. }
  23305. - spin_unlock(&n->list_lock);
  23306. + raw_spin_unlock(&n->list_lock);
  23307. return object;
  23308. }
  23309. @@ -1952,7 +1981,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
  23310. * that acquire_slab() will see a slab page that
  23311. * is frozen
  23312. */
  23313. - spin_lock(&n->list_lock);
  23314. + raw_spin_lock(&n->list_lock);
  23315. }
  23316. } else {
  23317. m = M_FULL;
  23318. @@ -1963,7 +1992,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
  23319. * slabs from diagnostic functions will not see
  23320. * any frozen slabs.
  23321. */
  23322. - spin_lock(&n->list_lock);
  23323. + raw_spin_lock(&n->list_lock);
  23324. }
  23325. }
  23326. @@ -1998,7 +2027,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
  23327. goto redo;
  23328. if (lock)
  23329. - spin_unlock(&n->list_lock);
  23330. + raw_spin_unlock(&n->list_lock);
  23331. if (m == M_FREE) {
  23332. stat(s, DEACTIVATE_EMPTY);
  23333. @@ -2030,10 +2059,10 @@ static void unfreeze_partials(struct kmem_cache *s,
  23334. n2 = get_node(s, page_to_nid(page));
  23335. if (n != n2) {
  23336. if (n)
  23337. - spin_unlock(&n->list_lock);
  23338. + raw_spin_unlock(&n->list_lock);
  23339. n = n2;
  23340. - spin_lock(&n->list_lock);
  23341. + raw_spin_lock(&n->list_lock);
  23342. }
  23343. do {
  23344. @@ -2062,7 +2091,7 @@ static void unfreeze_partials(struct kmem_cache *s,
  23345. }
  23346. if (n)
  23347. - spin_unlock(&n->list_lock);
  23348. + raw_spin_unlock(&n->list_lock);
  23349. while (discard_page) {
  23350. page = discard_page;
  23351. @@ -2101,14 +2130,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
  23352. pobjects = oldpage->pobjects;
  23353. pages = oldpage->pages;
  23354. if (drain && pobjects > s->cpu_partial) {
  23355. + struct slub_free_list *f;
  23356. unsigned long flags;
  23357. + LIST_HEAD(tofree);
  23358. /*
  23359. * partial array is full. Move the existing
  23360. * set to the per node partial list.
  23361. */
  23362. local_irq_save(flags);
  23363. unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
  23364. + f = this_cpu_ptr(&slub_free_list);
  23365. + raw_spin_lock(&f->lock);
  23366. + list_splice_init(&f->list, &tofree);
  23367. + raw_spin_unlock(&f->lock);
  23368. local_irq_restore(flags);
  23369. + free_delayed(&tofree);
  23370. oldpage = NULL;
  23371. pobjects = 0;
  23372. pages = 0;
  23373. @@ -2180,7 +2216,22 @@ static bool has_cpu_slab(int cpu, void *info)
  23374. static void flush_all(struct kmem_cache *s)
  23375. {
  23376. + LIST_HEAD(tofree);
  23377. + int cpu;
  23378. +
  23379. on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
  23380. + for_each_online_cpu(cpu) {
  23381. + struct slub_free_list *f;
  23382. +
  23383. + if (!has_cpu_slab(cpu, s))
  23384. + continue;
  23385. +
  23386. + f = &per_cpu(slub_free_list, cpu);
  23387. + raw_spin_lock_irq(&f->lock);
  23388. + list_splice_init(&f->list, &tofree);
  23389. + raw_spin_unlock_irq(&f->lock);
  23390. + free_delayed(&tofree);
  23391. + }
  23392. }
  23393. /*
  23394. @@ -2216,10 +2267,10 @@ static unsigned long count_partial(struct kmem_cache_node *n,
  23395. unsigned long x = 0;
  23396. struct page *page;
  23397. - spin_lock_irqsave(&n->list_lock, flags);
  23398. + raw_spin_lock_irqsave(&n->list_lock, flags);
  23399. list_for_each_entry(page, &n->partial, lru)
  23400. x += get_count(page);
  23401. - spin_unlock_irqrestore(&n->list_lock, flags);
  23402. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  23403. return x;
  23404. }
  23405. #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
  23406. @@ -2357,8 +2408,10 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
  23407. * already disabled (which is the case for bulk allocation).
  23408. */
  23409. static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  23410. - unsigned long addr, struct kmem_cache_cpu *c)
  23411. + unsigned long addr, struct kmem_cache_cpu *c,
  23412. + struct list_head *to_free)
  23413. {
  23414. + struct slub_free_list *f;
  23415. void *freelist;
  23416. struct page *page;
  23417. @@ -2418,6 +2471,13 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  23418. VM_BUG_ON(!c->page->frozen);
  23419. c->freelist = get_freepointer(s, freelist);
  23420. c->tid = next_tid(c->tid);
  23421. +
  23422. +out:
  23423. + f = this_cpu_ptr(&slub_free_list);
  23424. + raw_spin_lock(&f->lock);
  23425. + list_splice_init(&f->list, to_free);
  23426. + raw_spin_unlock(&f->lock);
  23427. +
  23428. return freelist;
  23429. new_slab:
  23430. @@ -2449,7 +2509,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  23431. deactivate_slab(s, page, get_freepointer(s, freelist));
  23432. c->page = NULL;
  23433. c->freelist = NULL;
  23434. - return freelist;
  23435. + goto out;
  23436. }
  23437. /*
  23438. @@ -2461,6 +2521,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  23439. {
  23440. void *p;
  23441. unsigned long flags;
  23442. + LIST_HEAD(tofree);
  23443. local_irq_save(flags);
  23444. #ifdef CONFIG_PREEMPT
  23445. @@ -2472,8 +2533,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  23446. c = this_cpu_ptr(s->cpu_slab);
  23447. #endif
  23448. - p = ___slab_alloc(s, gfpflags, node, addr, c);
  23449. + p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
  23450. local_irq_restore(flags);
  23451. + free_delayed(&tofree);
  23452. return p;
  23453. }
  23454. @@ -2659,7 +2721,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
  23455. do {
  23456. if (unlikely(n)) {
  23457. - spin_unlock_irqrestore(&n->list_lock, flags);
  23458. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  23459. n = NULL;
  23460. }
  23461. prior = page->freelist;
  23462. @@ -2691,7 +2753,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
  23463. * Otherwise the list_lock will synchronize with
  23464. * other processors updating the list of slabs.
  23465. */
  23466. - spin_lock_irqsave(&n->list_lock, flags);
  23467. + raw_spin_lock_irqsave(&n->list_lock, flags);
  23468. }
  23469. }
  23470. @@ -2733,7 +2795,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
  23471. add_partial(n, page, DEACTIVATE_TO_TAIL);
  23472. stat(s, FREE_ADD_PARTIAL);
  23473. }
  23474. - spin_unlock_irqrestore(&n->list_lock, flags);
  23475. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  23476. return;
  23477. slab_empty:
  23478. @@ -2748,7 +2810,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
  23479. remove_full(s, n, page);
  23480. }
  23481. - spin_unlock_irqrestore(&n->list_lock, flags);
  23482. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  23483. stat(s, FREE_SLAB);
  23484. discard_slab(s, page);
  23485. }
  23486. @@ -2935,6 +2997,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
  23487. void **p)
  23488. {
  23489. struct kmem_cache_cpu *c;
  23490. + LIST_HEAD(to_free);
  23491. int i;
  23492. /* memcg and kmem_cache debug support */
  23493. @@ -2958,7 +3021,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
  23494. * of re-populating per CPU c->freelist
  23495. */
  23496. p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
  23497. - _RET_IP_, c);
  23498. + _RET_IP_, c, &to_free);
  23499. if (unlikely(!p[i]))
  23500. goto error;
  23501. @@ -2970,6 +3033,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
  23502. }
  23503. c->tid = next_tid(c->tid);
  23504. local_irq_enable();
  23505. + free_delayed(&to_free);
  23506. /* Clear memory outside IRQ disabled fastpath loop */
  23507. if (unlikely(flags & __GFP_ZERO)) {
  23508. @@ -3117,7 +3181,7 @@ static void
  23509. init_kmem_cache_node(struct kmem_cache_node *n)
  23510. {
  23511. n->nr_partial = 0;
  23512. - spin_lock_init(&n->list_lock);
  23513. + raw_spin_lock_init(&n->list_lock);
  23514. INIT_LIST_HEAD(&n->partial);
  23515. #ifdef CONFIG_SLUB_DEBUG
  23516. atomic_long_set(&n->nr_slabs, 0);
  23517. @@ -3450,6 +3514,10 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
  23518. const char *text)
  23519. {
  23520. #ifdef CONFIG_SLUB_DEBUG
  23521. +#ifdef CONFIG_PREEMPT_RT_BASE
  23522. + /* XXX move out of irq-off section */
  23523. + slab_err(s, page, text, s->name);
  23524. +#else
  23525. void *addr = page_address(page);
  23526. void *p;
  23527. unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
  23528. @@ -3470,6 +3538,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
  23529. slab_unlock(page);
  23530. kfree(map);
  23531. #endif
  23532. +#endif
  23533. }
  23534. /*
  23535. @@ -3482,7 +3551,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
  23536. struct page *page, *h;
  23537. BUG_ON(irqs_disabled());
  23538. - spin_lock_irq(&n->list_lock);
  23539. + raw_spin_lock_irq(&n->list_lock);
  23540. list_for_each_entry_safe(page, h, &n->partial, lru) {
  23541. if (!page->inuse) {
  23542. remove_partial(n, page);
  23543. @@ -3492,7 +3561,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
  23544. "Objects remaining in %s on __kmem_cache_shutdown()");
  23545. }
  23546. }
  23547. - spin_unlock_irq(&n->list_lock);
  23548. + raw_spin_unlock_irq(&n->list_lock);
  23549. }
  23550. /*
  23551. @@ -3706,7 +3775,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
  23552. for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
  23553. INIT_LIST_HEAD(promote + i);
  23554. - spin_lock_irqsave(&n->list_lock, flags);
  23555. + raw_spin_lock_irqsave(&n->list_lock, flags);
  23556. /*
  23557. * Build lists of slabs to discard or promote.
  23558. @@ -3737,7 +3806,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
  23559. for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
  23560. list_splice(promote + i, &n->partial);
  23561. - spin_unlock_irqrestore(&n->list_lock, flags);
  23562. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  23563. /* Release empty slabs */
  23564. list_for_each_entry_safe(page, t, &discard, lru)
  23565. @@ -3913,6 +3982,12 @@ void __init kmem_cache_init(void)
  23566. {
  23567. static __initdata struct kmem_cache boot_kmem_cache,
  23568. boot_kmem_cache_node;
  23569. + int cpu;
  23570. +
  23571. + for_each_possible_cpu(cpu) {
  23572. + raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
  23573. + INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
  23574. + }
  23575. if (debug_guardpage_minorder())
  23576. slub_max_order = 0;
  23577. @@ -4156,7 +4231,7 @@ static int validate_slab_node(struct kmem_cache *s,
  23578. struct page *page;
  23579. unsigned long flags;
  23580. - spin_lock_irqsave(&n->list_lock, flags);
  23581. + raw_spin_lock_irqsave(&n->list_lock, flags);
  23582. list_for_each_entry(page, &n->partial, lru) {
  23583. validate_slab_slab(s, page, map);
  23584. @@ -4178,7 +4253,7 @@ static int validate_slab_node(struct kmem_cache *s,
  23585. s->name, count, atomic_long_read(&n->nr_slabs));
  23586. out:
  23587. - spin_unlock_irqrestore(&n->list_lock, flags);
  23588. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  23589. return count;
  23590. }
  23591. @@ -4366,12 +4441,12 @@ static int list_locations(struct kmem_cache *s, char *buf,
  23592. if (!atomic_long_read(&n->nr_slabs))
  23593. continue;
  23594. - spin_lock_irqsave(&n->list_lock, flags);
  23595. + raw_spin_lock_irqsave(&n->list_lock, flags);
  23596. list_for_each_entry(page, &n->partial, lru)
  23597. process_slab(&t, s, page, alloc, map);
  23598. list_for_each_entry(page, &n->full, lru)
  23599. process_slab(&t, s, page, alloc, map);
  23600. - spin_unlock_irqrestore(&n->list_lock, flags);
  23601. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  23602. }
  23603. for (i = 0; i < t.count; i++) {
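Because __free_slab() ends up in the page allocator and may sleep on RT, slabs released while interrupts are off are parked on the per-CPU slub_free_list and handed to free_delayed() once interrupts are back on; flush_all(), __slab_alloc(), kmem_cache_alloc_bulk() and put_cpu_partial() above all splice the list out before freeing. The caller-side shape, wrapped in an illustrative function:

static void demo_drain_slub_free_list(void)     /* illustrative wrapper */
{
        struct slub_free_list *f;
        LIST_HEAD(tofree);
        unsigned long flags;

        local_irq_save(flags);
        /* ... work that may queue slabs via free_slab() ... */
        f = this_cpu_ptr(&slub_free_list);
        raw_spin_lock(&f->lock);
        list_splice_init(&f->list, &tofree);
        raw_spin_unlock(&f->lock);
        local_irq_restore(flags);

        free_delayed(&tofree);  /* __free_slab() may now sleep safely on RT */
}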
  23604. diff --git a/mm/swap.c b/mm/swap.c
  23605. index 03aacbcb013f..d3558eb2f685 100644
  23606. --- a/mm/swap.c
  23607. +++ b/mm/swap.c
  23608. @@ -32,6 +32,7 @@
  23609. #include <linux/memcontrol.h>
  23610. #include <linux/gfp.h>
  23611. #include <linux/uio.h>
  23612. +#include <linux/locallock.h>
  23613. #include <linux/hugetlb.h>
  23614. #include <linux/page_idle.h>
  23615. @@ -48,6 +49,9 @@ static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
  23616. static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
  23617. static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
  23618. +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
  23619. +DEFINE_LOCAL_IRQ_LOCK(swapvec_lock);
  23620. +
  23621. /*
  23622. * This path almost never happens for VM activity - pages are normally
  23623. * freed via pagevecs. But it gets used by networking.
  23624. @@ -237,11 +241,11 @@ void rotate_reclaimable_page(struct page *page)
  23625. unsigned long flags;
  23626. get_page(page);
  23627. - local_irq_save(flags);
  23628. + local_lock_irqsave(rotate_lock, flags);
  23629. pvec = this_cpu_ptr(&lru_rotate_pvecs);
  23630. if (!pagevec_add(pvec, page))
  23631. pagevec_move_tail(pvec);
  23632. - local_irq_restore(flags);
  23633. + local_unlock_irqrestore(rotate_lock, flags);
  23634. }
  23635. }
  23636. @@ -292,12 +296,13 @@ static bool need_activate_page_drain(int cpu)
  23637. void activate_page(struct page *page)
  23638. {
  23639. if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  23640. - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
  23641. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  23642. + activate_page_pvecs);
  23643. get_page(page);
  23644. if (!pagevec_add(pvec, page))
  23645. pagevec_lru_move_fn(pvec, __activate_page, NULL);
  23646. - put_cpu_var(activate_page_pvecs);
  23647. + put_locked_var(swapvec_lock, activate_page_pvecs);
  23648. }
  23649. }
  23650. @@ -323,7 +328,7 @@ void activate_page(struct page *page)
  23651. static void __lru_cache_activate_page(struct page *page)
  23652. {
  23653. - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  23654. + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
  23655. int i;
  23656. /*
  23657. @@ -345,7 +350,7 @@ static void __lru_cache_activate_page(struct page *page)
  23658. }
  23659. }
  23660. - put_cpu_var(lru_add_pvec);
  23661. + put_locked_var(swapvec_lock, lru_add_pvec);
  23662. }
  23663. /*
  23664. @@ -387,13 +392,13 @@ EXPORT_SYMBOL(mark_page_accessed);
  23665. static void __lru_cache_add(struct page *page)
  23666. {
  23667. - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  23668. + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
  23669. get_page(page);
  23670. if (!pagevec_space(pvec))
  23671. __pagevec_lru_add(pvec);
  23672. pagevec_add(pvec, page);
  23673. - put_cpu_var(lru_add_pvec);
  23674. + put_locked_var(swapvec_lock, lru_add_pvec);
  23675. }
  23676. /**
  23677. @@ -591,9 +596,15 @@ void lru_add_drain_cpu(int cpu)
  23678. unsigned long flags;
  23679. /* No harm done if a racing interrupt already did this */
  23680. - local_irq_save(flags);
  23681. +#ifdef CONFIG_PREEMPT_RT_BASE
  23682. + local_lock_irqsave_on(rotate_lock, flags, cpu);
  23683. pagevec_move_tail(pvec);
  23684. - local_irq_restore(flags);
  23685. + local_unlock_irqrestore_on(rotate_lock, flags, cpu);
  23686. +#else
  23687. + local_lock_irqsave(rotate_lock, flags);
  23688. + pagevec_move_tail(pvec);
  23689. + local_unlock_irqrestore(rotate_lock, flags);
  23690. +#endif
  23691. }
  23692. pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
  23693. @@ -625,11 +636,12 @@ void deactivate_file_page(struct page *page)
  23694. return;
  23695. if (likely(get_page_unless_zero(page))) {
  23696. - struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
  23697. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  23698. + lru_deactivate_file_pvecs);
  23699. if (!pagevec_add(pvec, page))
  23700. pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
  23701. - put_cpu_var(lru_deactivate_file_pvecs);
  23702. + put_locked_var(swapvec_lock, lru_deactivate_file_pvecs);
  23703. }
  23704. }
  23705. @@ -644,27 +656,48 @@ void deactivate_file_page(struct page *page)
  23706. void deactivate_page(struct page *page)
  23707. {
  23708. if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
  23709. - struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
  23710. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  23711. + lru_deactivate_pvecs);
  23712. get_page(page);
  23713. if (!pagevec_add(pvec, page))
  23714. pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
  23715. - put_cpu_var(lru_deactivate_pvecs);
  23716. + put_locked_var(swapvec_lock, lru_deactivate_pvecs);
  23717. }
  23718. }
  23719. void lru_add_drain(void)
  23720. {
  23721. - lru_add_drain_cpu(get_cpu());
  23722. - put_cpu();
  23723. + lru_add_drain_cpu(local_lock_cpu(swapvec_lock));
  23724. + local_unlock_cpu(swapvec_lock);
  23725. }
  23726. +
  23727. +#ifdef CONFIG_PREEMPT_RT_BASE
  23728. +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
  23729. +{
  23730. + local_lock_on(swapvec_lock, cpu);
  23731. + lru_add_drain_cpu(cpu);
  23732. + local_unlock_on(swapvec_lock, cpu);
  23733. +}
  23734. +
  23735. +#else
  23736. +
  23737. static void lru_add_drain_per_cpu(struct work_struct *dummy)
  23738. {
  23739. lru_add_drain();
  23740. }
  23741. static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
  23742. +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
  23743. +{
  23744. + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
  23745. +
  23746. + INIT_WORK(work, lru_add_drain_per_cpu);
  23747. + schedule_work_on(cpu, work);
  23748. + cpumask_set_cpu(cpu, has_work);
  23749. +}
  23750. +#endif
  23751. void lru_add_drain_all(void)
  23752. {
  23753. @@ -677,21 +710,18 @@ void lru_add_drain_all(void)
  23754. cpumask_clear(&has_work);
  23755. for_each_online_cpu(cpu) {
  23756. - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
  23757. -
  23758. if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
  23759. pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
  23760. pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
  23761. pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
  23762. - need_activate_page_drain(cpu)) {
  23763. - INIT_WORK(work, lru_add_drain_per_cpu);
  23764. - schedule_work_on(cpu, work);
  23765. - cpumask_set_cpu(cpu, &has_work);
  23766. - }
  23767. + need_activate_page_drain(cpu))
  23768. + remote_lru_add_drain(cpu, &has_work);
  23769. }
  23770. +#ifndef CONFIG_PREEMPT_RT_BASE
  23771. for_each_cpu(cpu, &has_work)
  23772. flush_work(&per_cpu(lru_add_drain_work, cpu));
  23773. +#endif
  23774. put_online_cpus();
  23775. mutex_unlock(&lock);
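get_locked_var()/put_locked_var() replace get_cpu_var()/put_cpu_var() for the pagevecs: instead of disabling preemption they take the named local lock (swapvec_lock), which is a per-CPU sleeping lock on RT and collapses to the old behaviour on mainline. A sketch of the pattern, mirroring __lru_cache_add() above with illustrative names:

#include <linux/locallock.h>
#include <linux/pagevec.h>

static DEFINE_PER_CPU(struct pagevec, demo_pvecs);     /* illustrative */
static DEFINE_LOCAL_IRQ_LOCK(demo_lock);               /* illustrative */

static void demo_lru_cache_add(struct page *page)
{
        struct pagevec *pvec = &get_locked_var(demo_lock, demo_pvecs);

        get_page(page);
        if (!pagevec_space(pvec))
                __pagevec_lru_add(pvec);
        pagevec_add(pvec, page);
        put_locked_var(demo_lock, demo_pvecs);
}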
  23776. diff --git a/mm/truncate.c b/mm/truncate.c
  23777. index b00272810871..dcc445a87dfe 100644
  23778. --- a/mm/truncate.c
  23779. +++ b/mm/truncate.c
  23780. @@ -63,9 +63,12 @@ static void clear_exceptional_entry(struct address_space *mapping,
  23781. * protected by mapping->tree_lock.
  23782. */
  23783. if (!workingset_node_shadows(node) &&
  23784. - !list_empty(&node->private_list))
  23785. - list_lru_del(&workingset_shadow_nodes,
  23786. + !list_empty(&node->private_list)) {
  23787. + local_lock(workingset_shadow_lock);
  23788. + list_lru_del(&__workingset_shadow_nodes,
  23789. &node->private_list);
  23790. + local_unlock(workingset_shadow_lock);
  23791. + }
  23792. __radix_tree_delete_node(&mapping->page_tree, node);
  23793. }
  23794. unlock:
  23795. diff --git a/mm/vmalloc.c b/mm/vmalloc.c
  23796. index ae7d20b447ff..b7d6f721c2a7 100644
  23797. --- a/mm/vmalloc.c
  23798. +++ b/mm/vmalloc.c
  23799. @@ -819,7 +819,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
  23800. struct vmap_block *vb;
  23801. struct vmap_area *va;
  23802. unsigned long vb_idx;
  23803. - int node, err;
  23804. + int node, err, cpu;
  23805. void *vaddr;
  23806. node = numa_node_id();
  23807. @@ -862,11 +862,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
  23808. BUG_ON(err);
  23809. radix_tree_preload_end();
  23810. - vbq = &get_cpu_var(vmap_block_queue);
  23811. + cpu = get_cpu_light();
  23812. + vbq = this_cpu_ptr(&vmap_block_queue);
  23813. spin_lock(&vbq->lock);
  23814. list_add_tail_rcu(&vb->free_list, &vbq->free);
  23815. spin_unlock(&vbq->lock);
  23816. - put_cpu_var(vmap_block_queue);
  23817. + put_cpu_light();
  23818. return vaddr;
  23819. }
  23820. @@ -935,6 +936,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
  23821. struct vmap_block *vb;
  23822. void *vaddr = NULL;
  23823. unsigned int order;
  23824. + int cpu;
  23825. BUG_ON(offset_in_page(size));
  23826. BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
  23827. @@ -949,7 +951,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
  23828. order = get_order(size);
  23829. rcu_read_lock();
  23830. - vbq = &get_cpu_var(vmap_block_queue);
  23831. + cpu = get_cpu_light();
  23832. + vbq = this_cpu_ptr(&vmap_block_queue);
  23833. list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  23834. unsigned long pages_off;
  23835. @@ -972,7 +975,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
  23836. break;
  23837. }
  23838. - put_cpu_var(vmap_block_queue);
  23839. + put_cpu_light();
  23840. rcu_read_unlock();
  23841. /* Allocate new block if nothing was found */
  23842. diff --git a/mm/vmstat.c b/mm/vmstat.c
  23843. index 5e4300482897..1ae743192c66 100644
  23844. --- a/mm/vmstat.c
  23845. +++ b/mm/vmstat.c
  23846. @@ -226,6 +226,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
  23847. long x;
  23848. long t;
  23849. + preempt_disable_rt();
  23850. x = delta + __this_cpu_read(*p);
  23851. t = __this_cpu_read(pcp->stat_threshold);
  23852. @@ -235,6 +236,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
  23853. x = 0;
  23854. }
  23855. __this_cpu_write(*p, x);
  23856. + preempt_enable_rt();
  23857. }
  23858. EXPORT_SYMBOL(__mod_zone_page_state);
  23859. @@ -267,6 +269,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
  23860. s8 __percpu *p = pcp->vm_stat_diff + item;
  23861. s8 v, t;
  23862. + preempt_disable_rt();
  23863. v = __this_cpu_inc_return(*p);
  23864. t = __this_cpu_read(pcp->stat_threshold);
  23865. if (unlikely(v > t)) {
  23866. @@ -275,6 +278,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
  23867. zone_page_state_add(v + overstep, zone, item);
  23868. __this_cpu_write(*p, -overstep);
  23869. }
  23870. + preempt_enable_rt();
  23871. }
  23872. void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
  23873. @@ -289,6 +293,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
  23874. s8 __percpu *p = pcp->vm_stat_diff + item;
  23875. s8 v, t;
  23876. + preempt_disable_rt();
  23877. v = __this_cpu_dec_return(*p);
  23878. t = __this_cpu_read(pcp->stat_threshold);
  23879. if (unlikely(v < - t)) {
  23880. @@ -297,6 +302,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
  23881. zone_page_state_add(v - overstep, zone, item);
  23882. __this_cpu_write(*p, overstep);
  23883. }
  23884. + preempt_enable_rt();
  23885. }
  23886. void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
  23887. diff --git a/mm/workingset.c b/mm/workingset.c
  23888. index 8a75f8d2916a..00a38f9f0f37 100644
  23889. --- a/mm/workingset.c
  23890. +++ b/mm/workingset.c
  23891. @@ -335,7 +335,8 @@ void workingset_activation(struct page *page)
  23892. * point where they would still be useful.
  23893. */
  23894. -struct list_lru workingset_shadow_nodes;
  23895. +struct list_lru __workingset_shadow_nodes;
  23896. +DEFINE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
  23897. static unsigned long count_shadow_nodes(struct shrinker *shrinker,
  23898. struct shrink_control *sc)
  23899. @@ -345,9 +346,9 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
  23900. unsigned long pages;
  23901. /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
  23902. - local_irq_disable();
  23903. - shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc);
  23904. - local_irq_enable();
  23905. + local_lock_irq(workingset_shadow_lock);
  23906. + shadow_nodes = list_lru_shrink_count(&__workingset_shadow_nodes, sc);
  23907. + local_unlock_irq(workingset_shadow_lock);
  23908. if (memcg_kmem_enabled())
  23909. pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid,
  23910. @@ -440,9 +441,9 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
  23911. spin_unlock(&mapping->tree_lock);
  23912. ret = LRU_REMOVED_RETRY;
  23913. out:
  23914. - local_irq_enable();
  23915. + local_unlock_irq(workingset_shadow_lock);
  23916. cond_resched();
  23917. - local_irq_disable();
  23918. + local_lock_irq(workingset_shadow_lock);
  23919. spin_lock(lru_lock);
  23920. return ret;
  23921. }
  23922. @@ -453,10 +454,10 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
  23923. unsigned long ret;
  23924. /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
  23925. - local_irq_disable();
  23926. - ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc,
  23927. + local_lock_irq(workingset_shadow_lock);
  23928. + ret = list_lru_shrink_walk(&__workingset_shadow_nodes, sc,
  23929. shadow_lru_isolate, NULL);
  23930. - local_irq_enable();
  23931. + local_unlock_irq(workingset_shadow_lock);
  23932. return ret;
  23933. }
  23934. @@ -494,7 +495,7 @@ static int __init workingset_init(void)
  23935. printk("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
  23936. timestamp_bits, max_order, bucket_order);
  23937. - ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);
  23938. + ret = list_lru_init_key(&__workingset_shadow_nodes, &shadow_nodes_key);
  23939. if (ret)
  23940. goto err;
  23941. ret = register_shrinker(&workingset_shadow_shrinker);
  23942. @@ -502,7 +503,7 @@ static int __init workingset_init(void)
  23943. goto err_list_lru;
  23944. return 0;
  23945. err_list_lru:
  23946. - list_lru_destroy(&workingset_shadow_nodes);
  23947. + list_lru_destroy(&__workingset_shadow_nodes);
  23948. err:
  23949. return ret;
  23950. }
  23951. diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
  23952. index fe47fbba995a..e46d7bcf562e 100644
  23953. --- a/mm/zsmalloc.c
  23954. +++ b/mm/zsmalloc.c
  23955. @@ -1292,7 +1292,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
  23956. class = pool->size_class[class_idx];
  23957. off = obj_idx_to_offset(page, obj_idx, class->size);
  23958. - area = &get_cpu_var(zs_map_area);
  23959. + area = per_cpu_ptr(&zs_map_area, get_cpu_light());
  23960. area->vm_mm = mm;
  23961. if (off + class->size <= PAGE_SIZE) {
  23962. /* this object is contained entirely within a page */
  23963. @@ -1345,7 +1345,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
  23964. __zs_unmap_object(area, pages, off, class->size);
  23965. }
  23966. - put_cpu_var(zs_map_area);
  23967. + put_cpu_light();
  23968. unpin_tag(handle);
  23969. }
  23970. EXPORT_SYMBOL_GPL(zs_unmap_object);
  23971. diff --git a/net/core/dev.c b/net/core/dev.c
  23972. index 5c925ac50b95..a8fedf89eb12 100644
  23973. --- a/net/core/dev.c
  23974. +++ b/net/core/dev.c
  23975. @@ -188,6 +188,7 @@ static unsigned int napi_gen_id = NR_CPUS;
  23976. static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
  23977. static seqcount_t devnet_rename_seq;
  23978. +static DEFINE_MUTEX(devnet_rename_mutex);
  23979. static inline void dev_base_seq_inc(struct net *net)
  23980. {
  23981. @@ -209,14 +210,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
  23982. static inline void rps_lock(struct softnet_data *sd)
  23983. {
  23984. #ifdef CONFIG_RPS
  23985. - spin_lock(&sd->input_pkt_queue.lock);
  23986. + raw_spin_lock(&sd->input_pkt_queue.raw_lock);
  23987. #endif
  23988. }
  23989. static inline void rps_unlock(struct softnet_data *sd)
  23990. {
  23991. #ifdef CONFIG_RPS
  23992. - spin_unlock(&sd->input_pkt_queue.lock);
  23993. + raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
  23994. #endif
  23995. }
  23996. @@ -886,7 +887,8 @@ int netdev_get_name(struct net *net, char *name, int ifindex)
  23997. strcpy(name, dev->name);
  23998. rcu_read_unlock();
  23999. if (read_seqcount_retry(&devnet_rename_seq, seq)) {
  24000. - cond_resched();
  24001. + mutex_lock(&devnet_rename_mutex);
  24002. + mutex_unlock(&devnet_rename_mutex);
  24003. goto retry;
  24004. }
  24005. @@ -1155,20 +1157,17 @@ int dev_change_name(struct net_device *dev, const char *newname)
  24006. if (dev->flags & IFF_UP)
  24007. return -EBUSY;
  24008. - write_seqcount_begin(&devnet_rename_seq);
  24009. + mutex_lock(&devnet_rename_mutex);
  24010. + __raw_write_seqcount_begin(&devnet_rename_seq);
  24011. - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
  24012. - write_seqcount_end(&devnet_rename_seq);
  24013. - return 0;
  24014. - }
  24015. + if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
  24016. + goto outunlock;
  24017. memcpy(oldname, dev->name, IFNAMSIZ);
  24018. err = dev_get_valid_name(net, dev, newname);
  24019. - if (err < 0) {
  24020. - write_seqcount_end(&devnet_rename_seq);
  24021. - return err;
  24022. - }
  24023. + if (err < 0)
  24024. + goto outunlock;
  24025. if (oldname[0] && !strchr(oldname, '%'))
  24026. netdev_info(dev, "renamed from %s\n", oldname);
  24027. @@ -1181,11 +1180,12 @@ int dev_change_name(struct net_device *dev, const char *newname)
  24028. if (ret) {
  24029. memcpy(dev->name, oldname, IFNAMSIZ);
  24030. dev->name_assign_type = old_assign_type;
  24031. - write_seqcount_end(&devnet_rename_seq);
  24032. - return ret;
  24033. + err = ret;
  24034. + goto outunlock;
  24035. }
  24036. - write_seqcount_end(&devnet_rename_seq);
  24037. + __raw_write_seqcount_end(&devnet_rename_seq);
  24038. + mutex_unlock(&devnet_rename_mutex);
  24039. netdev_adjacent_rename_links(dev, oldname);
  24040. @@ -1206,7 +1206,8 @@ int dev_change_name(struct net_device *dev, const char *newname)
  24041. /* err >= 0 after dev_alloc_name() or stores the first errno */
  24042. if (err >= 0) {
  24043. err = ret;
  24044. - write_seqcount_begin(&devnet_rename_seq);
  24045. + mutex_lock(&devnet_rename_mutex);
  24046. + __raw_write_seqcount_begin(&devnet_rename_seq);
  24047. memcpy(dev->name, oldname, IFNAMSIZ);
  24048. memcpy(oldname, newname, IFNAMSIZ);
  24049. dev->name_assign_type = old_assign_type;
  24050. @@ -1219,6 +1220,11 @@ int dev_change_name(struct net_device *dev, const char *newname)
  24051. }
  24052. return err;
  24053. +
  24054. +outunlock:
  24055. + __raw_write_seqcount_end(&devnet_rename_seq);
  24056. + mutex_unlock(&devnet_rename_mutex);
  24057. + return err;
  24058. }
  24059. /**
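The devnet_rename hunks above all reduce to one writer-side recipe: serialize writers with a mutex, bump the sequence with the __raw_write_seqcount_*() helpers so the write side stays preemptible, and let a reader that hits a retry block briefly on the mutex instead of spinning. What follows is a minimal kernel-side sketch of that recipe, not part of the patch; the names example_seq, example_mutex, example_update and example_read are illustrative only.

#include <linux/mutex.h>
#include <linux/seqlock.h>
#include <linux/string.h>

static DEFINE_MUTEX(example_mutex);
static seqcount_t example_seq = SEQCNT_ZERO(example_seq);
static char example_data[16];

/* Writer: the mutex serializes writers, the __raw_* helpers mark the
 * update for readers without the extra work plain write_seqcount_begin()
 * would do. */
static void example_update(const char *new_val)
{
        mutex_lock(&example_mutex);
        __raw_write_seqcount_begin(&example_seq);
        strlcpy(example_data, new_val, sizeof(example_data));
        __raw_write_seqcount_end(&example_seq);
        mutex_unlock(&example_mutex);
}

/* Reader: lockless fast path; if a writer was in flight, sleep on the
 * mutex until it finishes, then retry. */
static void example_read(char *buf, size_t len)
{
        unsigned int seq;

retry:
        seq = read_seqcount_begin(&example_seq);
        strlcpy(buf, example_data, len);
        if (read_seqcount_retry(&example_seq, seq)) {
                mutex_lock(&example_mutex);
                mutex_unlock(&example_mutex);
                goto retry;
        }
}

As in the netdev_get_name() change above, the reader's mutex_lock()/mutex_unlock() pair touches no shared data; it only exists so the retry path waits out the writer rather than busy-looping.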
  24060. @@ -2264,6 +2270,7 @@ static inline void __netif_reschedule(struct Qdisc *q)
  24061. sd->output_queue_tailp = &q->next_sched;
  24062. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  24063. local_irq_restore(flags);
  24064. + preempt_check_resched_rt();
  24065. }
  24066. void __netif_schedule(struct Qdisc *q)
  24067. @@ -2345,6 +2352,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
  24068. __this_cpu_write(softnet_data.completion_queue, skb);
  24069. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  24070. local_irq_restore(flags);
  24071. + preempt_check_resched_rt();
  24072. }
  24073. EXPORT_SYMBOL(__dev_kfree_skb_irq);
  24074. @@ -3035,7 +3043,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
  24075. * This permits __QDISC___STATE_RUNNING owner to get the lock more
  24076. * often and dequeue packets faster.
  24077. */
  24078. +#ifdef CONFIG_PREEMPT_RT_FULL
  24079. + contended = true;
  24080. +#else
  24081. contended = qdisc_is_running(q);
  24082. +#endif
  24083. if (unlikely(contended))
  24084. spin_lock(&q->busylock);
  24085. @@ -3096,9 +3108,44 @@ static void skb_update_prio(struct sk_buff *skb)
  24086. #define skb_update_prio(skb)
  24087. #endif
  24088. +#ifdef CONFIG_PREEMPT_RT_FULL
  24089. +
  24090. +static inline int xmit_rec_read(void)
  24091. +{
  24092. + return current->xmit_recursion;
  24093. +}
  24094. +
  24095. +static inline void xmit_rec_inc(void)
  24096. +{
  24097. + current->xmit_recursion++;
  24098. +}
  24099. +
  24100. +static inline void xmit_rec_dec(void)
  24101. +{
  24102. + current->xmit_recursion--;
  24103. +}
  24104. +
  24105. +#else
  24106. +
  24107. DEFINE_PER_CPU(int, xmit_recursion);
  24108. EXPORT_SYMBOL(xmit_recursion);
  24109. +static inline int xmit_rec_read(void)
  24110. +{
  24111. + return __this_cpu_read(xmit_recursion);
  24112. +}
  24113. +
  24114. +static inline void xmit_rec_inc(void)
  24115. +{
  24116. + __this_cpu_inc(xmit_recursion);
  24117. +}
  24118. +
  24119. +static inline void xmit_rec_dec(void)
  24120. +{
  24121. + __this_cpu_dec(xmit_recursion);
  24122. +}
  24123. +#endif
  24124. +
  24125. #define RECURSION_LIMIT 10
  24126. /**
  24127. @@ -3344,7 +3391,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
  24128. if (txq->xmit_lock_owner != cpu) {
  24129. - if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
  24130. + if (xmit_rec_read() > RECURSION_LIMIT)
  24131. goto recursion_alert;
  24132. skb = validate_xmit_skb(skb, dev);
  24133. @@ -3354,9 +3401,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
  24134. HARD_TX_LOCK(dev, txq, cpu);
  24135. if (!netif_xmit_stopped(txq)) {
  24136. - __this_cpu_inc(xmit_recursion);
  24137. + xmit_rec_inc();
  24138. skb = dev_hard_start_xmit(skb, dev, txq, &rc);
  24139. - __this_cpu_dec(xmit_recursion);
  24140. + xmit_rec_dec();
  24141. if (dev_xmit_complete(rc)) {
  24142. HARD_TX_UNLOCK(dev, txq);
  24143. goto out;
  24144. @@ -3730,6 +3777,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
  24145. rps_unlock(sd);
  24146. local_irq_restore(flags);
  24147. + preempt_check_resched_rt();
  24148. atomic_long_inc(&skb->dev->rx_dropped);
  24149. kfree_skb(skb);
  24150. @@ -3748,7 +3796,7 @@ static int netif_rx_internal(struct sk_buff *skb)
  24151. struct rps_dev_flow voidflow, *rflow = &voidflow;
  24152. int cpu;
  24153. - preempt_disable();
  24154. + migrate_disable();
  24155. rcu_read_lock();
  24156. cpu = get_rps_cpu(skb->dev, skb, &rflow);
  24157. @@ -3758,13 +3806,13 @@ static int netif_rx_internal(struct sk_buff *skb)
  24158. ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
  24159. rcu_read_unlock();
  24160. - preempt_enable();
  24161. + migrate_enable();
  24162. } else
  24163. #endif
  24164. {
  24165. unsigned int qtail;
  24166. - ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
  24167. - put_cpu();
  24168. + ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
  24169. + put_cpu_light();
  24170. }
  24171. return ret;
  24172. }
  24173. @@ -3798,16 +3846,44 @@ int netif_rx_ni(struct sk_buff *skb)
  24174. trace_netif_rx_ni_entry(skb);
  24175. - preempt_disable();
  24176. + local_bh_disable();
  24177. err = netif_rx_internal(skb);
  24178. - if (local_softirq_pending())
  24179. - do_softirq();
  24180. - preempt_enable();
  24181. + local_bh_enable();
  24182. return err;
  24183. }
  24184. EXPORT_SYMBOL(netif_rx_ni);
  24185. +#ifdef CONFIG_PREEMPT_RT_FULL
  24186. +/*
  24187. + * RT runs ksoftirqd as a real time thread and the root_lock is a
  24188. + * "sleeping spinlock". If the trylock fails then we can go into an
  24189. + * infinite loop when ksoftirqd preempted the task which actually
  24190. + * holds the lock, because we requeue q and raise NET_TX softirq
  24191. + * causing ksoftirqd to loop forever.
  24192. + *
  24193. + * It's safe to use spin_lock on RT here as softirqs run in thread
  24194. + * context and cannot deadlock against the thread which is holding
  24195. + * root_lock.
  24196. + *
  24197. + * On !RT the trylock might fail, but there we bail out from the
  24198. + * softirq loop after 10 attempts which we can't do on RT. And the
  24199. + * task holding root_lock cannot be preempted, so the only downside of
  24200. + * that trylock is that we need 10 loops to decide that we should have
  24201. + * given up in the first one :)
  24202. + */
  24203. +static inline int take_root_lock(spinlock_t *lock)
  24204. +{
  24205. + spin_lock(lock);
  24206. + return 1;
  24207. +}
  24208. +#else
  24209. +static inline int take_root_lock(spinlock_t *lock)
  24210. +{
  24211. + return spin_trylock(lock);
  24212. +}
  24213. +#endif
  24214. +
  24215. static void net_tx_action(struct softirq_action *h)
  24216. {
  24217. struct softnet_data *sd = this_cpu_ptr(&softnet_data);
  24218. @@ -3855,7 +3931,7 @@ static void net_tx_action(struct softirq_action *h)
  24219. head = head->next_sched;
  24220. root_lock = qdisc_lock(q);
  24221. - if (spin_trylock(root_lock)) {
  24222. + if (take_root_lock(root_lock)) {
  24223. smp_mb__before_atomic();
  24224. clear_bit(__QDISC_STATE_SCHED,
  24225. &q->state);
  24226. @@ -4264,7 +4340,7 @@ static void flush_backlog(void *arg)
  24227. skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
  24228. if (skb->dev == dev) {
  24229. __skb_unlink(skb, &sd->input_pkt_queue);
  24230. - kfree_skb(skb);
  24231. + __skb_queue_tail(&sd->tofree_queue, skb);
  24232. input_queue_head_incr(sd);
  24233. }
  24234. }
  24235. @@ -4273,10 +4349,13 @@ static void flush_backlog(void *arg)
  24236. skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
  24237. if (skb->dev == dev) {
  24238. __skb_unlink(skb, &sd->process_queue);
  24239. - kfree_skb(skb);
  24240. + __skb_queue_tail(&sd->tofree_queue, skb);
  24241. input_queue_head_incr(sd);
  24242. }
  24243. }
  24244. +
  24245. + if (!skb_queue_empty(&sd->tofree_queue))
  24246. + raise_softirq_irqoff(NET_RX_SOFTIRQ);
  24247. }
  24248. static int napi_gro_complete(struct sk_buff *skb)
  24249. @@ -4735,6 +4814,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
  24250. sd->rps_ipi_list = NULL;
  24251. local_irq_enable();
  24252. + preempt_check_resched_rt();
  24253. /* Send pending IPI's to kick RPS processing on remote cpus. */
  24254. while (remsd) {
  24255. @@ -4748,6 +4828,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
  24256. } else
  24257. #endif
  24258. local_irq_enable();
  24259. + preempt_check_resched_rt();
  24260. }
  24261. static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
  24262. @@ -4829,6 +4910,7 @@ void __napi_schedule(struct napi_struct *n)
  24263. local_irq_save(flags);
  24264. ____napi_schedule(this_cpu_ptr(&softnet_data), n);
  24265. local_irq_restore(flags);
  24266. + preempt_check_resched_rt();
  24267. }
  24268. EXPORT_SYMBOL(__napi_schedule);
  24269. @@ -5169,7 +5251,7 @@ static void net_rx_action(struct softirq_action *h)
  24270. list_splice_tail(&repoll, &list);
  24271. list_splice(&list, &sd->poll_list);
  24272. if (!list_empty(&sd->poll_list))
  24273. - __raise_softirq_irqoff(NET_RX_SOFTIRQ);
  24274. + __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ);
  24275. net_rps_action_and_irq_enable(sd);
  24276. }
  24277. @@ -7534,7 +7616,7 @@ EXPORT_SYMBOL(free_netdev);
  24278. void synchronize_net(void)
  24279. {
  24280. might_sleep();
  24281. - if (rtnl_is_locked())
  24282. + if (rtnl_is_locked() && !IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
  24283. synchronize_rcu_expedited();
  24284. else
  24285. synchronize_rcu();
  24286. @@ -7775,16 +7857,20 @@ static int dev_cpu_callback(struct notifier_block *nfb,
  24287. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  24288. local_irq_enable();
  24289. + preempt_check_resched_rt();
  24290. /* Process offline CPU's input_pkt_queue */
  24291. while ((skb = __skb_dequeue(&oldsd->process_queue))) {
  24292. netif_rx_ni(skb);
  24293. input_queue_head_incr(oldsd);
  24294. }
  24295. - while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
  24296. + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
  24297. netif_rx_ni(skb);
  24298. input_queue_head_incr(oldsd);
  24299. }
  24300. + while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
  24301. + kfree_skb(skb);
  24302. + }
  24303. return NOTIFY_OK;
  24304. }
  24305. @@ -8086,8 +8172,9 @@ static int __init net_dev_init(void)
  24306. for_each_possible_cpu(i) {
  24307. struct softnet_data *sd = &per_cpu(softnet_data, i);
  24308. - skb_queue_head_init(&sd->input_pkt_queue);
  24309. - skb_queue_head_init(&sd->process_queue);
  24310. + skb_queue_head_init_raw(&sd->input_pkt_queue);
  24311. + skb_queue_head_init_raw(&sd->process_queue);
  24312. + skb_queue_head_init_raw(&sd->tofree_queue);
  24313. INIT_LIST_HEAD(&sd->poll_list);
  24314. sd->output_queue_tailp = &sd->output_queue;
  24315. #ifdef CONFIG_RPS
  24316. diff --git a/net/core/skbuff.c b/net/core/skbuff.c
  24317. index 59bf4d77154f..0c1e29b0f8fb 100644
  24318. --- a/net/core/skbuff.c
  24319. +++ b/net/core/skbuff.c
  24320. @@ -63,6 +63,7 @@
  24321. #include <linux/errqueue.h>
  24322. #include <linux/prefetch.h>
  24323. #include <linux/if_vlan.h>
  24324. +#include <linux/locallock.h>
  24325. #include <net/protocol.h>
  24326. #include <net/dst.h>
  24327. @@ -359,6 +360,8 @@ struct napi_alloc_cache {
  24328. static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
  24329. static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
  24330. +static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
  24331. +static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock);
  24332. static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  24333. {
  24334. @@ -366,10 +369,10 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  24335. unsigned long flags;
  24336. void *data;
  24337. - local_irq_save(flags);
  24338. + local_lock_irqsave(netdev_alloc_lock, flags);
  24339. nc = this_cpu_ptr(&netdev_alloc_cache);
  24340. data = __alloc_page_frag(nc, fragsz, gfp_mask);
  24341. - local_irq_restore(flags);
  24342. + local_unlock_irqrestore(netdev_alloc_lock, flags);
  24343. return data;
  24344. }
  24345. @@ -388,9 +391,13 @@ EXPORT_SYMBOL(netdev_alloc_frag);
  24346. static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  24347. {
  24348. - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  24349. + struct napi_alloc_cache *nc;
  24350. + void *data;
  24351. - return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
  24352. + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  24353. + data = __alloc_page_frag(&nc->page, fragsz, gfp_mask);
  24354. + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  24355. + return data;
  24356. }
  24357. void *napi_alloc_frag(unsigned int fragsz)
  24358. @@ -437,13 +444,13 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
  24359. if (sk_memalloc_socks())
  24360. gfp_mask |= __GFP_MEMALLOC;
  24361. - local_irq_save(flags);
  24362. + local_lock_irqsave(netdev_alloc_lock, flags);
  24363. nc = this_cpu_ptr(&netdev_alloc_cache);
  24364. data = __alloc_page_frag(nc, len, gfp_mask);
  24365. pfmemalloc = nc->pfmemalloc;
  24366. - local_irq_restore(flags);
  24367. + local_unlock_irqrestore(netdev_alloc_lock, flags);
  24368. if (unlikely(!data))
  24369. return NULL;
  24370. @@ -484,9 +491,10 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
  24371. struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
  24372. gfp_t gfp_mask)
  24373. {
  24374. - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  24375. + struct napi_alloc_cache *nc;
  24376. struct sk_buff *skb;
  24377. void *data;
  24378. + bool pfmemalloc;
  24379. len += NET_SKB_PAD + NET_IP_ALIGN;
  24380. @@ -504,7 +512,10 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
  24381. if (sk_memalloc_socks())
  24382. gfp_mask |= __GFP_MEMALLOC;
  24383. + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  24384. data = __alloc_page_frag(&nc->page, len, gfp_mask);
  24385. + pfmemalloc = nc->page.pfmemalloc;
  24386. + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  24387. if (unlikely(!data))
  24388. return NULL;
  24389. @@ -515,7 +526,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
  24390. }
  24391. /* use OR instead of assignment to avoid clearing of bits in mask */
  24392. - if (nc->page.pfmemalloc)
  24393. + if (pfmemalloc)
  24394. skb->pfmemalloc = 1;
  24395. skb->head_frag = 1;
  24396. @@ -759,23 +770,26 @@ EXPORT_SYMBOL(consume_skb);
  24397. void __kfree_skb_flush(void)
  24398. {
  24399. - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  24400. + struct napi_alloc_cache *nc;
  24401. + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  24402. /* flush skb_cache if containing objects */
  24403. if (nc->skb_count) {
  24404. kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
  24405. nc->skb_cache);
  24406. nc->skb_count = 0;
  24407. }
  24408. + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  24409. }
  24410. static inline void _kfree_skb_defer(struct sk_buff *skb)
  24411. {
  24412. - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  24413. + struct napi_alloc_cache *nc;
  24414. /* drop skb->head and call any destructors for packet */
  24415. skb_release_all(skb);
  24416. + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  24417. /* record skb to CPU local list */
  24418. nc->skb_cache[nc->skb_count++] = skb;
  24419. @@ -790,6 +804,7 @@ static inline void _kfree_skb_defer(struct sk_buff *skb)
  24420. nc->skb_cache);
  24421. nc->skb_count = 0;
  24422. }
  24423. + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  24424. }
  24425. void __kfree_skb_defer(struct sk_buff *skb)
  24426. {
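The skbuff.c changes above are all instances of one pattern: the per-CPU allocator caches are protected by a local lock from linux/locallock.h instead of local_irq_save()/local_irq_restore() or a bare this_cpu_ptr() access. Roughly, on !RT the local lock falls back to the old IRQ-off protection, while on RT it is a per-CPU sleeping lock, so the section stays preemptible. A minimal sketch of the pattern follows; example_lock, example_cache and example_get are illustrative names, not part of the patch.

#include <linux/locallock.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned int, example_cache);
static DEFINE_LOCAL_IRQ_LOCK(example_lock);

static unsigned int example_get(void)
{
        unsigned long flags;
        unsigned int val;

        /* Serializes against other users of example_cache on this CPU;
         * IRQ-off on !RT, per-CPU sleeping lock on RT. */
        local_lock_irqsave(example_lock, flags);
        val = __this_cpu_inc_return(example_cache);
        local_unlock_irqrestore(example_lock, flags);

        return val;
}

Where the code wants a pointer to the per-CPU object rather than a flags-style critical section, the get_locked_var()/put_locked_var() pair used for napi_alloc_cache above serves the same purpose.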
  24427. diff --git a/net/core/sock.c b/net/core/sock.c
  24428. index 7e73c26b6bb4..885fe2ec43fb 100644
  24429. --- a/net/core/sock.c
  24430. +++ b/net/core/sock.c
  24431. @@ -2421,12 +2421,11 @@ void lock_sock_nested(struct sock *sk, int subclass)
  24432. if (sk->sk_lock.owned)
  24433. __lock_sock(sk);
  24434. sk->sk_lock.owned = 1;
  24435. - spin_unlock(&sk->sk_lock.slock);
  24436. + spin_unlock_bh(&sk->sk_lock.slock);
  24437. /*
  24438. * The sk_lock has mutex_lock() semantics here:
  24439. */
  24440. mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
  24441. - local_bh_enable();
  24442. }
  24443. EXPORT_SYMBOL(lock_sock_nested);
  24444. diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
  24445. index 6333489771ed..c1f1d5030d37 100644
  24446. --- a/net/ipv4/icmp.c
  24447. +++ b/net/ipv4/icmp.c
  24448. @@ -69,6 +69,7 @@
  24449. #include <linux/jiffies.h>
  24450. #include <linux/kernel.h>
  24451. #include <linux/fcntl.h>
  24452. +#include <linux/sysrq.h>
  24453. #include <linux/socket.h>
  24454. #include <linux/in.h>
  24455. #include <linux/inet.h>
  24456. @@ -891,6 +892,30 @@ static bool icmp_redirect(struct sk_buff *skb)
  24457. }
  24458. /*
  24459. + * 32bit and 64bit have different timestamp lengths, so we check for
  24460. + * the cookie at offset 20 and verify it is repeated at offset 50
  24461. + */
  24462. +#define CO_POS0 20
  24463. +#define CO_POS1 50
  24464. +#define CO_SIZE sizeof(int)
  24465. +#define ICMP_SYSRQ_SIZE 57
  24466. +
  24467. +/*
  24468. + * We got an ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
  24469. + * pattern and if it matches send the next byte as a trigger to sysrq.
  24470. + */
  24471. +static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
  24472. +{
  24473. + int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
  24474. + char *p = skb->data;
  24475. +
  24476. + if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
  24477. + !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
  24478. + p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
  24479. + handle_sysrq(p[CO_POS0 + CO_SIZE]);
  24480. +}
  24481. +
  24482. +/*
  24483. * Handle ICMP_ECHO ("ping") requests.
  24484. *
  24485. * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
  24486. @@ -917,6 +942,11 @@ static bool icmp_echo(struct sk_buff *skb)
  24487. icmp_param.data_len = skb->len;
  24488. icmp_param.head_len = sizeof(struct icmphdr);
  24489. icmp_reply(&icmp_param, skb);
  24490. +
  24491. + if (skb->len == ICMP_SYSRQ_SIZE &&
  24492. + net->ipv4.sysctl_icmp_echo_sysrq) {
  24493. + icmp_check_sysrq(net, skb);
  24494. + }
  24495. }
  24496. /* should there be an ICMP stat for ignored echos? */
  24497. return true;
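For illustration only, the payload layout that icmp_check_sysrq() above looks for can be spelled out as a small userspace helper. build_sysrq_payload() is a hypothetical name; only the offsets and sizes come from the constants in the hunk, and wrapping the buffer into an actual echo request (ICMP header, checksum, how a ping utility fills its data area) is deliberately not shown.

/*
 * Illustrative only: fill a 57-byte buffer with the cookie layout checked
 * by icmp_check_sysrq(). Offsets are relative to the data the kernel
 * inspects; building the surrounding ICMP echo request is not shown.
 */
#include <arpa/inet.h>  /* htonl() */
#include <string.h>     /* memset(), memcpy() */

#define CO_POS0         20
#define CO_POS1         50
#define CO_SIZE         sizeof(int)
#define ICMP_SYSRQ_SIZE 57

static void build_sysrq_payload(unsigned char buf[ICMP_SYSRQ_SIZE],
                                unsigned int sysctl_cookie, char sysrq_key)
{
        /* The kernel compares against htonl(sysctl value), so store the
         * cookie in the same byte order. */
        unsigned int cookie = htonl(sysctl_cookie);

        memset(buf, 0, ICMP_SYSRQ_SIZE);
        memcpy(buf + CO_POS0, &cookie, CO_SIZE);  /* cookie at offset 20 ... */
        memcpy(buf + CO_POS1, &cookie, CO_SIZE);  /* ... repeated at offset 50 */
        buf[CO_POS0 + CO_SIZE] = sysrq_key;       /* trigger byte must match ... */
        buf[CO_POS1 + CO_SIZE] = sysrq_key;       /* ... at both positions */
}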
  24498. diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
  24499. index bc5196ea1bdf..fc646aa7fad7 100644
  24500. --- a/net/ipv4/inet_connection_sock.c
  24501. +++ b/net/ipv4/inet_connection_sock.c
  24502. @@ -603,7 +603,7 @@ static void reqsk_timer_handler(unsigned long data)
  24503. if (req->num_timeout++ == 0)
  24504. atomic_dec(&queue->young);
  24505. timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
  24506. - mod_timer_pinned(&req->rsk_timer, jiffies + timeo);
  24507. + mod_timer(&req->rsk_timer, jiffies + timeo);
  24508. return;
  24509. }
  24510. drop:
  24511. @@ -617,8 +617,9 @@ static void reqsk_queue_hash_req(struct request_sock *req,
  24512. req->num_timeout = 0;
  24513. req->sk = NULL;
  24514. - setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req);
  24515. - mod_timer_pinned(&req->rsk_timer, jiffies + timeout);
  24516. + setup_pinned_timer(&req->rsk_timer, reqsk_timer_handler,
  24517. + (unsigned long)req);
  24518. + mod_timer(&req->rsk_timer, jiffies + timeout);
  24519. inet_ehash_insert(req_to_sk(req), NULL);
  24520. /* before letting lookups find us, make sure all req fields
  24521. diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
  24522. index c67f9bd7699c..dfb6286f2e49 100644
  24523. --- a/net/ipv4/inet_timewait_sock.c
  24524. +++ b/net/ipv4/inet_timewait_sock.c
  24525. @@ -188,7 +188,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
  24526. tw->tw_prot = sk->sk_prot_creator;
  24527. atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
  24528. twsk_net_set(tw, sock_net(sk));
  24529. - setup_timer(&tw->tw_timer, tw_timer_handler, (unsigned long)tw);
  24530. + setup_pinned_timer(&tw->tw_timer, tw_timer_handler,
  24531. + (unsigned long)tw);
  24532. /*
  24533. * Because we use RCU lookups, we should not set tw_refcnt
  24534. * to a non null value before everything is setup for this
  24535. @@ -248,7 +249,7 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
  24536. tw->tw_kill = timeo <= 4*HZ;
  24537. if (!rearm) {
  24538. - BUG_ON(mod_timer_pinned(&tw->tw_timer, jiffies + timeo));
  24539. + BUG_ON(mod_timer(&tw->tw_timer, jiffies + timeo));
  24540. atomic_inc(&tw->tw_dr->tw_count);
  24541. } else {
  24542. mod_timer_pending(&tw->tw_timer, jiffies + timeo);
  24543. diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
  24544. index 03112a3106ab..daa6c5184977 100644
  24545. --- a/net/ipv4/sysctl_net_ipv4.c
  24546. +++ b/net/ipv4/sysctl_net_ipv4.c
  24547. @@ -681,6 +681,13 @@ static struct ctl_table ipv4_net_table[] = {
  24548. .proc_handler = proc_dointvec
  24549. },
  24550. {
  24551. + .procname = "icmp_echo_sysrq",
  24552. + .data = &init_net.ipv4.sysctl_icmp_echo_sysrq,
  24553. + .maxlen = sizeof(int),
  24554. + .mode = 0644,
  24555. + .proc_handler = proc_dointvec
  24556. + },
  24557. + {
  24558. .procname = "icmp_ignore_bogus_error_responses",
  24559. .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
  24560. .maxlen = sizeof(int),
  24561. diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
  24562. index dc27becb9b71..7815f28bc30b 100644
  24563. --- a/net/mac80211/rx.c
  24564. +++ b/net/mac80211/rx.c
  24565. @@ -3679,7 +3679,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb,
  24566. struct ieee80211_supported_band *sband;
  24567. struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
  24568. - WARN_ON_ONCE(softirq_count() == 0);
  24569. + WARN_ON_ONCE_NONRT(softirq_count() == 0);
  24570. if (WARN_ON(status->band >= IEEE80211_NUM_BANDS))
  24571. goto drop;
  24572. diff --git a/net/netfilter/core.c b/net/netfilter/core.c
  24573. index f39276d1c2d7..10880c89d62f 100644
  24574. --- a/net/netfilter/core.c
  24575. +++ b/net/netfilter/core.c
  24576. @@ -22,11 +22,17 @@
  24577. #include <linux/proc_fs.h>
  24578. #include <linux/mutex.h>
  24579. #include <linux/slab.h>
  24580. +#include <linux/locallock.h>
  24581. #include <net/net_namespace.h>
  24582. #include <net/sock.h>
  24583. #include "nf_internals.h"
  24584. +#ifdef CONFIG_PREEMPT_RT_BASE
  24585. +DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
  24586. +EXPORT_PER_CPU_SYMBOL(xt_write_lock);
  24587. +#endif
  24588. +
  24589. static DEFINE_MUTEX(afinfo_mutex);
  24590. const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
  24591. diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
  24592. index 8012f67ca5ae..dbfd52456676 100644
  24593. --- a/net/packet/af_packet.c
  24594. +++ b/net/packet/af_packet.c
  24595. @@ -63,6 +63,7 @@
  24596. #include <linux/if_packet.h>
  24597. #include <linux/wireless.h>
  24598. #include <linux/kernel.h>
  24599. +#include <linux/delay.h>
  24600. #include <linux/kmod.h>
  24601. #include <linux/slab.h>
  24602. #include <linux/vmalloc.h>
  24603. @@ -694,7 +695,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data)
  24604. if (BLOCK_NUM_PKTS(pbd)) {
  24605. while (atomic_read(&pkc->blk_fill_in_prog)) {
  24606. /* Waiting for skb_copy_bits to finish... */
  24607. - cpu_relax();
  24608. + cpu_chill();
  24609. }
  24610. }
  24611. @@ -956,7 +957,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
  24612. if (!(status & TP_STATUS_BLK_TMO)) {
  24613. while (atomic_read(&pkc->blk_fill_in_prog)) {
  24614. /* Waiting for skb_copy_bits to finish... */
  24615. - cpu_relax();
  24616. + cpu_chill();
  24617. }
  24618. }
  24619. prb_close_block(pkc, pbd, po, status);
  24620. diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
  24621. index f7164ac1ffc1..6f37dd5fc717 100644
  24622. --- a/net/rds/ib_rdma.c
  24623. +++ b/net/rds/ib_rdma.c
  24624. @@ -34,6 +34,7 @@
  24625. #include <linux/slab.h>
  24626. #include <linux/rculist.h>
  24627. #include <linux/llist.h>
  24628. +#include <linux/delay.h>
  24629. #include "ib_mr.h"
  24630. @@ -209,7 +210,7 @@ static inline void wait_clean_list_grace(void)
  24631. for_each_online_cpu(cpu) {
  24632. flag = &per_cpu(clean_list_grace, cpu);
  24633. while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
  24634. - cpu_relax();
  24635. + cpu_chill();
  24636. }
  24637. }
  24638. diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
  24639. index 80742edea96f..31f70ee7e609 100644
  24640. --- a/net/sched/sch_generic.c
  24641. +++ b/net/sched/sch_generic.c
  24642. @@ -894,7 +894,7 @@ void dev_deactivate_many(struct list_head *head)
  24643. /* Wait for outstanding qdisc_run calls. */
  24644. list_for_each_entry(dev, head, close_list)
  24645. while (some_qdisc_is_busy(dev))
  24646. - yield();
  24647. + msleep(1);
  24648. }
  24649. void dev_deactivate(struct net_device *dev)
  24650. diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
  24651. index 7231cb413a2c..e9c57cd2cb7c 100644
  24652. --- a/net/sunrpc/svc_xprt.c
  24653. +++ b/net/sunrpc/svc_xprt.c
  24654. @@ -344,7 +344,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
  24655. goto out;
  24656. }
  24657. - cpu = get_cpu();
  24658. + cpu = get_cpu_light();
  24659. pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
  24660. atomic_long_inc(&pool->sp_stats.packets);
  24661. @@ -380,7 +380,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
  24662. atomic_long_inc(&pool->sp_stats.threads_woken);
  24663. wake_up_process(rqstp->rq_task);
  24664. - put_cpu();
  24665. + put_cpu_light();
  24666. goto out;
  24667. }
  24668. rcu_read_unlock();
  24669. @@ -401,7 +401,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
  24670. goto redo_search;
  24671. }
  24672. rqstp = NULL;
  24673. - put_cpu();
  24674. + put_cpu_light();
  24675. out:
  24676. trace_svc_xprt_do_enqueue(xprt, rqstp);
  24677. }
  24678. diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
  24679. index 6fdc97ef6023..523e0420d7f0 100755
  24680. --- a/scripts/mkcompile_h
  24681. +++ b/scripts/mkcompile_h
  24682. @@ -4,7 +4,8 @@ TARGET=$1
  24683. ARCH=$2
  24684. SMP=$3
  24685. PREEMPT=$4
  24686. -CC=$5
  24687. +RT=$5
  24688. +CC=$6
  24689. vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; }
  24690. @@ -57,6 +58,7 @@ UTS_VERSION="#$VERSION"
  24691. CONFIG_FLAGS=""
  24692. if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
  24693. if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
  24694. +if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi
  24695. UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
  24696. # Truncate to maximum length
  24697. diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
  24698. index 9106d8e2300e..6c55707fdc30 100644
  24699. --- a/sound/core/pcm_native.c
  24700. +++ b/sound/core/pcm_native.c
  24701. @@ -135,7 +135,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock);
  24702. void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream)
  24703. {
  24704. if (!substream->pcm->nonatomic)
  24705. - local_irq_disable();
  24706. + local_irq_disable_nort();
  24707. snd_pcm_stream_lock(substream);
  24708. }
  24709. EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq);
  24710. @@ -150,7 +150,7 @@ void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream)
  24711. {
  24712. snd_pcm_stream_unlock(substream);
  24713. if (!substream->pcm->nonatomic)
  24714. - local_irq_enable();
  24715. + local_irq_enable_nort();
  24716. }
  24717. EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq);
  24718. @@ -158,7 +158,7 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream)
  24719. {
  24720. unsigned long flags = 0;
  24721. if (!substream->pcm->nonatomic)
  24722. - local_irq_save(flags);
  24723. + local_irq_save_nort(flags);
  24724. snd_pcm_stream_lock(substream);
  24725. return flags;
  24726. }
  24727. @@ -176,7 +176,7 @@ void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream,
  24728. {
  24729. snd_pcm_stream_unlock(substream);
  24730. if (!substream->pcm->nonatomic)
  24731. - local_irq_restore(flags);
  24732. + local_irq_restore_nort(flags);
  24733. }
  24734. EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore);