patch-realtime 883 KB

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521
715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742017421174221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631
786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741
857518576185771857818579185801858118582185831858418585185861858718588185891859018591185921859318594185951859618597185981859918600186011860218603186041860518606186071860818609186101861118612186131861418615186161861718618186191862018621186221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211902219023190241902519026190271902819029190301903119032190331903419035190361903719038190391904019041190421904319044190451904619047190481904919050190511905219053190541905519056190571905819059190601906119062190631906419065190661906719068190691907019071190721907319074190751907619077190781907919080190811908219083190841908519086190871908819089190901909119092190931909419095190961909719098190991910019101191021910319104191051910619107191081910919110191111911219113191141911519116191171911819119191201912119122191231912419125191261912719128191291913019131191321913319134191351913619137191381913919140191411914219143191441914519146191471914819149191501915119152191531915419155191561915719158191591916019161191621916319164191651916619167191681916919170191711917219173191741917519176191771917819179191801918119182191831918419185191861918719188191891919019191191921919319194191951919619197191981919919200192011920219203192041920519206192071920819209192101921119212192131921419215192161921719218192191922019221192221922319224192251922619227192281922919230192311923219233192341923519236192371923819239192401924119242192431924419245192461924719248192491925019251192521925319254192551925619257192581925919260192611926219263192641926519266192671926819269192701927119272192731927419275192761927719278192791928019281192821928319284192851
928619287192881928919290192911929219293192941929519296192971929819299193001930119302193031930419305193061930719308193091931019311193121931319314193151931619317193181931919320193211932219323193241932519326193271932819329193301933119332193331933419335193361933719338193391934019341193421934319344193451934619347193481934919350193511935219353193541935519356193571935819359193601936119362193631936419365193661936719368193691937019371193721937319374193751937619377193781937919380193811938219383193841938519386193871938819389193901939119392193931939419395193961939719398193991940019401194021940319404194051940619407194081940919410194111941219413194141941519416194171941819419194201942119422194231942419425194261942719428194291943019431194321943319434194351943619437194381943919440194411944219443194441944519446194471944819449194501945119452194531945419455194561945719458194591946019461194621946319464194651946619467194681946919470194711947219473194741947519476194771947819479194801948119482194831948419485194861948719488194891949019491194921949319494194951949619497194981949919500195011950219503195041950519506195071950819509195101951119512195131951419515195161951719518195191952019521195221952319524195251952619527195281952919530195311953219533195341953519536195371953819539195401954119542195431954419545195461954719548195491955019551195521955319554195551955619557195581955919560195611956219563195641956519566195671956819569195701957119572195731957419575195761957719578195791958019581195821958319584195851958619587195881958919590195911959219593195941959519596195971959819599196001960119602196031960419605196061960719608196091961019611196121961319614196151961619617196181961919620196211962219623196241962519626196271962819629196301963119632196331963419635196361963719638196391964019641196421964319644196451964619647196481964919650196511965219653196541965519656196571965819659196601966119662196631966419665196661966719668196691967019671196721967319674196751967619677196781967919680196811968219683196841968519686196871968819689196901969119692196931969419695196961969719698196991970019701197021970319704197051970619707197081970919710197111971219713197141971519716197171971819719197201972119722197231972419725197261972719728197291973019731197321973319734197351973619737197381973919740197411974219743197441974519746197471974819749197501975119752197531975419755197561975719758197591976019761197621976319764197651976619767197681976919770197711977219773197741977519776197771977819779197801978119782197831978419785197861978719788197891979019791197921979319794197951979619797197981979919800198011980219803198041980519806198071980819809198101981119812198131981419815198161981719818198191982019821198221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961
999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212022220223202242022520226202272022820229202302023120232202332023420235202362023720238202392024020241202422024320244202452024620247202482024920250202512025220253202542025520256202572025820259202602026120262202632026420265202662026720268202692027020271202722027320274202752027620277202782027920280202812028220283202842028520286202872028820289202902029120292202932029420295202962029720298202992030020301203022030320304203052030620307203082030920310203112031220313203142031520316203172031820319203202032120322203232032420325203262032720328203292033020331203322033320334203352033620337203382033920340203412034220343203442034520346203472034820349203502035120352203532035420355203562035720358203592036020361203622036320364203652036620367203682036920370203712037220373203742037520376203772037820379203802038120382203832038420385203862038720388203892039020391203922039320394203952039620397203982039920400204012040220403204042040520406204072040820409204102041120412204132041420415204162041720418204192042020421204222042320424204252042620427204282042920430204312043220433204342043520436204372043820439204402044120442204432044420445204462044720448204492045020451204522045320454204552045620457204582045920460204612046220463204642046520466204672046820469204702047120472204732047420475204762047720478204792048020481204822048320484204852048620487204882048920490204912049220493204942049520496204972049820499205002050120502205032050420505205062050720508205092051020511205122051320514205152051620517205182051920520205212052220523205242052520526205272052820529205302053120532205332053420535205362053720538205392054020541205422054320544205452054620547205482054920550205512055220553205542055520556205572055820559205602056120562205632056420565205662056720568205692057020571205722057320574205752057620577205782057920580205812058220583205842058520586205872058820589205902059120592205932059420595205962059720598205992060020601206022060320604206052060620607206082060920610206112061220613206142061520616206172061820619206202062120622206232062420625206262062720628206292063020631206322063320634206352063620637206382063920640206412064220643206442064520646206472064820649206502065120652206532065420655206562065720658206592066020661206622066320664206652066620667206682066920670206712067220673206742067520676206772067820679206802068120682206832068420685206862068720688206892069020691206922069320694206952069620697206982069920700207012070220703207042070520706207072
070820709207102071120712207132071420715207162071720718207192072020721207222072320724207252072620727207282072920730207312073220733207342073520736207372073820739207402074120742207432074420745207462074720748207492075020751207522075320754207552075620757207582075920760207612076220763207642076520766207672076820769207702077120772207732077420775207762077720778207792078020781207822078320784207852078620787207882078920790207912079220793207942079520796207972079820799208002080120802208032080420805208062080720808208092081020811208122081320814208152081620817208182081920820208212082220823208242082520826208272082820829208302083120832208332083420835208362083720838208392084020841208422084320844208452084620847208482084920850208512085220853208542085520856208572085820859208602086120862208632086420865208662086720868208692087020871208722087320874208752087620877208782087920880208812088220883208842088520886208872088820889208902089120892208932089420895208962089720898208992090020901209022090320904209052090620907209082090920910209112091220913209142091520916209172091820919209202092120922209232092420925209262092720928209292093020931209322093320934209352093620937209382093920940209412094220943209442094520946209472094820949209502095120952209532095420955209562095720958209592096020961209622096320964209652096620967209682096920970209712097220973209742097520976209772097820979209802098120982209832098420985209862098720988209892099020991209922099320994209952099620997209982099921000210012100221003210042100521006210072100821009210102101121012210132101421015210162101721018210192102021021210222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303213042130521306213072130821309213102131121312213132131421315213162131721318213192132021321213222132321324213252132621327213282132921330213312133221333213342133521336213372133821339213402134121342213432134421345213462134721348213492135021351213522135321354213552135621357213582135921360213612136221363213642136521366213672136821369213702137121372213732137421375213762137721378213792138021381213822138321384213852138621387213882138921390213912139221393213942139521396213972139821399214002140121402214032140421405214062140721408214092141021411214122141321414214152141621417214182
141921420214212142221423214242142521426214272142821429214302143121432214332143421435214362143721438214392144021441214422144321444214452144621447214482144921450214512145221453214542145521456214572145821459214602146121462214632146421465214662146721468214692147021471214722147321474214752147621477214782147921480214812148221483214842148521486214872148821489214902149121492214932149421495214962149721498214992150021501215022150321504215052150621507215082150921510215112151221513215142151521516215172151821519215202152121522215232152421525215262152721528215292153021531215322153321534215352153621537215382153921540215412154221543215442154521546215472154821549215502155121552215532155421555215562155721558215592156021561215622156321564215652156621567215682156921570215712157221573215742157521576215772157821579215802158121582215832158421585215862158721588215892159021591215922159321594215952159621597215982159921600216012160221603216042160521606216072160821609216102161121612216132161421615216162161721618216192162021621216222162321624216252162621627216282162921630216312163221633216342163521636216372163821639216402164121642216432164421645216462164721648216492165021651216522165321654216552165621657216582165921660216612166221663216642166521666216672166821669216702167121672216732167421675216762167721678216792168021681216822168321684216852168621687216882168921690216912169221693216942169521696216972169821699217002170121702217032170421705217062170721708217092171021711217122171321714217152171621717217182171921720217212172221723217242172521726217272172821729217302173121732217332173421735217362173721738217392174021741217422174321744217452174621747217482174921750217512175221753217542175521756217572175821759217602176121762217632176421765217662176721768217692177021771217722177321774217752177621777217782177921780217812178221783217842178521786217872178821789217902179121792217932179421795217962179721798217992180021801218022180321804218052180621807218082180921810218112181221813218142181521816218172181821819218202182121822218232182421825218262182721828218292183021831218322183321834218352183621837218382183921840218412184221843218442184521846218472184821849218502185121852218532185421855218562185721858218592186021861218622186321864218652186621867218682186921870218712187221873218742187521876218772187821879218802188121882218832188421885218862188721888218892189021891218922189321894218952189621897218982189921900219012190221903219042190521906219072190821909219102191121912219132191421915219162191721918219192192021921219222192321924219252192621927219282192921930219312193221933219342193521936219372193821939219402194121942219432194421945219462194721948219492195021951219522195321954219552195621957219582195921960219612196221963219642196521966219672196821969219702197121972219732197421975219762197721978219792198021981219822198321984219852198621987219882198921990219912199221993219942199521996219972199821999220002200122002220032200422005220062200722008220092201022011220122201322014220152201622017220182201922020220212202222023220242202522026220272202822029220302203122032220332203422035220362203722038220392204022041220422204322044220452204622047220482204922050220512205222053220542205522056220572205822059220602206122062220632206422065220662206722068220692207022071220722207322074220752207622077220782207922080220812208222083220842208522086220872208822089220902209122092220932209422095220962209722098220992210022101221022210322104221052210622107221082210922110221112211222113221142211522116221172211822119221202212122122221232212422125221262212722128221292
213022131221322213322134221352213622137221382213922140221412214222143221442214522146221472214822149221502215122152221532215422155221562215722158221592216022161221622216322164221652216622167221682216922170221712217222173221742217522176221772217822179221802218122182221832218422185221862218722188221892219022191221922219322194221952219622197221982219922200222012220222203222042220522206222072220822209222102221122212222132221422215222162221722218222192222022221222222222322224222252222622227222282222922230222312223222233222342223522236222372223822239222402224122242222432224422245222462224722248222492225022251222522225322254222552225622257222582225922260222612226222263222642226522266222672226822269222702227122272222732227422275222762227722278222792228022281222822228322284222852228622287222882228922290222912229222293222942229522296222972229822299223002230122302223032230422305223062230722308223092231022311223122231322314223152231622317223182231922320223212232222323223242232522326223272232822329223302233122332223332233422335223362233722338223392234022341223422234322344223452234622347223482234922350223512235222353223542235522356223572235822359223602236122362223632236422365223662236722368223692237022371223722237322374223752237622377223782237922380223812238222383223842238522386223872238822389223902239122392223932239422395223962239722398223992240022401224022240322404224052240622407224082240922410224112241222413224142241522416224172241822419224202242122422224232242422425224262242722428224292243022431224322243322434224352243622437224382243922440224412244222443224442244522446224472244822449224502245122452224532245422455224562245722458224592246022461224622246322464224652246622467224682246922470224712247222473224742247522476224772247822479224802248122482224832248422485224862248722488224892249022491224922249322494224952249622497224982249922500225012250222503225042250522506225072250822509225102251122512225132251422515225162251722518225192252022521225222252322524225252252622527225282252922530225312253222533225342253522536225372253822539225402254122542225432254422545225462254722548225492255022551225522255322554225552255622557225582255922560225612256222563225642256522566225672256822569225702257122572225732257422575225762257722578225792258022581225822258322584225852258622587225882258922590225912259222593225942259522596225972259822599226002260122602226032260422605226062260722608226092261022611226122261322614226152261622617226182261922620226212262222623226242262522626226272262822629226302263122632226332263422635226362263722638226392264022641226422264322644226452264622647226482264922650226512265222653226542265522656226572265822659226602266122662226632266422665226662266722668226692267022671226722267322674226752267622677226782267922680226812268222683226842268522686226872268822689226902269122692226932269422695226962269722698226992270022701227022270322704227052270622707227082270922710227112271222713227142271522716227172271822719227202272122722227232272422725227262272722728227292273022731227322273322734227352273622737227382273922740227412274222743227442274522746227472274822749227502275122752227532275422755227562275722758227592276022761227622276322764227652276622767227682276922770227712277222773227742277522776227772277822779227802278122782227832278422785227862278722788227892279022791227922279322794227952279622797227982279922800228012280222803228042280522806228072280822809228102281122812228132281422815228162281722818228192282022821228222282322824228252282622827228282282922830228312283222833228342283522836228372283822839228402
284122842228432284422845228462284722848228492285022851228522285322854228552285622857228582285922860228612286222863228642286522866228672286822869228702287122872228732287422875228762287722878228792288022881228822288322884228852288622887228882288922890228912289222893228942289522896228972289822899229002290122902229032290422905229062290722908229092291022911229122291322914229152291622917229182291922920229212292222923229242292522926229272292822929229302293122932229332293422935229362293722938229392294022941229422294322944229452294622947229482294922950229512295222953229542295522956229572295822959229602296122962229632296422965229662296722968229692297022971229722297322974229752297622977229782297922980229812298222983229842298522986229872298822989229902299122992229932299422995229962299722998229992300023001230022300323004230052300623007230082300923010230112301223013230142301523016230172301823019230202302123022230232302423025230262302723028230292303023031230322303323034230352303623037230382303923040230412304223043230442304523046230472304823049230502305123052230532305423055230562305723058230592306023061230622306323064230652306623067230682306923070230712307223073230742307523076230772307823079230802308123082230832308423085230862308723088230892309023091230922309323094230952309623097230982309923100231012310223103231042310523106231072310823109231102311123112231132311423115231162311723118231192312023121231222312323124231252312623127231282312923130231312313223133231342313523136231372313823139231402314123142231432314423145231462314723148231492315023151231522315323154231552315623157231582315923160231612316223163231642316523166231672316823169231702317123172231732317423175231762317723178231792318023181231822318323184231852318623187231882318923190231912319223193231942319523196231972319823199232002320123202232032320423205232062320723208232092321023211232122321323214232152321623217232182321923220232212322223223232242322523226232272322823229232302323123232232332323423235232362323723238232392324023241232422324323244232452324623247232482324923250232512325223253232542325523256232572325823259232602326123262232632326423265232662326723268232692327023271232722327323274232752327623277232782327923280232812328223283232842328523286232872328823289232902329123292232932329423295232962329723298232992330023301233022330323304233052330623307233082330923310233112331223313233142331523316233172331823319233202332123322233232332423325233262332723328233292333023331233322333323334233352333623337233382333923340233412334223343233442334523346233472334823349233502335123352233532335423355233562335723358233592336023361233622336323364233652336623367233682336923370233712337223373233742337523376233772337823379233802338123382233832338423385233862338723388233892339023391233922339323394233952339623397233982339923400234012340223403234042340523406234072340823409234102341123412234132341423415234162341723418234192342023421234222342323424234252342623427234282342923430234312343223433234342343523436234372343823439234402344123442234432344423445234462344723448234492345023451234522345323454234552345623457234582345923460234612346223463234642346523466234672346823469234702347123472234732347423475234762347723478234792348023481234822348323484234852348623487234882348923490234912349223493234942349523496234972349823499235002350123502235032350423505235062350723508235092351023511235122351323514235152351623517235182351923520235212352223523235242352523526235272352823529235302353123532235332353423535235362353723538235392354023541235422354323544235452354623547235482354923550235512
355223553235542355523556235572355823559235602356123562235632356423565235662356723568235692357023571235722357323574235752357623577235782357923580235812358223583235842358523586235872358823589235902359123592235932359423595235962359723598235992360023601236022360323604236052360623607236082360923610236112361223613236142361523616236172361823619236202362123622236232362423625236262362723628236292363023631236322363323634236352363623637236382363923640236412364223643236442364523646236472364823649236502365123652236532365423655236562365723658236592366023661236622366323664236652366623667236682366923670236712367223673236742367523676236772367823679236802368123682236832368423685236862368723688236892369023691236922369323694236952369623697236982369923700237012370223703237042370523706237072370823709237102371123712237132371423715237162371723718237192372023721237222372323724237252372623727237282372923730237312373223733237342373523736237372373823739237402374123742237432374423745237462374723748237492375023751237522375323754237552375623757237582375923760237612376223763237642376523766237672376823769237702377123772237732377423775237762377723778237792378023781237822378323784237852378623787237882378923790237912379223793237942379523796237972379823799238002380123802238032380423805238062380723808238092381023811238122381323814238152381623817238182381923820238212382223823238242382523826238272382823829238302383123832238332383423835238362383723838238392384023841238422384323844238452384623847238482384923850238512385223853238542385523856238572385823859238602386123862238632386423865238662386723868238692387023871238722387323874238752387623877238782387923880238812388223883238842388523886238872388823889238902389123892238932389423895238962389723898238992390023901239022390323904239052390623907239082390923910239112391223913239142391523916239172391823919239202392123922239232392423925239262392723928239292393023931239322393323934239352393623937239382393923940239412394223943239442394523946239472394823949239502395123952239532395423955239562395723958239592396023961239622396323964239652396623967239682396923970239712397223973239742397523976239772397823979239802398123982239832398423985239862398723988239892399023991239922399323994239952399623997239982399924000240012400224003240042400524006240072400824009240102401124012240132401424015240162401724018240192402024021240222402324024240252402624027240282402924030240312403224033240342403524036240372403824039240402404124042240432404424045240462404724048240492405024051240522405324054240552405624057240582405924060240612406224063240642406524066240672406824069240702407124072240732407424075240762407724078240792408024081240822408324084240852408624087240882408924090240912409224093240942409524096240972409824099241002410124102241032410424105241062410724108241092411024111241122411324114241152411624117241182411924120241212412224123241242412524126241272412824129241302413124132241332413424135241362413724138241392414024141241422414324144241452414624147241482414924150241512415224153241542415524156241572415824159241602416124162241632416424165241662416724168241692417024171241722417324174241752417624177241782417924180241812418224183241842418524186241872418824189241902419124192241932419424195241962419724198241992420024201242022420324204242052420624207242082420924210242112421224213242142421524216242172421824219242202422124222242232422424225242262422724228242292423024231242322423324234242352423624237242382423924240242412424224243242442424524246242472424824249242502425124252242532425424255242562425724258242592426024261242622
  1. diff --git a/Documentation/hwlat_detector.txt b/Documentation/hwlat_detector.txt
  2. new file mode 100644
  3. index 000000000000..cb61516483d3
  4. --- /dev/null
  5. +++ b/Documentation/hwlat_detector.txt
  6. @@ -0,0 +1,64 @@
  7. +Introduction:
  8. +-------------
  9. +
  10. +The module hwlat_detector is a special purpose kernel module that is used to
  11. +detect large system latencies induced by the behavior of certain underlying
  12. +hardware or firmware, independent of Linux itself. The code was developed
  13. +originally to detect SMIs (System Management Interrupts) on x86 systems,
  14. +however there is nothing x86 specific about this patchset. It was
  15. +originally written for use by the "RT" patch since the Real Time
  16. +kernel is highly latency sensitive.
  17. +
  18. +SMIs are usually not serviced by the Linux kernel, which typically does not
  19. +even know that they are occurring. SMIs are instead set up by BIOS code
  20. +and are serviced by BIOS code, usually for "critical" events such as
  21. +management of thermal sensors and fans. Sometimes though, SMIs are used for
  22. +other tasks and those tasks can spend an inordinate amount of time in the
  23. +handler (sometimes measured in milliseconds). Obviously this is a problem if
  24. +you are trying to keep event service latencies down in the microsecond range.
  25. +
  26. +The hardware latency detector works by hogging all of the cpus for configurable
  27. +amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter
  28. +for some period, then looking for gaps in the TSC data. Any gap indicates a
  29. +time when the polling was interrupted, and since the machine is stopped and
  30. +interrupts are turned off, the only thing that could do that would be an SMI.
  31. +
  32. +Note that the SMI detector should *NEVER* be used in a production environment.
  33. +It is intended to be run manually to determine if the hardware platform has a
  34. +problem with long system firmware service routines.
  35. +
  36. +Usage:
  37. +------
  38. +
  39. +Loading the module hwlat_detector with the parameter "enabled=1" (or
  40. +toggling the "enable" entry in the "hwlat_detector" debugfs directory to 1) is the only
  41. +step required to start the hwlat_detector. It is possible to redefine the
  42. +threshold in microseconds (us) above which latency spikes will be taken
  43. +into account (parameter "threshold=").
  44. +
  45. +Example:
  46. +
  47. + # modprobe hwlat_detector enabled=1 threshold=100
  48. +
  49. +After the module is loaded, it creates a directory named "hwlat_detector" under
  50. +the debugfs mountpoint, "/debug/hwlat_detector" in this text. It is necessary
  51. +to have debugfs mounted, which might be on /sys/debug on your system.
  52. +
  53. +The /debug/hwlat_detector interface contains the following files:
  54. +
  55. +count - number of latency spikes observed since last reset
  56. +enable - a global enable/disable toggle (0/1), resets count
  57. +max - maximum hardware latency actually observed (usecs)
  58. +sample - a pipe from which to read current raw sample data
  59. + in the format <timestamp> <latency observed usecs>
  60. + (can be opened O_NONBLOCK for a single sample)
  61. +threshold - minimum latency value to be considered (usecs)
  62. +width - time period to sample with CPUs held (usecs)
  63. + must be less than the total window size (enforced)
  64. +window - total period of sampling, width being inside (usecs)
  65. +
  66. +By default we will set width to 500,000 and window to 1,000,000, meaning that
  67. +we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we
  68. +observe any latencies that exceed the threshold (initially 100 usecs),
  69. +then we write to a global sample ring buffer of 8K samples, which is
  70. +consumed by reading from the "sample" (pipe) debugfs file interface.
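As a worked illustration of the debugfs interface just described (a sketch, not part of the patch: the /sys/kernel/debug mountpoint and the 120 usecs threshold are example values only):

    # mount -t debugfs nodev /sys/kernel/debug      # skip if debugfs is already mounted
    # cd /sys/kernel/debug/hwlat_detector
    # echo 120     > threshold   # report spikes above 120 usecs
    # echo 500000  > width       # hold the CPUs for 0.5 s of each window
    # echo 1000000 > window      # one sampling window per second
    # echo 1       > enable      # start sampling; also resets count
    # cat sample                 # blocks until a "<timestamp> <latency usecs>" line arrives
    # cat max                    # worst latency observed so far (usecs)
    # cat count                  # number of spikes since the last reset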
  71. diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
  72. index 0e307c94809a..6964d0f80ae7 100644
  73. --- a/Documentation/sysrq.txt
  74. +++ b/Documentation/sysrq.txt
  75. @@ -59,10 +59,17 @@ On PowerPC - Press 'ALT - Print Screen (or F13) - <command key>,
  76. On other - If you know of the key combos for other architectures, please
  77. let me know so I can add them to this section.
  78. -On all - write a character to /proc/sysrq-trigger. e.g.:
  79. -
  80. +On all - write a character to /proc/sysrq-trigger, e.g.:
  81. echo t > /proc/sysrq-trigger
  82. +On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g.
  83. + echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq
  84. + Send an ICMP echo request with this pattern plus the particular
  85. + SysRq command key. Example:
  86. + # ping -c1 -s57 -p0102030468
  87. + will trigger the SysRq-H (help) command.
  88. +
  89. +
  90. * What are the 'command' keys?
  91. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  92. 'b' - Will immediately reboot the system without syncing or unmounting
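To make the network SysRq sequence described in the hunk above concrete, here is a minimal sketch; the cookie and ping pattern come from the documentation text itself, while 192.168.0.10 is purely a placeholder for the target machine's address:

    target# echo 0x01020304 > /proc/sys/net/ipv4/icmp_echo_sysrq
    remote# ping -c1 -s57 -p0102030468 192.168.0.10   # trailing 0x68 = 'h', i.e. SysRq-H (help)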
  93. diff --git a/Documentation/trace/histograms.txt b/Documentation/trace/histograms.txt
  94. new file mode 100644
  95. index 000000000000..6f2aeabf7faa
  96. --- /dev/null
  97. +++ b/Documentation/trace/histograms.txt
  98. @@ -0,0 +1,186 @@
  99. + Using the Linux Kernel Latency Histograms
  100. +
  101. +
  102. +This document gives a short explanation of how to enable, configure and use
  103. +latency histograms. Latency histograms are primarily relevant in the
  104. +context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT)
  105. +and are used in the quality management of the Linux real-time
  106. +capabilities.
  107. +
  108. +
  109. +* Purpose of latency histograms
  110. +
  111. +A latency histogram continuously accumulates the frequencies of latency
  112. +data. There are two types of histograms:
  113. +- potential sources of latencies
  114. +- effective latencies
  115. +
  116. +
  117. +* Potential sources of latencies
  118. +
  119. +Potential sources of latencies are code segments where interrupts,
  120. +preemption or both are disabled (aka critical sections). To create
  121. +histograms of potential sources of latency, the kernel stores the time
  122. +stamp at the start of a critical section, determines the time elapsed
  123. +when the end of the section is reached, and increments the frequency
  124. +counter of that latency value - irrespective of whether any concurrently
  125. +running process is affected by latency or not.
  126. +- Configuration items (in the Kernel hacking/Tracers submenu)
  127. + CONFIG_INTERRUPT_OFF_LATENCY
  128. + CONFIG_PREEMPT_OFF_LATENCY
  129. +
  130. +
  131. +* Effective latencies
  132. +
  133. +Effective latencies are those actually occurring during wakeup of a process. To
  134. +determine effective latencies, the kernel stores the time stamp when a
  135. +process is scheduled to be woken up, and determines the duration of the
  136. +wakeup time shortly before control is passed over to this process. Note
  137. +that the apparent latency in user space may be somewhat longer, since the
  138. +process may be interrupted after control is passed over to it but before
  139. +the execution in user space takes place. Simply measuring the interval
  140. +between enqueuing and wakeup may also not be appropriate in cases when a
  141. +process is scheduled as a result of a timer expiration. The timer may have
  142. +missed its deadline, e.g. due to disabled interrupts, but this latency
  143. +would not be registered. Therefore, the offsets of missed timers are
  144. +recorded in a separate histogram. If both wakeup latency and missed timer
  145. +offsets are configured and enabled, a third histogram may be enabled that
  146. +records the overall latency as a sum of the timer latency, if any, and the
  147. +wakeup latency. This histogram is called "timerandwakeup".
  148. +- Configuration items (in the Kernel hacking/Tracers submenu)
  149. + CONFIG_WAKEUP_LATENCY
  150. + CONFIG_MISSED_TIMER_OFFSETS
  151. +
  152. +
  153. +* Usage
  154. +
  155. +The interface to the administration of the latency histograms is located
  156. +in the debugfs file system. To mount it, either enter
  157. +
  158. +mount -t sysfs nodev /sys
  159. +mount -t debugfs nodev /sys/kernel/debug
  160. +
  161. +from shell command line level, or add
  162. +
  163. +nodev /sys sysfs defaults 0 0
  164. +nodev /sys/kernel/debug debugfs defaults 0 0
  165. +
  166. +to the file /etc/fstab. All latency histogram related files are then
  167. +available in the directory /sys/kernel/debug/tracing/latency_hist. A
  168. +particular histogram type is enabled by writing non-zero to the related
  169. +variable in the /sys/kernel/debug/tracing/latency_hist/enable directory.
  170. +Select "preemptirqsoff" for the histograms of potential sources of
  171. +latencies and "wakeup" for histograms of effective latencies etc. The
  172. +histogram data - one per CPU - are available in the files
  173. +
  174. +/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx
  175. +/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx
  176. +/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx
  177. +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx
  178. +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx
  179. +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx
  180. +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx
  181. +
  182. +The histograms are reset by writing non-zero to the file "reset" in a
  183. +particular latency directory. To reset all latency data, use
  184. +
  185. +#!/bin/sh
  186. +
  187. +TRACINGDIR=/sys/kernel/debug/tracing
  188. +HISTDIR=$TRACINGDIR/latency_hist
  189. +
  190. +if test -d $HISTDIR
  191. +then
  192. + cd $HISTDIR
  193. + for i in `find . | grep /reset$`
  194. + do
  195. + echo 1 >$i
  196. + done
  197. +fi
  198. +
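For a concrete example of the enable/reset cycle described in this Usage section (a sketch only; it assumes debugfs is mounted at /sys/kernel/debug, uses the wakeup histogram as the target, and assumes the per-histogram reset file sits directly in the wakeup directory, as described above):

    # cd /sys/kernel/debug/tracing/latency_hist
    # echo 1 > enable/wakeup     # start accumulating wakeup latencies
    # cat wakeup/CPU0            # inspect the per-CPU histogram
    # echo 1 > wakeup/reset      # clear this histogram for a fresh measurement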
  199. +
  200. +* Data format
  201. +
  202. +Latency data are stored with a resolution of one microsecond. The
  203. +maximum latency is 10,240 microseconds. The data are only valid if the
  204. +overflow register is empty. Every output line contains the latency in
  205. +microseconds in the first column and the number of samples in the second
  206. +column. To display only lines with a positive latency count, use, for
  207. +example,
  208. +
  209. +grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0
  210. +
  211. +#Minimum latency: 0 microseconds.
  212. +#Average latency: 0 microseconds.
  213. +#Maximum latency: 25 microseconds.
  214. +#Total samples: 3104770694
  215. +#There are 0 samples greater or equal than 10240 microseconds
  216. +#usecs samples
  217. + 0 2984486876
  218. + 1 49843506
  219. + 2 58219047
  220. + 3 5348126
  221. + 4 2187960
  222. + 5 3388262
  223. + 6 959289
  224. + 7 208294
  225. + 8 40420
  226. + 9 4485
  227. + 10 14918
  228. + 11 18340
  229. + 12 25052
  230. + 13 19455
  231. + 14 5602
  232. + 15 969
  233. + 16 47
  234. + 17 18
  235. + 18 14
  236. + 19 1
  237. + 20 3
  238. + 21 2
  239. + 22 5
  240. + 23 2
  241. + 25 1
  242. +
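When only a summary is needed, the per-line format above (latency in the first column, sample count in the second) can be reduced with a short awk sketch such as the following; the 10 usecs cutoff is an arbitrary example value:

    # awk '!/^#/ && $1 >= 10 { n += $2 } END { print n, "samples >= 10 usecs" }' \
          /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0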
  243. +
  244. +* Wakeup latency of a selected process
  245. +
  246. +To only collect wakeup latency data of a particular process, write the
  247. +PID of the requested process to
  248. +
  249. +/sys/kernel/debug/tracing/latency_hist/wakeup/pid
  250. +
  251. +PIDs are not considered if this variable is set to 0.
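For example, to restrict the wakeup histogram to a single process under test (cyclictest is only a placeholder name here):

    # echo $(pidof -s cyclictest) > /sys/kernel/debug/tracing/latency_hist/wakeup/pid
    # echo 0 > /sys/kernel/debug/tracing/latency_hist/wakeup/pid   # back to recording all PIDs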
  252. +
  253. +
  254. +* Details of the process with the highest wakeup latency so far
  255. +
  256. +Selected data of the process that suffered from the highest wakeup
  257. +latency that occurred in a particular CPU are available in the file
  258. +
  259. +/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx.
  260. +
  261. +In addition, other relevant system data at the time when the
  262. +latency occurred are given.
  263. +
  264. +The format of the data is (all in one line):
  265. +<PID> <Priority> <Latency> (<Timeroffset>) <Command> \
  266. +<- <PID> <Priority> <Command> <Timestamp>
  267. +
  268. +The value of <Timeroffset> is only relevant in the combined timer
  269. +and wakeup latency recording. In the wakeup recording, it is
  270. +always 0; in the missed_timer_offsets recording, it is the same
  271. +as <Latency>.
  272. +
  273. +When retrospectively searching for the origin of a latency while
  274. +tracing was not enabled, it may be helpful to know the name and
  275. +some basic data of the task that (finally) switched to the
  276. +late real-time task. In addition to the victim's data, the
  277. +data of the possible culprit are therefore displayed after the
  278. +"<-" symbol.
  279. +
  280. +Finally, the timestamp of the time when the latency occurred
  281. +in <seconds>.<microseconds> after the most recent system boot
  282. +is provided.
  283. +
  284. +These data are also reset when the wakeup histogram is reset.
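A small sketch that dumps the worst-case record of every CPU using the per-CPU files named above (assuming debugfs is mounted at /sys/kernel/debug):

    # for f in /sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPU*; do
          echo "$f:"; cat "$f"
      done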
  285. diff --git a/arch/Kconfig b/arch/Kconfig
  286. index a65eafb24997..78d3ed24484a 100644
  287. --- a/arch/Kconfig
  288. +++ b/arch/Kconfig
  289. @@ -6,6 +6,7 @@ config OPROFILE
  290. tristate "OProfile system profiling"
  291. depends on PROFILING
  292. depends on HAVE_OPROFILE
  293. + depends on !PREEMPT_RT_FULL
  294. select RING_BUFFER
  295. select RING_BUFFER_ALLOW_SWAP
  296. help
  297. @@ -49,6 +50,7 @@ config KPROBES
  298. config JUMP_LABEL
  299. bool "Optimize very unlikely/likely branches"
  300. depends on HAVE_ARCH_JUMP_LABEL
  301. + depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST)
  302. help
  303. This option enables a transparent branch optimization that
  304. makes certain almost-always-true or almost-always-false branch
  305. diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
  306. index 9d0ac091a52a..4a905bd667e2 100644
  307. --- a/arch/alpha/mm/fault.c
  308. +++ b/arch/alpha/mm/fault.c
  309. @@ -23,8 +23,7 @@
  310. #include <linux/smp.h>
  311. #include <linux/interrupt.h>
  312. #include <linux/module.h>
  313. -
  314. -#include <asm/uaccess.h>
  315. +#include <linux/uaccess.h>
  316. extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *);
  317. @@ -107,7 +106,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
  318. /* If we're in an interrupt context, or have no user context,
  319. we must not take the fault. */
  320. - if (!mm || in_atomic())
  321. + if (!mm || faulthandler_disabled())
  322. goto no_context;
  323. #ifdef CONFIG_ALPHA_LARGE_VMALLOC
  324. diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h
  325. index 4dc64ddebece..05b5aaf5b0f9 100644
  326. --- a/arch/arc/include/asm/futex.h
  327. +++ b/arch/arc/include/asm/futex.h
  328. @@ -53,7 +53,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
  329. if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
  330. return -EFAULT;
  331. - pagefault_disable(); /* implies preempt_disable() */
  332. + pagefault_disable();
  333. switch (op) {
  334. case FUTEX_OP_SET:
  335. @@ -75,7 +75,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
  336. ret = -ENOSYS;
  337. }
  338. - pagefault_enable(); /* subsumes preempt_enable() */
  339. + pagefault_enable();
  340. if (!ret) {
  341. switch (cmp) {
  342. @@ -104,7 +104,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
  343. return ret;
  344. }
  345. -/* Compare-xchg with preemption disabled.
  346. +/* Compare-xchg with pagefaults disabled.
  347. * Notes:
  348. * -Best-Effort: Exchg happens only if compare succeeds.
  349. * If compare fails, returns; leaving retry/looping to upper layers
  350. @@ -121,7 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval,
  351. if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
  352. return -EFAULT;
  353. - pagefault_disable(); /* implies preempt_disable() */
  354. + pagefault_disable();
  355. /* TBD : can use llock/scond */
  356. __asm__ __volatile__(
  357. @@ -142,7 +142,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval,
  358. : "r"(oldval), "r"(newval), "r"(uaddr), "ir"(-EFAULT)
  359. : "cc", "memory");
  360. - pagefault_enable(); /* subsumes preempt_enable() */
  361. + pagefault_enable();
  362. *uval = val;
  363. return val;
  364. diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
  365. index 6a2e006cbcce..d948e4e9d89c 100644
  366. --- a/arch/arc/mm/fault.c
  367. +++ b/arch/arc/mm/fault.c
  368. @@ -86,7 +86,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
  369. * If we're in an interrupt or have no user
  370. * context, we must not take the fault..
  371. */
  372. - if (in_atomic() || !mm)
  373. + if (faulthandler_disabled() || !mm)
  374. goto no_context;
  375. if (user_mode(regs))
  376. diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
  377. index 19f4cc634b0e..e16a259177d4 100644
  378. --- a/arch/arm/Kconfig
  379. +++ b/arch/arm/Kconfig
  380. @@ -31,7 +31,7 @@ config ARM
  381. select HARDIRQS_SW_RESEND
  382. select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
  383. select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
  384. - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
  385. + select HAVE_ARCH_JUMP_LABEL if (!XIP_KERNEL && !PREEMPT_RT_BASE)
  386. select HAVE_ARCH_KGDB
  387. select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
  388. select HAVE_ARCH_TRACEHOOK
  389. @@ -66,6 +66,7 @@ config ARM
  390. select HAVE_PERF_EVENTS
  391. select HAVE_PERF_REGS
  392. select HAVE_PERF_USER_STACK_DUMP
  393. + select HAVE_PREEMPT_LAZY
  394. select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
  395. select HAVE_REGS_AND_STACK_ACCESS_API
  396. select HAVE_SYSCALL_TRACEPOINTS
  397. diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
  398. index abb2c3769b01..2386e9745ba4 100644
  399. --- a/arch/arm/include/asm/cmpxchg.h
  400. +++ b/arch/arm/include/asm/cmpxchg.h
  401. @@ -129,6 +129,8 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
  402. #else /* min ARCH >= ARMv6 */
  403. +#define __HAVE_ARCH_CMPXCHG 1
  404. +
  405. extern void __bad_cmpxchg(volatile void *ptr, int size);
  406. /*
  407. diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
  408. index 4e78065a16aa..5eed82809d82 100644
  409. --- a/arch/arm/include/asm/futex.h
  410. +++ b/arch/arm/include/asm/futex.h
  411. @@ -93,6 +93,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
  412. if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
  413. return -EFAULT;
  414. + preempt_disable();
  415. __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
  416. "1: " TUSER(ldr) " %1, [%4]\n"
  417. " teq %1, %2\n"
  418. @@ -104,6 +105,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
  419. : "cc", "memory");
  420. *uval = val;
  421. + preempt_enable();
  422. +
  423. return ret;
  424. }
  425. @@ -124,7 +127,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
  426. if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
  427. return -EFAULT;
  428. - pagefault_disable(); /* implies preempt_disable() */
  429. +#ifndef CONFIG_SMP
  430. + preempt_disable();
  431. +#endif
  432. + pagefault_disable();
  433. switch (op) {
  434. case FUTEX_OP_SET:
  435. @@ -146,7 +152,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
  436. ret = -ENOSYS;
  437. }
  438. - pagefault_enable(); /* subsumes preempt_enable() */
  439. + pagefault_enable();
  440. +#ifndef CONFIG_SMP
  441. + preempt_enable();
  442. +#endif
  443. if (!ret) {
  444. switch (cmp) {
  445. diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h
  446. index c99e259469f7..f3e3d800c407 100644
  447. --- a/arch/arm/include/asm/switch_to.h
  448. +++ b/arch/arm/include/asm/switch_to.h
  449. @@ -3,6 +3,13 @@
  450. #include <linux/thread_info.h>
  451. +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
  452. +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
  453. +#else
  454. +static inline void
  455. +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
  456. +#endif
  457. +
  458. /*
  459. * For v7 SMP cores running a preemptible kernel we may be pre-empted
  460. * during a TLB maintenance operation, so execute an inner-shareable dsb
  461. @@ -22,6 +29,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info
  462. #define switch_to(prev,next,last) \
  463. do { \
  464. + switch_kmaps(prev, next); \
  465. last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \
  466. } while (0)
  467. diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
  468. index bd32eded3e50..b5a616376f60 100644
  469. --- a/arch/arm/include/asm/thread_info.h
  470. +++ b/arch/arm/include/asm/thread_info.h
  471. @@ -50,6 +50,7 @@ struct cpu_context_save {
  472. struct thread_info {
  473. unsigned long flags; /* low level flags */
  474. int preempt_count; /* 0 => preemptable, <0 => bug */
  475. + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
  476. mm_segment_t addr_limit; /* address limit */
  477. struct task_struct *task; /* main task structure */
  478. __u32 cpu; /* cpu */
  479. @@ -147,6 +148,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
  480. #define TIF_SIGPENDING 0
  481. #define TIF_NEED_RESCHED 1
  482. #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
  483. +#define TIF_NEED_RESCHED_LAZY 3
  484. #define TIF_UPROBE 7
  485. #define TIF_SYSCALL_TRACE 8
  486. #define TIF_SYSCALL_AUDIT 9
  487. @@ -160,6 +162,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
  488. #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
  489. #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
  490. #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
  491. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  492. #define _TIF_UPROBE (1 << TIF_UPROBE)
  493. #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
  494. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  495. diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
  496. index 871b8267d211..4dbe70de7318 100644
  497. --- a/arch/arm/kernel/asm-offsets.c
  498. +++ b/arch/arm/kernel/asm-offsets.c
  499. @@ -65,6 +65,7 @@ int main(void)
  500. BLANK();
  501. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  502. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  503. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  504. DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
  505. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  506. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  507. diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
  508. index 570306c49406..797a13d959b7 100644
  509. --- a/arch/arm/kernel/entry-armv.S
  510. +++ b/arch/arm/kernel/entry-armv.S
  511. @@ -208,11 +208,18 @@ __irq_svc:
  512. #ifdef CONFIG_PREEMPT
  513. get_thread_info tsk
  514. ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
  515. - ldr r0, [tsk, #TI_FLAGS] @ get flags
  516. teq r8, #0 @ if preempt count != 0
  517. + bne 1f @ return from exception
  518. + ldr r0, [tsk, #TI_FLAGS] @ get flags
  519. + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
  520. + blne svc_preempt @ preempt!
  521. +
  522. + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
  523. + teq r8, #0 @ if preempt lazy count != 0
  524. movne r0, #0 @ force flags to 0
  525. - tst r0, #_TIF_NEED_RESCHED
  526. + tst r0, #_TIF_NEED_RESCHED_LAZY
  527. blne svc_preempt
  528. +1:
  529. #endif
  530. svc_exit r5, irq = 1 @ return from exception
  531. @@ -227,8 +234,14 @@ svc_preempt:
  532. 1: bl preempt_schedule_irq @ irq en/disable is done inside
  533. ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
  534. tst r0, #_TIF_NEED_RESCHED
  535. + bne 1b
  536. + tst r0, #_TIF_NEED_RESCHED_LAZY
  537. reteq r8 @ go again
  538. - b 1b
  539. + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
  540. + teq r0, #0 @ if preempt lazy count != 0
  541. + beq 1b
  542. + ret r8 @ go again
  543. +
  544. #endif
  545. __und_fault:
  546. diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
  547. index f192a2a41719..649247ac00e6 100644
  548. --- a/arch/arm/kernel/process.c
  549. +++ b/arch/arm/kernel/process.c
  550. @@ -290,6 +290,30 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
  551. }
  552. #ifdef CONFIG_MMU
  553. +/*
  554. + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not
  555. + * initialized by pgtable_page_ctor() then a coredump of the vector page will
  556. + * fail.
  557. + */
  558. +static int __init vectors_user_mapping_init_page(void)
  559. +{
  560. + struct page *page;
  561. + unsigned long addr = 0xffff0000;
  562. + pgd_t *pgd;
  563. + pud_t *pud;
  564. + pmd_t *pmd;
  565. +
  566. + pgd = pgd_offset_k(addr);
  567. + pud = pud_offset(pgd, addr);
  568. + pmd = pmd_offset(pud, addr);
  569. + page = pmd_page(*(pmd));
  570. +
  571. + pgtable_page_ctor(page);
  572. +
  573. + return 0;
  574. +}
  575. +late_initcall(vectors_user_mapping_init_page);
  576. +
  577. #ifdef CONFIG_KUSER_HELPERS
  578. /*
  579. * The vectors page is always readable from user space for the
  580. diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
  581. index 586eef26203d..25bd12ef0b36 100644
  582. --- a/arch/arm/kernel/signal.c
  583. +++ b/arch/arm/kernel/signal.c
  584. @@ -568,7 +568,8 @@ asmlinkage int
  585. do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
  586. {
  587. do {
  588. - if (likely(thread_flags & _TIF_NEED_RESCHED)) {
  589. + if (likely(thread_flags & (_TIF_NEED_RESCHED |
  590. + _TIF_NEED_RESCHED_LAZY))) {
  591. schedule();
  592. } else {
  593. if (unlikely(!user_mode(regs)))
  594. diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
  595. index f11d82527076..e561aef093c7 100644
  596. --- a/arch/arm/kernel/smp.c
  597. +++ b/arch/arm/kernel/smp.c
  598. @@ -213,8 +213,6 @@ int __cpu_disable(void)
  599. flush_cache_louis();
  600. local_flush_tlb_all();
  601. - clear_tasks_mm_cpumask(cpu);
  602. -
  603. return 0;
  604. }
  605. @@ -230,6 +228,9 @@ void __cpu_die(unsigned int cpu)
  606. pr_err("CPU%u: cpu didn't die\n", cpu);
  607. return;
  608. }
  609. +
  610. + clear_tasks_mm_cpumask(cpu);
  611. +
  612. pr_notice("CPU%u: shutdown\n", cpu);
  613. /*
  614. diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
  615. index 0bee233fef9a..314cfb232a63 100644
  616. --- a/arch/arm/kernel/unwind.c
  617. +++ b/arch/arm/kernel/unwind.c
  618. @@ -93,7 +93,7 @@ extern const struct unwind_idx __start_unwind_idx[];
  619. static const struct unwind_idx *__origin_unwind_idx;
  620. extern const struct unwind_idx __stop_unwind_idx[];
  621. -static DEFINE_SPINLOCK(unwind_lock);
  622. +static DEFINE_RAW_SPINLOCK(unwind_lock);
  623. static LIST_HEAD(unwind_tables);
  624. /* Convert a prel31 symbol to an absolute address */
  625. @@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr)
  626. /* module unwind tables */
  627. struct unwind_table *table;
  628. - spin_lock_irqsave(&unwind_lock, flags);
  629. + raw_spin_lock_irqsave(&unwind_lock, flags);
  630. list_for_each_entry(table, &unwind_tables, list) {
  631. if (addr >= table->begin_addr &&
  632. addr < table->end_addr) {
  633. @@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr)
  634. break;
  635. }
  636. }
  637. - spin_unlock_irqrestore(&unwind_lock, flags);
  638. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  639. }
  640. pr_debug("%s: idx = %p\n", __func__, idx);
  641. @@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size,
  642. tab->begin_addr = text_addr;
  643. tab->end_addr = text_addr + text_size;
  644. - spin_lock_irqsave(&unwind_lock, flags);
  645. + raw_spin_lock_irqsave(&unwind_lock, flags);
  646. list_add_tail(&tab->list, &unwind_tables);
  647. - spin_unlock_irqrestore(&unwind_lock, flags);
  648. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  649. return tab;
  650. }
  651. @@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_table *tab)
  652. if (!tab)
  653. return;
  654. - spin_lock_irqsave(&unwind_lock, flags);
  655. + raw_spin_lock_irqsave(&unwind_lock, flags);
  656. list_del(&tab->list);
  657. - spin_unlock_irqrestore(&unwind_lock, flags);
  658. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  659. kfree(tab);
  660. }
  661. diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
  662. index 87b2663a5564..ce5e58d76cf9 100644
  663. --- a/arch/arm/kvm/arm.c
  664. +++ b/arch/arm/kvm/arm.c
  665. @@ -473,9 +473,9 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
  666. static void vcpu_pause(struct kvm_vcpu *vcpu)
  667. {
  668. - wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
  669. + struct swait_head *wq = kvm_arch_vcpu_wq(vcpu);
  670. - wait_event_interruptible(*wq, !vcpu->arch.pause);
  671. + swait_event_interruptible(*wq, !vcpu->arch.pause);
  672. }
  673. static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
  674. diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
  675. index 531e922486b2..e24f0461ea2d 100644
  676. --- a/arch/arm/kvm/psci.c
  677. +++ b/arch/arm/kvm/psci.c
  678. @@ -68,7 +68,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
  679. {
  680. struct kvm *kvm = source_vcpu->kvm;
  681. struct kvm_vcpu *vcpu = NULL;
  682. - wait_queue_head_t *wq;
  683. + struct swait_head *wq;
  684. unsigned long cpu_id;
  685. unsigned long context_id;
  686. phys_addr_t target_pc;
  687. @@ -117,7 +117,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
  688. smp_mb(); /* Make sure the above is visible */
  689. wq = kvm_arch_vcpu_wq(vcpu);
  690. - wake_up_interruptible(wq);
  691. + swait_wake_interruptible(wq);
  692. return PSCI_RET_SUCCESS;
  693. }
  694. diff --git a/arch/arm/mach-at91/at91rm9200.c b/arch/arm/mach-at91/at91rm9200.c
  695. index eaf58f88ef5d..8d3cb458a99c 100644
  696. --- a/arch/arm/mach-at91/at91rm9200.c
  697. +++ b/arch/arm/mach-at91/at91rm9200.c
  698. @@ -13,7 +13,6 @@
  699. #include <linux/of_platform.h>
  700. #include <asm/mach/arch.h>
  701. -#include <asm/system_misc.h>
  702. #include "generic.h"
  703. #include "soc.h"
  704. @@ -34,7 +33,6 @@ static void __init at91rm9200_dt_device_init(void)
  705. of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev);
  706. - arm_pm_idle = at91rm9200_idle;
  707. at91rm9200_pm_init();
  708. }
  709. diff --git a/arch/arm/mach-at91/at91sam9.c b/arch/arm/mach-at91/at91sam9.c
  710. index e47a2093a0e7..d2bede665a1b 100644
  711. --- a/arch/arm/mach-at91/at91sam9.c
  712. +++ b/arch/arm/mach-at91/at91sam9.c
  713. @@ -62,8 +62,6 @@ static void __init at91sam9_common_init(void)
  714. soc_dev = soc_device_to_device(soc);
  715. of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev);
  716. -
  717. - arm_pm_idle = at91sam9_idle;
  718. }
  719. static void __init at91sam9_dt_device_init(void)
  720. diff --git a/arch/arm/mach-at91/generic.h b/arch/arm/mach-at91/generic.h
  721. index b0fa7dc7286d..28ca57a2060f 100644
  722. --- a/arch/arm/mach-at91/generic.h
  723. +++ b/arch/arm/mach-at91/generic.h
  724. @@ -11,27 +11,18 @@
  725. #ifndef _AT91_GENERIC_H
  726. #define _AT91_GENERIC_H
  727. -#include <linux/of.h>
  728. -#include <linux/reboot.h>
  729. -
  730. - /* Map io */
  731. -extern void __init at91_map_io(void);
  732. -extern void __init at91_alt_map_io(void);
  733. -
  734. -/* idle */
  735. -extern void at91rm9200_idle(void);
  736. -extern void at91sam9_idle(void);
  737. -
  738. #ifdef CONFIG_PM
  739. extern void __init at91rm9200_pm_init(void);
  740. extern void __init at91sam9260_pm_init(void);
  741. extern void __init at91sam9g45_pm_init(void);
  742. extern void __init at91sam9x5_pm_init(void);
  743. +extern void __init sama5_pm_init(void);
  744. #else
  745. static inline void __init at91rm9200_pm_init(void) { }
  746. static inline void __init at91sam9260_pm_init(void) { }
  747. static inline void __init at91sam9g45_pm_init(void) { }
  748. static inline void __init at91sam9x5_pm_init(void) { }
  749. +static inline void __init sama5_pm_init(void) { }
  750. #endif
  751. #endif /* _AT91_GENERIC_H */
  752. diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
  753. index 5062699cbb12..3be82cf983dd 100644
  754. --- a/arch/arm/mach-at91/pm.c
  755. +++ b/arch/arm/mach-at91/pm.c
  756. @@ -31,10 +31,13 @@
  757. #include <asm/mach/irq.h>
  758. #include <asm/fncpy.h>
  759. #include <asm/cacheflush.h>
  760. +#include <asm/system_misc.h>
  761. #include "generic.h"
  762. #include "pm.h"
  763. +static void __iomem *pmc;
  764. +
  765. /*
  766. * FIXME: this is needed to communicate between the pinctrl driver and
  767. * the PM implementation in the machine. Possibly part of the PM
  768. @@ -85,7 +88,7 @@ static int at91_pm_verify_clocks(void)
  769. unsigned long scsr;
  770. int i;
  771. - scsr = at91_pmc_read(AT91_PMC_SCSR);
  772. + scsr = readl(pmc + AT91_PMC_SCSR);
  773. /* USB must not be using PLLB */
  774. if ((scsr & at91_pm_data.uhp_udp_mask) != 0) {
  775. @@ -99,8 +102,7 @@ static int at91_pm_verify_clocks(void)
  776. if ((scsr & (AT91_PMC_PCK0 << i)) == 0)
  777. continue;
  778. -
  779. - css = at91_pmc_read(AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
  780. + css = readl(pmc + AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
  781. if (css != AT91_PMC_CSS_SLOW) {
  782. pr_err("AT91: PM - Suspend-to-RAM with PCK%d src %d\n", i, css);
  783. return 0;
  784. @@ -143,8 +145,8 @@ static void at91_pm_suspend(suspend_state_t state)
  785. flush_cache_all();
  786. outer_disable();
  787. - at91_suspend_sram_fn(at91_pmc_base, at91_ramc_base[0],
  788. - at91_ramc_base[1], pm_data);
  789. + at91_suspend_sram_fn(pmc, at91_ramc_base[0],
  790. + at91_ramc_base[1], pm_data);
  791. outer_resume();
  792. }
  793. @@ -348,6 +350,21 @@ static __init void at91_dt_ramc(void)
  794. at91_pm_set_standby(standby);
  795. }
  796. +void at91rm9200_idle(void)
  797. +{
  798. + /*
  799. + * Disable the processor clock. The processor will be automatically
  800. + * re-enabled by an interrupt or by a reset.
  801. + */
  802. + writel(AT91_PMC_PCK, pmc + AT91_PMC_SCDR);
  803. +}
  804. +
  805. +void at91sam9_idle(void)
  806. +{
  807. + writel(AT91_PMC_PCK, pmc + AT91_PMC_SCDR);
  808. + cpu_do_idle();
  809. +}
  810. +
  811. static void __init at91_pm_sram_init(void)
  812. {
  813. struct gen_pool *sram_pool;
  814. @@ -394,13 +411,36 @@ static void __init at91_pm_sram_init(void)
  815. &at91_pm_suspend_in_sram, at91_pm_suspend_in_sram_sz);
  816. }
  817. -static void __init at91_pm_init(void)
  818. +static const struct of_device_id atmel_pmc_ids[] __initconst = {
  819. + { .compatible = "atmel,at91rm9200-pmc" },
  820. + { .compatible = "atmel,at91sam9260-pmc" },
  821. + { .compatible = "atmel,at91sam9g45-pmc" },
  822. + { .compatible = "atmel,at91sam9n12-pmc" },
  823. + { .compatible = "atmel,at91sam9x5-pmc" },
  824. + { .compatible = "atmel,sama5d3-pmc" },
  825. + { .compatible = "atmel,sama5d2-pmc" },
  826. + { /* sentinel */ },
  827. +};
  828. +
  829. +static void __init at91_pm_init(void (*pm_idle)(void))
  830. {
  831. - at91_pm_sram_init();
  832. + struct device_node *pmc_np;
  833. if (at91_cpuidle_device.dev.platform_data)
  834. platform_device_register(&at91_cpuidle_device);
  835. + pmc_np = of_find_matching_node(NULL, atmel_pmc_ids);
  836. + pmc = of_iomap(pmc_np, 0);
  837. + if (!pmc) {
  838. + pr_err("AT91: PM not supported, PMC not found\n");
  839. + return;
  840. + }
  841. +
  842. + if (pm_idle)
  843. + arm_pm_idle = pm_idle;
  844. +
  845. + at91_pm_sram_init();
  846. +
  847. if (at91_suspend_sram_fn)
  848. suspend_set_ops(&at91_pm_ops);
  849. else
  850. @@ -419,7 +459,7 @@ void __init at91rm9200_pm_init(void)
  851. at91_pm_data.uhp_udp_mask = AT91RM9200_PMC_UHP | AT91RM9200_PMC_UDP;
  852. at91_pm_data.memctrl = AT91_MEMCTRL_MC;
  853. - at91_pm_init();
  854. + at91_pm_init(at91rm9200_idle);
  855. }
  856. void __init at91sam9260_pm_init(void)
  857. @@ -427,7 +467,7 @@ void __init at91sam9260_pm_init(void)
  858. at91_dt_ramc();
  859. at91_pm_data.memctrl = AT91_MEMCTRL_SDRAMC;
  860. at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP;
  861. - return at91_pm_init();
  862. + at91_pm_init(at91sam9_idle);
  863. }
  864. void __init at91sam9g45_pm_init(void)
  865. @@ -435,7 +475,7 @@ void __init at91sam9g45_pm_init(void)
  866. at91_dt_ramc();
  867. at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP;
  868. at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR;
  869. - return at91_pm_init();
  870. + at91_pm_init(at91sam9_idle);
  871. }
  872. void __init at91sam9x5_pm_init(void)
  873. @@ -443,5 +483,13 @@ void __init at91sam9x5_pm_init(void)
  874. at91_dt_ramc();
  875. at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP;
  876. at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR;
  877. - return at91_pm_init();
  878. + at91_pm_init(at91sam9_idle);
  879. +}
  880. +
  881. +void __init sama5_pm_init(void)
  882. +{
  883. + at91_dt_ramc();
  884. + at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP;
  885. + at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR;
  886. + at91_pm_init(NULL);
  887. }
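
The pm.c rework above replaces the at91_pmc_base global with a PMC pointer looked up from the device tree and moves the idle callbacks next to it, so at91_pm_init() now takes the idle routine as an argument. A minimal sketch of the lookup-and-map step in isolation, assuming the atmel_pmc_ids table above (the helper name is hypothetical; the error handling mirrors at91_pm_init()):

static void __iomem *example_map_pmc(void)
{
        struct device_node *np;
        void __iomem *base;

        np = of_find_matching_node(NULL, atmel_pmc_ids);
        base = of_iomap(np, 0);
        of_node_put(np);
        if (!base)
                pr_err("AT91: PM not supported, PMC not found\n");

        return base;
}
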
  888. diff --git a/arch/arm/mach-at91/sama5.c b/arch/arm/mach-at91/sama5.c
  889. index 41d829d8e7d5..3755da6decf5 100644
  890. --- a/arch/arm/mach-at91/sama5.c
  891. +++ b/arch/arm/mach-at91/sama5.c
  892. @@ -49,7 +49,7 @@ static void __init sama5_dt_device_init(void)
  893. soc_dev = soc_device_to_device(soc);
  894. of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev);
  895. - at91sam9x5_pm_init();
  896. + sama5_pm_init();
  897. }
  898. static const char *sama5_dt_board_compat[] __initconst = {
  899. diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c
  900. index a825bca2a2b6..4619e228df41 100644
  901. --- a/arch/arm/mach-exynos/platsmp.c
  902. +++ b/arch/arm/mach-exynos/platsmp.c
  903. @@ -231,7 +231,7 @@ static void __iomem *scu_base_addr(void)
  904. return (void __iomem *)(S5P_VA_SCU);
  905. }
  906. -static DEFINE_SPINLOCK(boot_lock);
  907. +static DEFINE_RAW_SPINLOCK(boot_lock);
  908. static void exynos_secondary_init(unsigned int cpu)
  909. {
  910. @@ -244,8 +244,8 @@ static void exynos_secondary_init(unsigned int cpu)
  911. /*
  912. * Synchronise with the boot thread.
  913. */
  914. - spin_lock(&boot_lock);
  915. - spin_unlock(&boot_lock);
  916. + raw_spin_lock(&boot_lock);
  917. + raw_spin_unlock(&boot_lock);
  918. }
  919. static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
  920. @@ -259,7 +259,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
  921. * Set synchronisation state between this boot processor
  922. * and the secondary one
  923. */
  924. - spin_lock(&boot_lock);
  925. + raw_spin_lock(&boot_lock);
  926. /*
  927. * The secondary processor is waiting to be released from
  928. @@ -286,7 +286,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
  929. if (timeout == 0) {
  930. printk(KERN_ERR "cpu1 power enable failed");
  931. - spin_unlock(&boot_lock);
  932. + raw_spin_unlock(&boot_lock);
  933. return -ETIMEDOUT;
  934. }
  935. }
  936. @@ -342,7 +342,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
  937. * calibrations, then wait for it to finish
  938. */
  939. fail:
  940. - spin_unlock(&boot_lock);
  941. + raw_spin_unlock(&boot_lock);
  942. return pen_release != -1 ? ret : 0;
  943. }
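
The exynos change above is the first of a series: every platform's boot_lock is converted from spinlock_t to a raw spinlock, because on PREEMPT_RT a spinlock_t becomes a sleeping lock and cannot be taken on the secondary-CPU bring-up path. The same pattern repeats in the hisi, omap2, prima2, qcom, spear, sti, ux500 and versatile hunks below; a minimal sketch of the converted pair (the function names are placeholders):

static DEFINE_RAW_SPINLOCK(boot_lock);

static void example_secondary_init(unsigned int cpu)
{
        /* Synchronise with the boot CPU, exactly as in the hunks above. */
        raw_spin_lock(&boot_lock);
        raw_spin_unlock(&boot_lock);
}

static int example_boot_secondary(unsigned int cpu, struct task_struct *idle)
{
        raw_spin_lock(&boot_lock);
        /* ... release the secondary CPU from its holding pen ... */
        raw_spin_unlock(&boot_lock);

        return 0;
}
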
  944. diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c
  945. index 280f3f14f77c..bc2ed95c0e62 100644
  946. --- a/arch/arm/mach-hisi/platmcpm.c
  947. +++ b/arch/arm/mach-hisi/platmcpm.c
  948. @@ -57,7 +57,7 @@
  949. static void __iomem *sysctrl, *fabric;
  950. static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
  951. -static DEFINE_SPINLOCK(boot_lock);
  952. +static DEFINE_RAW_SPINLOCK(boot_lock);
  953. static u32 fabric_phys_addr;
  954. /*
  955. * [0]: bootwrapper physical address
  956. @@ -104,7 +104,7 @@ static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
  957. if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
  958. return -EINVAL;
  959. - spin_lock_irq(&boot_lock);
  960. + raw_spin_lock_irq(&boot_lock);
  961. if (hip04_cpu_table[cluster][cpu])
  962. goto out;
  963. @@ -133,7 +133,7 @@ static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
  964. udelay(20);
  965. out:
  966. hip04_cpu_table[cluster][cpu]++;
  967. - spin_unlock_irq(&boot_lock);
  968. + raw_spin_unlock_irq(&boot_lock);
  969. return 0;
  970. }
  971. @@ -149,7 +149,7 @@ static void hip04_mcpm_power_down(void)
  972. __mcpm_cpu_going_down(cpu, cluster);
  973. - spin_lock(&boot_lock);
  974. + raw_spin_lock(&boot_lock);
  975. BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
  976. hip04_cpu_table[cluster][cpu]--;
  977. if (hip04_cpu_table[cluster][cpu] == 1) {
  978. @@ -162,7 +162,7 @@ static void hip04_mcpm_power_down(void)
  979. last_man = hip04_cluster_is_down(cluster);
  980. if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
  981. - spin_unlock(&boot_lock);
  982. + raw_spin_unlock(&boot_lock);
  983. /* Since it's Cortex A15, disable L2 prefetching. */
  984. asm volatile(
  985. "mcr p15, 1, %0, c15, c0, 3 \n\t"
  986. @@ -173,7 +173,7 @@ static void hip04_mcpm_power_down(void)
  987. hip04_set_snoop_filter(cluster, 0);
  988. __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
  989. } else {
  990. - spin_unlock(&boot_lock);
  991. + raw_spin_unlock(&boot_lock);
  992. v7_exit_coherency_flush(louis);
  993. }
  994. @@ -192,7 +192,7 @@ static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
  995. cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
  996. count = TIMEOUT_MSEC / POLL_MSEC;
  997. - spin_lock_irq(&boot_lock);
  998. + raw_spin_lock_irq(&boot_lock);
  999. for (tries = 0; tries < count; tries++) {
  1000. if (hip04_cpu_table[cluster][cpu]) {
  1001. ret = -EBUSY;
  1002. @@ -202,10 +202,10 @@ static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
  1003. data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
  1004. if (data & CORE_WFI_STATUS(cpu))
  1005. break;
  1006. - spin_unlock_irq(&boot_lock);
  1007. + raw_spin_unlock_irq(&boot_lock);
  1008. /* Wait for clean L2 when the whole cluster is down. */
  1009. msleep(POLL_MSEC);
  1010. - spin_lock_irq(&boot_lock);
  1011. + raw_spin_lock_irq(&boot_lock);
  1012. }
  1013. if (tries >= count)
  1014. goto err;
  1015. @@ -220,10 +220,10 @@ static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
  1016. }
  1017. if (tries >= count)
  1018. goto err;
  1019. - spin_unlock_irq(&boot_lock);
  1020. + raw_spin_unlock_irq(&boot_lock);
  1021. return 0;
  1022. err:
  1023. - spin_unlock_irq(&boot_lock);
  1024. + raw_spin_unlock_irq(&boot_lock);
  1025. return ret;
  1026. }
  1027. @@ -235,10 +235,10 @@ static void hip04_mcpm_powered_up(void)
  1028. cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
  1029. cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
  1030. - spin_lock(&boot_lock);
  1031. + raw_spin_lock(&boot_lock);
  1032. if (!hip04_cpu_table[cluster][cpu])
  1033. hip04_cpu_table[cluster][cpu] = 1;
  1034. - spin_unlock(&boot_lock);
  1035. + raw_spin_unlock(&boot_lock);
  1036. }
  1037. static void __naked hip04_mcpm_power_up_setup(unsigned int affinity_level)
  1038. diff --git a/arch/arm/mach-omap2/gpio.c b/arch/arm/mach-omap2/gpio.c
  1039. index 7a577145b68b..689a1af47c80 100644
  1040. --- a/arch/arm/mach-omap2/gpio.c
  1041. +++ b/arch/arm/mach-omap2/gpio.c
  1042. @@ -130,7 +130,6 @@ static int __init omap2_gpio_dev_init(struct omap_hwmod *oh, void *unused)
  1043. }
  1044. pwrdm = omap_hwmod_get_pwrdm(oh);
  1045. - pdata->loses_context = pwrdm_can_ever_lose_context(pwrdm);
  1046. pdev = omap_device_build(name, id - 1, oh, pdata, sizeof(*pdata));
  1047. kfree(pdata);
  1048. diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
  1049. index 5305ec7341ec..19732b56088b 100644
  1050. --- a/arch/arm/mach-omap2/omap-smp.c
  1051. +++ b/arch/arm/mach-omap2/omap-smp.c
  1052. @@ -43,7 +43,7 @@
  1053. /* SCU base address */
  1054. static void __iomem *scu_base;
  1055. -static DEFINE_SPINLOCK(boot_lock);
  1056. +static DEFINE_RAW_SPINLOCK(boot_lock);
  1057. void __iomem *omap4_get_scu_base(void)
  1058. {
  1059. @@ -74,8 +74,8 @@ static void omap4_secondary_init(unsigned int cpu)
  1060. /*
  1061. * Synchronise with the boot thread.
  1062. */
  1063. - spin_lock(&boot_lock);
  1064. - spin_unlock(&boot_lock);
  1065. + raw_spin_lock(&boot_lock);
  1066. + raw_spin_unlock(&boot_lock);
  1067. }
  1068. static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1069. @@ -89,7 +89,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1070. * Set synchronisation state between this boot processor
  1071. * and the secondary one
  1072. */
  1073. - spin_lock(&boot_lock);
  1074. + raw_spin_lock(&boot_lock);
  1075. /*
  1076. * Update the AuxCoreBoot0 with boot state for secondary core.
  1077. @@ -166,7 +166,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1078. * Now the secondary core is starting up let it run its
  1079. * calibrations, then wait for it to finish
  1080. */
  1081. - spin_unlock(&boot_lock);
  1082. + raw_spin_unlock(&boot_lock);
  1083. return 0;
  1084. }
  1085. diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c
  1086. index 78af6d8cf2e2..ef4227ffa3b6 100644
  1087. --- a/arch/arm/mach-omap2/powerdomain.c
  1088. +++ b/arch/arm/mach-omap2/powerdomain.c
  1089. @@ -1166,43 +1166,3 @@ int pwrdm_get_context_loss_count(struct powerdomain *pwrdm)
  1090. return count;
  1091. }
  1092. -/**
  1093. - * pwrdm_can_ever_lose_context - can this powerdomain ever lose context?
  1094. - * @pwrdm: struct powerdomain *
  1095. - *
  1096. - * Given a struct powerdomain * @pwrdm, returns 1 if the powerdomain
  1097. - * can lose either memory or logic context or if @pwrdm is invalid, or
  1098. - * returns 0 otherwise. This function is not concerned with how the
  1099. - * powerdomain registers are programmed (i.e., to go off or not); it's
  1100. - * concerned with whether it's ever possible for this powerdomain to
  1101. - * go off while some other part of the chip is active. This function
  1102. - * assumes that every powerdomain can go to either ON or INACTIVE.
  1103. - */
  1104. -bool pwrdm_can_ever_lose_context(struct powerdomain *pwrdm)
  1105. -{
  1106. - int i;
  1107. -
  1108. - if (!pwrdm) {
  1109. - pr_debug("powerdomain: %s: invalid powerdomain pointer\n",
  1110. - __func__);
  1111. - return 1;
  1112. - }
  1113. -
  1114. - if (pwrdm->pwrsts & PWRSTS_OFF)
  1115. - return 1;
  1116. -
  1117. - if (pwrdm->pwrsts & PWRSTS_RET) {
  1118. - if (pwrdm->pwrsts_logic_ret & PWRSTS_OFF)
  1119. - return 1;
  1120. -
  1121. - for (i = 0; i < pwrdm->banks; i++)
  1122. - if (pwrdm->pwrsts_mem_ret[i] & PWRSTS_OFF)
  1123. - return 1;
  1124. - }
  1125. -
  1126. - for (i = 0; i < pwrdm->banks; i++)
  1127. - if (pwrdm->pwrsts_mem_on[i] & PWRSTS_OFF)
  1128. - return 1;
  1129. -
  1130. - return 0;
  1131. -}
  1132. diff --git a/arch/arm/mach-omap2/powerdomain.h b/arch/arm/mach-omap2/powerdomain.h
  1133. index 28a796ce07d7..5e0c033a21db 100644
  1134. --- a/arch/arm/mach-omap2/powerdomain.h
  1135. +++ b/arch/arm/mach-omap2/powerdomain.h
  1136. @@ -244,7 +244,6 @@ int pwrdm_state_switch(struct powerdomain *pwrdm);
  1137. int pwrdm_pre_transition(struct powerdomain *pwrdm);
  1138. int pwrdm_post_transition(struct powerdomain *pwrdm);
  1139. int pwrdm_get_context_loss_count(struct powerdomain *pwrdm);
  1140. -bool pwrdm_can_ever_lose_context(struct powerdomain *pwrdm);
  1141. extern int omap_set_pwrdm_state(struct powerdomain *pwrdm, u8 state);
  1142. diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c
  1143. index e46c91094dde..dcb3ed0c26da 100644
  1144. --- a/arch/arm/mach-prima2/platsmp.c
  1145. +++ b/arch/arm/mach-prima2/platsmp.c
  1146. @@ -22,7 +22,7 @@
  1147. static void __iomem *clk_base;
  1148. -static DEFINE_SPINLOCK(boot_lock);
  1149. +static DEFINE_RAW_SPINLOCK(boot_lock);
  1150. static void sirfsoc_secondary_init(unsigned int cpu)
  1151. {
  1152. @@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsigned int cpu)
  1153. /*
  1154. * Synchronise with the boot thread.
  1155. */
  1156. - spin_lock(&boot_lock);
  1157. - spin_unlock(&boot_lock);
  1158. + raw_spin_lock(&boot_lock);
  1159. + raw_spin_unlock(&boot_lock);
  1160. }
  1161. static const struct of_device_id clk_ids[] = {
  1162. @@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1163. /* make sure write buffer is drained */
  1164. mb();
  1165. - spin_lock(&boot_lock);
  1166. + raw_spin_lock(&boot_lock);
  1167. /*
  1168. * The secondary processor is waiting to be released from
  1169. @@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1170. * now the secondary core is starting up let it run its
  1171. * calibrations, then wait for it to finish
  1172. */
  1173. - spin_unlock(&boot_lock);
  1174. + raw_spin_unlock(&boot_lock);
  1175. return pen_release != -1 ? -ENOSYS : 0;
  1176. }
  1177. diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c
  1178. index 5cde63a64b34..82c9b9145c3e 100644
  1179. --- a/arch/arm/mach-qcom/platsmp.c
  1180. +++ b/arch/arm/mach-qcom/platsmp.c
  1181. @@ -46,7 +46,7 @@
  1182. extern void secondary_startup_arm(void);
  1183. -static DEFINE_SPINLOCK(boot_lock);
  1184. +static DEFINE_RAW_SPINLOCK(boot_lock);
  1185. #ifdef CONFIG_HOTPLUG_CPU
  1186. static void __ref qcom_cpu_die(unsigned int cpu)
  1187. @@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned int cpu)
  1188. /*
  1189. * Synchronise with the boot thread.
  1190. */
  1191. - spin_lock(&boot_lock);
  1192. - spin_unlock(&boot_lock);
  1193. + raw_spin_lock(&boot_lock);
  1194. + raw_spin_unlock(&boot_lock);
  1195. }
  1196. static int scss_release_secondary(unsigned int cpu)
  1197. @@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int))
  1198. * set synchronisation state between this boot processor
  1199. * and the secondary one
  1200. */
  1201. - spin_lock(&boot_lock);
  1202. + raw_spin_lock(&boot_lock);
  1203. /*
  1204. * Send the secondary CPU a soft interrupt, thereby causing
  1205. @@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int))
  1206. * now the secondary core is starting up let it run its
  1207. * calibrations, then wait for it to finish
  1208. */
  1209. - spin_unlock(&boot_lock);
  1210. + raw_spin_unlock(&boot_lock);
  1211. return ret;
  1212. }
  1213. diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c
  1214. index fd4297713d67..b0553b2c2d53 100644
  1215. --- a/arch/arm/mach-spear/platsmp.c
  1216. +++ b/arch/arm/mach-spear/platsmp.c
  1217. @@ -32,7 +32,7 @@ static void write_pen_release(int val)
  1218. sync_cache_w(&pen_release);
  1219. }
  1220. -static DEFINE_SPINLOCK(boot_lock);
  1221. +static DEFINE_RAW_SPINLOCK(boot_lock);
  1222. static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
  1223. @@ -47,8 +47,8 @@ static void spear13xx_secondary_init(unsigned int cpu)
  1224. /*
  1225. * Synchronise with the boot thread.
  1226. */
  1227. - spin_lock(&boot_lock);
  1228. - spin_unlock(&boot_lock);
  1229. + raw_spin_lock(&boot_lock);
  1230. + raw_spin_unlock(&boot_lock);
  1231. }
  1232. static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1233. @@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1234. * set synchronisation state between this boot processor
  1235. * and the secondary one
  1236. */
  1237. - spin_lock(&boot_lock);
  1238. + raw_spin_lock(&boot_lock);
  1239. /*
  1240. * The secondary processor is waiting to be released from
  1241. @@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1242. * now the secondary core is starting up let it run its
  1243. * calibrations, then wait for it to finish
  1244. */
  1245. - spin_unlock(&boot_lock);
  1246. + raw_spin_unlock(&boot_lock);
  1247. return pen_release != -1 ? -ENOSYS : 0;
  1248. }
  1249. diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c
  1250. index d4b624f8dfcb..56d4028122f5 100644
  1251. --- a/arch/arm/mach-sti/platsmp.c
  1252. +++ b/arch/arm/mach-sti/platsmp.c
  1253. @@ -34,7 +34,7 @@ static void write_pen_release(int val)
  1254. sync_cache_w(&pen_release);
  1255. }
  1256. -static DEFINE_SPINLOCK(boot_lock);
  1257. +static DEFINE_RAW_SPINLOCK(boot_lock);
  1258. static void sti_secondary_init(unsigned int cpu)
  1259. {
  1260. @@ -49,8 +49,8 @@ static void sti_secondary_init(unsigned int cpu)
  1261. /*
  1262. * Synchronise with the boot thread.
  1263. */
  1264. - spin_lock(&boot_lock);
  1265. - spin_unlock(&boot_lock);
  1266. + raw_spin_lock(&boot_lock);
  1267. + raw_spin_unlock(&boot_lock);
  1268. }
  1269. static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1270. @@ -61,7 +61,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1271. * set synchronisation state between this boot processor
  1272. * and the secondary one
  1273. */
  1274. - spin_lock(&boot_lock);
  1275. + raw_spin_lock(&boot_lock);
  1276. /*
  1277. * The secondary processor is waiting to be released from
  1278. @@ -92,7 +92,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1279. * now the secondary core is starting up let it run its
  1280. * calibrations, then wait for it to finish
  1281. */
  1282. - spin_unlock(&boot_lock);
  1283. + raw_spin_unlock(&boot_lock);
  1284. return pen_release != -1 ? -ENOSYS : 0;
  1285. }
  1286. diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c
  1287. index a44967f3168c..3af22a4836bf 100644
  1288. --- a/arch/arm/mach-ux500/platsmp.c
  1289. +++ b/arch/arm/mach-ux500/platsmp.c
  1290. @@ -51,7 +51,7 @@ static void __iomem *scu_base_addr(void)
  1291. return NULL;
  1292. }
  1293. -static DEFINE_SPINLOCK(boot_lock);
  1294. +static DEFINE_RAW_SPINLOCK(boot_lock);
  1295. static void ux500_secondary_init(unsigned int cpu)
  1296. {
  1297. @@ -64,8 +64,8 @@ static void ux500_secondary_init(unsigned int cpu)
  1298. /*
  1299. * Synchronise with the boot thread.
  1300. */
  1301. - spin_lock(&boot_lock);
  1302. - spin_unlock(&boot_lock);
  1303. + raw_spin_lock(&boot_lock);
  1304. + raw_spin_unlock(&boot_lock);
  1305. }
  1306. static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1307. @@ -76,7 +76,7 @@ static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1308. * set synchronisation state between this boot processor
  1309. * and the secondary one
  1310. */
  1311. - spin_lock(&boot_lock);
  1312. + raw_spin_lock(&boot_lock);
  1313. /*
  1314. * The secondary processor is waiting to be released from
  1315. @@ -97,7 +97,7 @@ static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1316. * now the secondary core is starting up let it run its
  1317. * calibrations, then wait for it to finish
  1318. */
  1319. - spin_unlock(&boot_lock);
  1320. + raw_spin_unlock(&boot_lock);
  1321. return pen_release != -1 ? -ENOSYS : 0;
  1322. }
  1323. diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
  1324. index 6333d9c17875..62016e3e4a9c 100644
  1325. --- a/arch/arm/mm/fault.c
  1326. +++ b/arch/arm/mm/fault.c
  1327. @@ -276,7 +276,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
  1328. * If we're in an interrupt or have no user
  1329. * context, we must not take the fault..
  1330. */
  1331. - if (in_atomic() || !mm)
  1332. + if (faulthandler_disabled() || !mm)
  1333. goto no_context;
  1334. if (user_mode(regs))
  1335. @@ -430,6 +430,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
  1336. if (addr < TASK_SIZE)
  1337. return do_page_fault(addr, fsr, regs);
  1338. + if (interrupts_enabled(regs))
  1339. + local_irq_enable();
  1340. +
  1341. if (user_mode(regs))
  1342. goto bad_area;
  1343. @@ -497,6 +500,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
  1344. static int
  1345. do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
  1346. {
  1347. + if (interrupts_enabled(regs))
  1348. + local_irq_enable();
  1349. +
  1350. do_bad_area(addr, fsr, regs);
  1351. return 0;
  1352. }
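
The fault.c hunk above shows the check this series applies to every architecture's page fault handler: in_atomic() is replaced by faulthandler_disabled(), which also covers pagefault_disable(), and interrupts are re-enabled when the faulting context had them enabled. A condensed sketch of that check (the function name is hypothetical; interrupts_enabled() is the ARM helper used above):

static int example_check_fault_context(struct pt_regs *regs, struct mm_struct *mm)
{
        /* Page faults disabled or no user mm: take the no_context path. */
        if (faulthandler_disabled() || !mm)
                return -EFAULT;

        if (interrupts_enabled(regs))
                local_irq_enable();

        /* ... continue with normal user fault handling ... */
        return 0;
}
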
  1353. diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
  1354. index b98895d9fe57..4050e9d99d6b 100644
  1355. --- a/arch/arm/mm/highmem.c
  1356. +++ b/arch/arm/mm/highmem.c
  1357. @@ -54,11 +54,13 @@ EXPORT_SYMBOL(kunmap);
  1358. void *kmap_atomic(struct page *page)
  1359. {
  1360. + pte_t pte = mk_pte(page, kmap_prot);
  1361. unsigned int idx;
  1362. unsigned long vaddr;
  1363. void *kmap;
  1364. int type;
  1365. + preempt_disable_nort();
  1366. pagefault_disable();
  1367. if (!PageHighMem(page))
  1368. return page_address(page);
  1369. @@ -92,7 +94,10 @@ void *kmap_atomic(struct page *page)
  1370. * in place, so the contained TLB flush ensures the TLB is updated
  1371. * with the new mapping.
  1372. */
  1373. - set_fixmap_pte(idx, mk_pte(page, kmap_prot));
  1374. +#ifdef CONFIG_PREEMPT_RT_FULL
  1375. + current->kmap_pte[type] = pte;
  1376. +#endif
  1377. + set_fixmap_pte(idx, pte);
  1378. return (void *)vaddr;
  1379. }
  1380. @@ -109,27 +114,33 @@ void __kunmap_atomic(void *kvaddr)
  1381. if (cache_is_vivt())
  1382. __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
  1383. +#ifdef CONFIG_PREEMPT_RT_FULL
  1384. + current->kmap_pte[type] = __pte(0);
  1385. +#endif
  1386. #ifdef CONFIG_DEBUG_HIGHMEM
  1387. BUG_ON(vaddr != __fix_to_virt(idx));
  1388. - set_fixmap_pte(idx, __pte(0));
  1389. #else
  1390. (void) idx; /* to kill a warning */
  1391. #endif
  1392. + set_fixmap_pte(idx, __pte(0));
  1393. kmap_atomic_idx_pop();
  1394. } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
  1395. /* this address was obtained through kmap_high_get() */
  1396. kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
  1397. }
  1398. pagefault_enable();
  1399. + preempt_enable_nort();
  1400. }
  1401. EXPORT_SYMBOL(__kunmap_atomic);
  1402. void *kmap_atomic_pfn(unsigned long pfn)
  1403. {
  1404. + pte_t pte = pfn_pte(pfn, kmap_prot);
  1405. unsigned long vaddr;
  1406. int idx, type;
  1407. struct page *page = pfn_to_page(pfn);
  1408. + preempt_disable_nort();
  1409. pagefault_disable();
  1410. if (!PageHighMem(page))
  1411. return page_address(page);
  1412. @@ -140,7 +151,10 @@ void *kmap_atomic_pfn(unsigned long pfn)
  1413. #ifdef CONFIG_DEBUG_HIGHMEM
  1414. BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
  1415. #endif
  1416. - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
  1417. +#ifdef CONFIG_PREEMPT_RT_FULL
  1418. + current->kmap_pte[type] = pte;
  1419. +#endif
  1420. + set_fixmap_pte(idx, pte);
  1421. return (void *)vaddr;
  1422. }
  1423. @@ -154,3 +168,28 @@ struct page *kmap_atomic_to_page(const void *ptr)
  1424. return pte_page(get_fixmap_pte(vaddr));
  1425. }
  1426. +
  1427. +#if defined CONFIG_PREEMPT_RT_FULL
  1428. +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
  1429. +{
  1430. + int i;
  1431. +
  1432. + /*
  1433. + * Clear @prev's kmap_atomic mappings
  1434. + */
  1435. + for (i = 0; i < prev_p->kmap_idx; i++) {
  1436. + int idx = i + KM_TYPE_NR * smp_processor_id();
  1437. +
  1438. + set_fixmap_pte(idx, __pte(0));
  1439. + }
  1440. + /*
  1441. + * Restore @next_p's kmap_atomic mappings
  1442. + */
  1443. + for (i = 0; i < next_p->kmap_idx; i++) {
  1444. + int idx = i + KM_TYPE_NR * smp_processor_id();
  1445. +
  1446. + if (!pte_none(next_p->kmap_pte[i]))
  1447. + set_fixmap_pte(idx, next_p->kmap_pte[i]);
  1448. + }
  1449. +}
  1450. +#endif
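
Callers of kmap_atomic() are unaffected by the highmem.c change above; what changes is that on PREEMPT_RT_FULL the mapping stays preemptible, the pte is remembered in current->kmap_pte[], and switch_kmaps() replays those slots when the task is scheduled on another CPU. A sketch of a typical caller, for context (the helper name is hypothetical):

static void example_zero_highpage(struct page *page)
{
        void *addr = kmap_atomic(page);

        memset(addr, 0, PAGE_SIZE);
        kunmap_atomic(addr);
}
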
  1451. diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c
  1452. index 53feb90c840c..b4a8d54fc3f3 100644
  1453. --- a/arch/arm/plat-versatile/platsmp.c
  1454. +++ b/arch/arm/plat-versatile/platsmp.c
  1455. @@ -30,7 +30,7 @@ static void write_pen_release(int val)
  1456. sync_cache_w(&pen_release);
  1457. }
  1458. -static DEFINE_SPINLOCK(boot_lock);
  1459. +static DEFINE_RAW_SPINLOCK(boot_lock);
  1460. void versatile_secondary_init(unsigned int cpu)
  1461. {
  1462. @@ -43,8 +43,8 @@ void versatile_secondary_init(unsigned int cpu)
  1463. /*
  1464. * Synchronise with the boot thread.
  1465. */
  1466. - spin_lock(&boot_lock);
  1467. - spin_unlock(&boot_lock);
  1468. + raw_spin_lock(&boot_lock);
  1469. + raw_spin_unlock(&boot_lock);
  1470. }
  1471. int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1472. @@ -55,7 +55,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1473. * Set synchronisation state between this boot processor
  1474. * and the secondary one
  1475. */
  1476. - spin_lock(&boot_lock);
  1477. + raw_spin_lock(&boot_lock);
  1478. /*
  1479. * This is really belt and braces; we hold unintended secondary
  1480. @@ -85,7 +85,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1481. * now the secondary core is starting up let it run its
  1482. * calibrations, then wait for it to finish
  1483. */
  1484. - spin_unlock(&boot_lock);
  1485. + raw_spin_unlock(&boot_lock);
  1486. return pen_release != -1 ? -ENOSYS : 0;
  1487. }
  1488. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
  1489. index 6f0a3b41b009..09a41259b984 100644
  1490. --- a/arch/arm64/Kconfig
  1491. +++ b/arch/arm64/Kconfig
  1492. @@ -69,8 +69,10 @@ config ARM64
  1493. select HAVE_PERF_REGS
  1494. select HAVE_PERF_USER_STACK_DUMP
  1495. select HAVE_RCU_TABLE_FREE
  1496. + select HAVE_PREEMPT_LAZY
  1497. select HAVE_SYSCALL_TRACEPOINTS
  1498. select IRQ_DOMAIN
  1499. + select IRQ_FORCED_THREADING
  1500. select MODULES_USE_ELF_RELA
  1501. select NO_BOOTMEM
  1502. select OF
  1503. @@ -599,7 +601,7 @@ config XEN_DOM0
  1504. config XEN
  1505. bool "Xen guest support on ARM64"
  1506. - depends on ARM64 && OF
  1507. + depends on ARM64 && OF && !PREEMPT_RT_FULL
  1508. select SWIOTLB_XEN
  1509. help
  1510. Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
  1511. diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
  1512. index 5f750dc96e0f..74069b3bd919 100644
  1513. --- a/arch/arm64/include/asm/futex.h
  1514. +++ b/arch/arm64/include/asm/futex.h
  1515. @@ -58,7 +58,7 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
  1516. if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
  1517. return -EFAULT;
  1518. - pagefault_disable(); /* implies preempt_disable() */
  1519. + pagefault_disable();
  1520. switch (op) {
  1521. case FUTEX_OP_SET:
  1522. @@ -85,7 +85,7 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
  1523. ret = -ENOSYS;
  1524. }
  1525. - pagefault_enable(); /* subsumes preempt_enable() */
  1526. + pagefault_enable();
  1527. if (!ret) {
  1528. switch (cmp) {
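
The futex.h comment change above reflects a theme of this series: pagefault_disable() no longer implies preempt_disable(), so faults in the guarded region simply fail instead of being handled, while the task may still be preempted. A minimal sketch of an atomic user-space read under the new rules (the helper name is hypothetical):

static int example_peek_user(u32 __user *uaddr, u32 *val)
{
        int ret;

        pagefault_disable();    /* faults now return -EFAULT rather than sleep */
        ret = __copy_from_user_inatomic(val, uaddr, sizeof(*val));
        pagefault_enable();

        return ret ? -EFAULT : 0;
}
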
  1529. diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
  1530. index dcd06d18a42a..7d45b00db1b3 100644
  1531. --- a/arch/arm64/include/asm/thread_info.h
  1532. +++ b/arch/arm64/include/asm/thread_info.h
  1533. @@ -47,6 +47,7 @@ struct thread_info {
  1534. mm_segment_t addr_limit; /* address limit */
  1535. struct task_struct *task; /* main task structure */
  1536. int preempt_count; /* 0 => preemptable, <0 => bug */
  1537. + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
  1538. int cpu; /* cpu */
  1539. };
  1540. @@ -101,6 +102,7 @@ static inline struct thread_info *current_thread_info(void)
  1541. #define TIF_NEED_RESCHED 1
  1542. #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
  1543. #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
  1544. +#define TIF_NEED_RESCHED_LAZY 4
  1545. #define TIF_NOHZ 7
  1546. #define TIF_SYSCALL_TRACE 8
  1547. #define TIF_SYSCALL_AUDIT 9
  1548. @@ -117,6 +119,7 @@ static inline struct thread_info *current_thread_info(void)
  1549. #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
  1550. #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
  1551. #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
  1552. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  1553. #define _TIF_NOHZ (1 << TIF_NOHZ)
  1554. #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
  1555. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  1556. diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
  1557. index 4106ac64f95e..21f4a79bda0a 100644
  1558. --- a/arch/arm64/kernel/asm-offsets.c
  1559. +++ b/arch/arm64/kernel/asm-offsets.c
  1560. @@ -35,6 +35,7 @@ int main(void)
  1561. BLANK();
  1562. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  1563. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  1564. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  1565. DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
  1566. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  1567. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  1568. diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
  1569. index 05012cdb555f..7abfb48bd163 100644
  1570. --- a/arch/arm64/kernel/entry.S
  1571. +++ b/arch/arm64/kernel/entry.S
  1572. @@ -380,11 +380,16 @@ el1_irq:
  1573. #ifdef CONFIG_PREEMPT
  1574. get_thread_info tsk
  1575. ldr w24, [tsk, #TI_PREEMPT] // get preempt count
  1576. - cbnz w24, 1f // preempt count != 0
  1577. + cbnz w24, 2f // preempt count != 0
  1578. ldr x0, [tsk, #TI_FLAGS] // get flags
  1579. - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
  1580. - bl el1_preempt
  1581. + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
  1582. +
  1583. + ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count
  1584. + cbnz w24, 2f // preempt lazy count != 0
  1585. + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling?
  1586. 1:
  1587. + bl el1_preempt
  1588. +2:
  1589. #endif
  1590. #ifdef CONFIG_TRACE_IRQFLAGS
  1591. bl trace_hardirqs_on
  1592. @@ -398,6 +403,7 @@ el1_preempt:
  1593. 1: bl preempt_schedule_irq // irq en/disable is done inside
  1594. ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
  1595. tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
  1596. + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling?
  1597. ret x24
  1598. #endif
  1599. @@ -635,6 +641,7 @@ fast_work_pending:
  1600. str x0, [sp, #S_X0] // returned x0
  1601. work_pending:
  1602. tbnz x1, #TIF_NEED_RESCHED, work_resched
  1603. + tbnz x1, #TIF_NEED_RESCHED_LAZY, work_resched
  1604. /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
  1605. ldr x2, [sp, #S_PSTATE]
  1606. mov x0, sp // 'regs'
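
The entry.S change above wires lazy preemption into the arm64 interrupt return path: preempt immediately when TIF_NEED_RESCHED is set, and honour TIF_NEED_RESCHED_LAZY only when both the ordinary and the lazy preempt counts are zero. A C rendering of that decision, purely to make the assembly easier to follow (this function does not exist in the patch):

static bool example_should_preempt(struct thread_info *ti)
{
        if (ti->preempt_count)
                return false;                   /* cbnz w24, 2f */
        if (test_bit(TIF_NEED_RESCHED, &ti->flags))
                return true;                    /* tbnz x0, #TIF_NEED_RESCHED, 1f */
        if (ti->preempt_lazy_count)
                return false;                   /* cbnz w24, 2f */
        return test_bit(TIF_NEED_RESCHED_LAZY, &ti->flags);
}
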
  1607. diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
  1608. index 924902083e47..30eb88e5b896 100644
  1609. --- a/arch/arm64/kernel/insn.c
  1610. +++ b/arch/arm64/kernel/insn.c
  1611. @@ -77,7 +77,7 @@ bool __kprobes aarch64_insn_is_nop(u32 insn)
  1612. }
  1613. }
  1614. -static DEFINE_SPINLOCK(patch_lock);
  1615. +static DEFINE_RAW_SPINLOCK(patch_lock);
  1616. static void __kprobes *patch_map(void *addr, int fixmap)
  1617. {
  1618. @@ -124,13 +124,13 @@ static int __kprobes __aarch64_insn_write(void *addr, u32 insn)
  1619. unsigned long flags = 0;
  1620. int ret;
  1621. - spin_lock_irqsave(&patch_lock, flags);
  1622. + raw_spin_lock_irqsave(&patch_lock, flags);
  1623. waddr = patch_map(addr, FIX_TEXT_POKE0);
  1624. ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE);
  1625. patch_unmap(FIX_TEXT_POKE0);
  1626. - spin_unlock_irqrestore(&patch_lock, flags);
  1627. + raw_spin_unlock_irqrestore(&patch_lock, flags);
  1628. return ret;
  1629. }
  1630. diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
  1631. index b67b01cb5109..c4cb2596ede6 100644
  1632. --- a/arch/arm64/kernel/perf_event.c
  1633. +++ b/arch/arm64/kernel/perf_event.c
  1634. @@ -488,7 +488,7 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu)
  1635. }
  1636. err = request_irq(irq, armpmu->handle_irq,
  1637. - IRQF_NOBALANCING,
  1638. + IRQF_NOBALANCING | IRQF_NO_THREAD,
  1639. "arm-pmu", armpmu);
  1640. if (err) {
  1641. pr_err("unable to request IRQ%d for ARM PMU counters\n",
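
With IRQ_FORCED_THREADING now selected for arm64 (see the Kconfig hunk above), interrupt handlers may be pushed into threads on RT; the PMU handler opts out via IRQF_NO_THREAD so it keeps running in hard interrupt context. A sketch of the resulting request (everything except the flags is a placeholder):

static int example_request_pmu_irq(int irq, irq_handler_t handler, void *dev)
{
        /* IRQF_NO_THREAD keeps the handler out of the forced irq thread. */
        return request_irq(irq, handler,
                           IRQF_NOBALANCING | IRQF_NO_THREAD,
                           "example-pmu", dev);
}
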
  1642. diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
  1643. index fa5efaa5c3ac..1fdbb3704698 100644
  1644. --- a/arch/arm64/mm/fault.c
  1645. +++ b/arch/arm64/mm/fault.c
  1646. @@ -211,7 +211,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
  1647. * If we're in an interrupt or have no user context, we must not take
  1648. * the fault.
  1649. */
  1650. - if (in_atomic() || !mm)
  1651. + if (faulthandler_disabled() || !mm)
  1652. goto no_context;
  1653. if (user_mode(regs))
  1654. diff --git a/arch/avr32/include/asm/uaccess.h b/arch/avr32/include/asm/uaccess.h
  1655. index 20b52c40bcd2..b1ec1fa06463 100644
  1656. --- a/arch/avr32/include/asm/uaccess.h
  1657. +++ b/arch/avr32/include/asm/uaccess.h
  1658. @@ -106,7 +106,8 @@ static inline __kernel_size_t copy_from_user(void *to,
  1659. * @x: Value to copy to user space.
  1660. * @ptr: Destination address, in user space.
  1661. *
  1662. - * Context: User context only. This function may sleep.
  1663. + * Context: User context only. This function may sleep if pagefaults are
  1664. + * enabled.
  1665. *
  1666. * This macro copies a single simple value from kernel space to user
  1667. * space. It supports simple types like char and int, but not larger
  1668. @@ -125,7 +126,8 @@ static inline __kernel_size_t copy_from_user(void *to,
  1669. * @x: Variable to store result.
  1670. * @ptr: Source address, in user space.
  1671. *
  1672. - * Context: User context only. This function may sleep.
  1673. + * Context: User context only. This function may sleep if pagefaults are
  1674. + * enabled.
  1675. *
  1676. * This macro copies a single simple variable from user space to kernel
  1677. * space. It supports simple types like char and int, but not larger
  1678. @@ -145,7 +147,8 @@ static inline __kernel_size_t copy_from_user(void *to,
  1679. * @x: Value to copy to user space.
  1680. * @ptr: Destination address, in user space.
  1681. *
  1682. - * Context: User context only. This function may sleep.
  1683. + * Context: User context only. This function may sleep if pagefaults are
  1684. + * enabled.
  1685. *
  1686. * This macro copies a single simple value from kernel space to user
  1687. * space. It supports simple types like char and int, but not larger
  1688. @@ -167,7 +170,8 @@ static inline __kernel_size_t copy_from_user(void *to,
  1689. * @x: Variable to store result.
  1690. * @ptr: Source address, in user space.
  1691. *
  1692. - * Context: User context only. This function may sleep.
  1693. + * Context: User context only. This function may sleep if pagefaults are
  1694. + * enabled.
  1695. *
  1696. * This macro copies a single simple variable from user space to kernel
  1697. * space. It supports simple types like char and int, but not larger
  1698. diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
  1699. index d223a8b57c1e..c03533937a9f 100644
  1700. --- a/arch/avr32/mm/fault.c
  1701. +++ b/arch/avr32/mm/fault.c
  1702. @@ -14,11 +14,11 @@
  1703. #include <linux/pagemap.h>
  1704. #include <linux/kdebug.h>
  1705. #include <linux/kprobes.h>
  1706. +#include <linux/uaccess.h>
  1707. #include <asm/mmu_context.h>
  1708. #include <asm/sysreg.h>
  1709. #include <asm/tlb.h>
  1710. -#include <asm/uaccess.h>
  1711. #ifdef CONFIG_KPROBES
  1712. static inline int notify_page_fault(struct pt_regs *regs, int trap)
  1713. @@ -81,7 +81,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
  1714. * If we're in an interrupt or have no user context, we must
  1715. * not take the fault...
  1716. */
  1717. - if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
  1718. + if (faulthandler_disabled() || !mm || regs->sr & SYSREG_BIT(GM))
  1719. goto no_context;
  1720. local_irq_enable();
  1721. diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
  1722. index 83f12f2ed9e3..3066d40a6db1 100644
  1723. --- a/arch/cris/mm/fault.c
  1724. +++ b/arch/cris/mm/fault.c
  1725. @@ -8,7 +8,7 @@
  1726. #include <linux/interrupt.h>
  1727. #include <linux/module.h>
  1728. #include <linux/wait.h>
  1729. -#include <asm/uaccess.h>
  1730. +#include <linux/uaccess.h>
  1731. #include <arch/system.h>
  1732. extern int find_fixup_code(struct pt_regs *);
  1733. @@ -109,11 +109,11 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
  1734. info.si_code = SEGV_MAPERR;
  1735. /*
  1736. - * If we're in an interrupt or "atomic" operation or have no
  1737. + * If we're in an interrupt, have pagefaults disabled or have no
  1738. * user context, we must not take the fault.
  1739. */
  1740. - if (in_atomic() || !mm)
  1741. + if (faulthandler_disabled() || !mm)
  1742. goto no_context;
  1743. if (user_mode(regs))
  1744. diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c
  1745. index ec4917ddf678..61d99767fe16 100644
  1746. --- a/arch/frv/mm/fault.c
  1747. +++ b/arch/frv/mm/fault.c
  1748. @@ -19,9 +19,9 @@
  1749. #include <linux/kernel.h>
  1750. #include <linux/ptrace.h>
  1751. #include <linux/hardirq.h>
  1752. +#include <linux/uaccess.h>
  1753. #include <asm/pgtable.h>
  1754. -#include <asm/uaccess.h>
  1755. #include <asm/gdb-stub.h>
  1756. /*****************************************************************************/
  1757. @@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
  1758. * If we're in an interrupt or have no user
  1759. * context, we must not take the fault..
  1760. */
  1761. - if (in_atomic() || !mm)
  1762. + if (faulthandler_disabled() || !mm)
  1763. goto no_context;
  1764. if (user_mode(__frame))
  1765. diff --git a/arch/frv/mm/highmem.c b/arch/frv/mm/highmem.c
  1766. index bed9a9bd3c10..785344bbdc07 100644
  1767. --- a/arch/frv/mm/highmem.c
  1768. +++ b/arch/frv/mm/highmem.c
  1769. @@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page)
  1770. unsigned long paddr;
  1771. int type;
  1772. + preempt_disable();
  1773. pagefault_disable();
  1774. type = kmap_atomic_idx_push();
  1775. paddr = page_to_phys(page);
  1776. @@ -85,5 +86,6 @@ void __kunmap_atomic(void *kvaddr)
  1777. }
  1778. kmap_atomic_idx_pop();
  1779. pagefault_enable();
  1780. + preempt_enable();
  1781. }
  1782. EXPORT_SYMBOL(__kunmap_atomic);
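
The frv highmem hunk above, like the metag and microblaze ones further down, adds an explicit preempt_disable()/preempt_enable() pair because pagefault_disable() alone no longer keeps the task on the CPU, and a per-CPU fixmap slot must not migrate. Skeleton of the converted entry and exit, with the architecture-specific body elided (the function names are hypothetical):

static void example_kmap_atomic_begin(void)
{
        preempt_disable();      /* keep the per-CPU fixmap slot on this CPU */
        pagefault_disable();
        /* ... pick a fixmap index and install the pte ... */
}

static void example_kmap_atomic_end(void)
{
        /* ... tear down the fixmap pte ... */
        pagefault_enable();
        preempt_enable();
}
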
  1783. diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h
  1784. index 25fc9049db8a..f61cfb28e9f2 100644
  1785. --- a/arch/hexagon/include/asm/uaccess.h
  1786. +++ b/arch/hexagon/include/asm/uaccess.h
  1787. @@ -36,7 +36,8 @@
  1788. * @addr: User space pointer to start of block to check
  1789. * @size: Size of block to check
  1790. *
  1791. - * Context: User context only. This function may sleep.
  1792. + * Context: User context only. This function may sleep if pagefaults are
  1793. + * enabled.
  1794. *
  1795. * Checks if a pointer to a block of memory in user space is valid.
  1796. *
  1797. diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
  1798. index ba5ba7accd0d..70b40d1205a6 100644
  1799. --- a/arch/ia64/mm/fault.c
  1800. +++ b/arch/ia64/mm/fault.c
  1801. @@ -11,10 +11,10 @@
  1802. #include <linux/kprobes.h>
  1803. #include <linux/kdebug.h>
  1804. #include <linux/prefetch.h>
  1805. +#include <linux/uaccess.h>
  1806. #include <asm/pgtable.h>
  1807. #include <asm/processor.h>
  1808. -#include <asm/uaccess.h>
  1809. extern int die(char *, struct pt_regs *, long);
  1810. @@ -96,7 +96,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
  1811. /*
  1812. * If we're in an interrupt or have no user context, we must not take the fault..
  1813. */
  1814. - if (in_atomic() || !mm)
  1815. + if (faulthandler_disabled() || !mm)
  1816. goto no_context;
  1817. #ifdef CONFIG_VIRTUAL_MEM_MAP
  1818. diff --git a/arch/m32r/include/asm/uaccess.h b/arch/m32r/include/asm/uaccess.h
  1819. index c66a38d0a895..6f8982157a75 100644
  1820. --- a/arch/m32r/include/asm/uaccess.h
  1821. +++ b/arch/m32r/include/asm/uaccess.h
  1822. @@ -91,7 +91,8 @@ static inline void set_fs(mm_segment_t s)
  1823. * @addr: User space pointer to start of block to check
  1824. * @size: Size of block to check
  1825. *
  1826. - * Context: User context only. This function may sleep.
  1827. + * Context: User context only. This function may sleep if pagefaults are
  1828. + * enabled.
  1829. *
  1830. * Checks if a pointer to a block of memory in user space is valid.
  1831. *
  1832. @@ -155,7 +156,8 @@ extern int fixup_exception(struct pt_regs *regs);
  1833. * @x: Variable to store result.
  1834. * @ptr: Source address, in user space.
  1835. *
  1836. - * Context: User context only. This function may sleep.
  1837. + * Context: User context only. This function may sleep if pagefaults are
  1838. + * enabled.
  1839. *
  1840. * This macro copies a single simple variable from user space to kernel
  1841. * space. It supports simple types like char and int, but not larger
  1842. @@ -175,7 +177,8 @@ extern int fixup_exception(struct pt_regs *regs);
  1843. * @x: Value to copy to user space.
  1844. * @ptr: Destination address, in user space.
  1845. *
  1846. - * Context: User context only. This function may sleep.
  1847. + * Context: User context only. This function may sleep if pagefaults are
  1848. + * enabled.
  1849. *
  1850. * This macro copies a single simple value from kernel space to user
  1851. * space. It supports simple types like char and int, but not larger
  1852. @@ -194,7 +197,8 @@ extern int fixup_exception(struct pt_regs *regs);
  1853. * @x: Variable to store result.
  1854. * @ptr: Source address, in user space.
  1855. *
  1856. - * Context: User context only. This function may sleep.
  1857. + * Context: User context only. This function may sleep if pagefaults are
  1858. + * enabled.
  1859. *
  1860. * This macro copies a single simple variable from user space to kernel
  1861. * space. It supports simple types like char and int, but not larger
  1862. @@ -274,7 +278,8 @@ do { \
  1863. * @x: Value to copy to user space.
  1864. * @ptr: Destination address, in user space.
  1865. *
  1866. - * Context: User context only. This function may sleep.
  1867. + * Context: User context only. This function may sleep if pagefaults are
  1868. + * enabled.
  1869. *
  1870. * This macro copies a single simple value from kernel space to user
  1871. * space. It supports simple types like char and int, but not larger
  1872. @@ -568,7 +573,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon
  1873. * @from: Source address, in kernel space.
  1874. * @n: Number of bytes to copy.
  1875. *
  1876. - * Context: User context only. This function may sleep.
  1877. + * Context: User context only. This function may sleep if pagefaults are
  1878. + * enabled.
  1879. *
  1880. * Copy data from kernel space to user space. Caller must check
  1881. * the specified block with access_ok() before calling this function.
  1882. @@ -588,7 +594,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon
  1883. * @from: Source address, in kernel space.
  1884. * @n: Number of bytes to copy.
  1885. *
  1886. - * Context: User context only. This function may sleep.
  1887. + * Context: User context only. This function may sleep if pagefaults are
  1888. + * enabled.
  1889. *
  1890. * Copy data from kernel space to user space.
  1891. *
  1892. @@ -606,7 +613,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon
  1893. * @from: Source address, in user space.
  1894. * @n: Number of bytes to copy.
  1895. *
  1896. - * Context: User context only. This function may sleep.
  1897. + * Context: User context only. This function may sleep if pagefaults are
  1898. + * enabled.
  1899. *
  1900. * Copy data from user space to kernel space. Caller must check
  1901. * the specified block with access_ok() before calling this function.
  1902. @@ -626,7 +634,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon
  1903. * @from: Source address, in user space.
  1904. * @n: Number of bytes to copy.
  1905. *
  1906. - * Context: User context only. This function may sleep.
  1907. + * Context: User context only. This function may sleep if pagefaults are
  1908. + * enabled.
  1909. *
  1910. * Copy data from user space to kernel space.
  1911. *
  1912. @@ -677,7 +686,8 @@ unsigned long clear_user(void __user *mem, unsigned long len);
  1913. * strlen_user: - Get the size of a string in user space.
  1914. * @str: The string to measure.
  1915. *
  1916. - * Context: User context only. This function may sleep.
  1917. + * Context: User context only. This function may sleep if pagefaults are
  1918. + * enabled.
  1919. *
  1920. * Get the size of a NUL-terminated string in user space.
  1921. *
  1922. diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c
  1923. index e3d4d4890104..8f9875b7933d 100644
  1924. --- a/arch/m32r/mm/fault.c
  1925. +++ b/arch/m32r/mm/fault.c
  1926. @@ -24,9 +24,9 @@
  1927. #include <linux/vt_kern.h> /* For unblank_screen() */
  1928. #include <linux/highmem.h>
  1929. #include <linux/module.h>
  1930. +#include <linux/uaccess.h>
  1931. #include <asm/m32r.h>
  1932. -#include <asm/uaccess.h>
  1933. #include <asm/hardirq.h>
  1934. #include <asm/mmu_context.h>
  1935. #include <asm/tlbflush.h>
  1936. @@ -111,10 +111,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
  1937. mm = tsk->mm;
  1938. /*
  1939. - * If we're in an interrupt or have no user context or are running in an
  1940. - * atomic region then we must not take the fault..
  1941. + * If we're in an interrupt or have no user context or have pagefaults
  1942. + * disabled then we must not take the fault.
  1943. */
  1944. - if (in_atomic() || !mm)
  1945. + if (faulthandler_disabled() || !mm)
  1946. goto bad_area_nosemaphore;
  1947. if (error_code & ACE_USERMODE)
  1948. diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
  1949. index b2f04aee46ec..6a94cdd0c830 100644
  1950. --- a/arch/m68k/mm/fault.c
  1951. +++ b/arch/m68k/mm/fault.c
  1952. @@ -10,10 +10,10 @@
  1953. #include <linux/ptrace.h>
  1954. #include <linux/interrupt.h>
  1955. #include <linux/module.h>
  1956. +#include <linux/uaccess.h>
  1957. #include <asm/setup.h>
  1958. #include <asm/traps.h>
  1959. -#include <asm/uaccess.h>
  1960. #include <asm/pgalloc.h>
  1961. extern void die_if_kernel(char *, struct pt_regs *, long);
  1962. @@ -81,7 +81,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
  1963. * If we're in an interrupt or have no user
  1964. * context, we must not take the fault..
  1965. */
  1966. - if (in_atomic() || !mm)
  1967. + if (faulthandler_disabled() || !mm)
  1968. goto no_context;
  1969. if (user_mode(regs))
  1970. diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c
  1971. index 2de5dc695a87..f57edca63609 100644
  1972. --- a/arch/metag/mm/fault.c
  1973. +++ b/arch/metag/mm/fault.c
  1974. @@ -105,7 +105,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
  1975. mm = tsk->mm;
  1976. - if (in_atomic() || !mm)
  1977. + if (faulthandler_disabled() || !mm)
  1978. goto no_context;
  1979. if (user_mode(regs))
  1980. diff --git a/arch/metag/mm/highmem.c b/arch/metag/mm/highmem.c
  1981. index d71f621a2c0b..807f1b1c4e65 100644
  1982. --- a/arch/metag/mm/highmem.c
  1983. +++ b/arch/metag/mm/highmem.c
  1984. @@ -43,7 +43,7 @@ void *kmap_atomic(struct page *page)
  1985. unsigned long vaddr;
  1986. int type;
  1987. - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
  1988. + preempt_disable();
  1989. pagefault_disable();
  1990. if (!PageHighMem(page))
  1991. return page_address(page);
  1992. @@ -82,6 +82,7 @@ void __kunmap_atomic(void *kvaddr)
  1993. }
  1994. pagefault_enable();
  1995. + preempt_enable();
  1996. }
  1997. EXPORT_SYMBOL(__kunmap_atomic);
  1998. @@ -95,6 +96,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
  1999. unsigned long vaddr;
  2000. int type;
  2001. + preempt_disable();
  2002. pagefault_disable();
  2003. type = kmap_atomic_idx_push();
  2004. diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
  2005. index 0c0a5cfbf79a..826676778094 100644
  2006. --- a/arch/microblaze/include/asm/uaccess.h
  2007. +++ b/arch/microblaze/include/asm/uaccess.h
  2008. @@ -178,7 +178,8 @@ extern long __user_bad(void);
  2009. * @x: Variable to store result.
  2010. * @ptr: Source address, in user space.
  2011. *
  2012. - * Context: User context only. This function may sleep.
  2013. + * Context: User context only. This function may sleep if pagefaults are
  2014. + * enabled.
  2015. *
  2016. * This macro copies a single simple variable from user space to kernel
  2017. * space. It supports simple types like char and int, but not larger
  2018. @@ -290,7 +291,8 @@ extern long __user_bad(void);
  2019. * @x: Value to copy to user space.
  2020. * @ptr: Destination address, in user space.
  2021. *
  2022. - * Context: User context only. This function may sleep.
  2023. + * Context: User context only. This function may sleep if pagefaults are
  2024. + * enabled.
  2025. *
  2026. * This macro copies a single simple value from kernel space to user
  2027. * space. It supports simple types like char and int, but not larger
  2028. diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
  2029. index d46a5ebb7570..177dfc003643 100644
  2030. --- a/arch/microblaze/mm/fault.c
  2031. +++ b/arch/microblaze/mm/fault.c
  2032. @@ -107,14 +107,14 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
  2033. if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
  2034. is_write = 0;
  2035. - if (unlikely(in_atomic() || !mm)) {
  2036. + if (unlikely(faulthandler_disabled() || !mm)) {
  2037. if (kernel_mode(regs))
  2038. goto bad_area_nosemaphore;
  2039. - /* in_atomic() in user mode is really bad,
  2040. + /* faulthandler_disabled() in user mode is really bad,
  2041. as is current->mm == NULL. */
  2042. - pr_emerg("Page fault in user mode with in_atomic(), mm = %p\n",
  2043. - mm);
  2044. + pr_emerg("Page fault in user mode with faulthandler_disabled(), mm = %p\n",
  2045. + mm);
  2046. pr_emerg("r15 = %lx MSR = %lx\n",
  2047. regs->r15, regs->msr);
  2048. die("Weird page fault", regs, SIGSEGV);
  2049. diff --git a/arch/microblaze/mm/highmem.c b/arch/microblaze/mm/highmem.c
  2050. index 5a92576fad92..2fcc5a52d84d 100644
  2051. --- a/arch/microblaze/mm/highmem.c
  2052. +++ b/arch/microblaze/mm/highmem.c
  2053. @@ -37,7 +37,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
  2054. unsigned long vaddr;
  2055. int idx, type;
  2056. - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
  2057. + preempt_disable();
  2058. pagefault_disable();
  2059. if (!PageHighMem(page))
  2060. return page_address(page);
  2061. @@ -63,6 +63,7 @@ void __kunmap_atomic(void *kvaddr)
  2062. if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
  2063. pagefault_enable();
  2064. + preempt_enable();
  2065. return;
  2066. }
  2067. @@ -84,5 +85,6 @@ void __kunmap_atomic(void *kvaddr)
  2068. #endif
  2069. kmap_atomic_idx_pop();
  2070. pagefault_enable();
  2071. + preempt_enable();
  2072. }
  2073. EXPORT_SYMBOL(__kunmap_atomic);
  2074. diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
  2075. index c99e8a32bea4..7e6ab18c488a 100644
  2076. --- a/arch/mips/Kconfig
  2077. +++ b/arch/mips/Kconfig
  2078. @@ -2367,7 +2367,7 @@ config CPU_R4400_WORKAROUNDS
  2079. #
  2080. config HIGHMEM
  2081. bool "High Memory Support"
  2082. - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA
  2083. + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL
  2084. config CPU_SUPPORTS_HIGHMEM
  2085. bool
  2086. diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
  2087. index bc2f5164ce51..6dc7f5130d49 100644
  2088. --- a/arch/mips/include/asm/uaccess.h
  2089. +++ b/arch/mips/include/asm/uaccess.h
  2090. @@ -104,7 +104,8 @@ extern u64 __ua_limit;
  2091. * @addr: User space pointer to start of block to check
  2092. * @size: Size of block to check
  2093. *
  2094. - * Context: User context only. This function may sleep.
  2095. + * Context: User context only. This function may sleep if pagefaults are
  2096. + * enabled.
  2097. *
  2098. * Checks if a pointer to a block of memory in user space is valid.
  2099. *
  2100. @@ -139,7 +140,8 @@ extern u64 __ua_limit;
  2101. * @x: Value to copy to user space.
  2102. * @ptr: Destination address, in user space.
  2103. *
  2104. - * Context: User context only. This function may sleep.
  2105. + * Context: User context only. This function may sleep if pagefaults are
  2106. + * enabled.
  2107. *
  2108. * This macro copies a single simple value from kernel space to user
  2109. * space. It supports simple types like char and int, but not larger
  2110. @@ -158,7 +160,8 @@ extern u64 __ua_limit;
  2111. * @x: Variable to store result.
  2112. * @ptr: Source address, in user space.
  2113. *
  2114. - * Context: User context only. This function may sleep.
  2115. + * Context: User context only. This function may sleep if pagefaults are
  2116. + * enabled.
  2117. *
  2118. * This macro copies a single simple variable from user space to kernel
  2119. * space. It supports simple types like char and int, but not larger
  2120. @@ -178,7 +181,8 @@ extern u64 __ua_limit;
  2121. * @x: Value to copy to user space.
  2122. * @ptr: Destination address, in user space.
  2123. *
  2124. - * Context: User context only. This function may sleep.
  2125. + * Context: User context only. This function may sleep if pagefaults are
  2126. + * enabled.
  2127. *
  2128. * This macro copies a single simple value from kernel space to user
  2129. * space. It supports simple types like char and int, but not larger
  2130. @@ -200,7 +204,8 @@ extern u64 __ua_limit;
  2131. * @x: Variable to store result.
  2132. * @ptr: Source address, in user space.
  2133. *
  2134. - * Context: User context only. This function may sleep.
  2135. + * Context: User context only. This function may sleep if pagefaults are
  2136. + * enabled.
  2137. *
  2138. * This macro copies a single simple variable from user space to kernel
  2139. * space. It supports simple types like char and int, but not larger
  2140. @@ -499,7 +504,8 @@ extern void __put_user_unknown(void);
  2141. * @x: Value to copy to user space.
  2142. * @ptr: Destination address, in user space.
  2143. *
  2144. - * Context: User context only. This function may sleep.
  2145. + * Context: User context only. This function may sleep if pagefaults are
  2146. + * enabled.
  2147. *
  2148. * This macro copies a single simple value from kernel space to user
  2149. * space. It supports simple types like char and int, but not larger
  2150. @@ -518,7 +524,8 @@ extern void __put_user_unknown(void);
  2151. * @x: Variable to store result.
  2152. * @ptr: Source address, in user space.
  2153. *
  2154. - * Context: User context only. This function may sleep.
  2155. + * Context: User context only. This function may sleep if pagefaults are
  2156. + * enabled.
  2157. *
  2158. * This macro copies a single simple variable from user space to kernel
  2159. * space. It supports simple types like char and int, but not larger
  2160. @@ -538,7 +545,8 @@ extern void __put_user_unknown(void);
  2161. * @x: Value to copy to user space.
  2162. * @ptr: Destination address, in user space.
  2163. *
  2164. - * Context: User context only. This function may sleep.
  2165. + * Context: User context only. This function may sleep if pagefaults are
  2166. + * enabled.
  2167. *
  2168. * This macro copies a single simple value from kernel space to user
  2169. * space. It supports simple types like char and int, but not larger
  2170. @@ -560,7 +568,8 @@ extern void __put_user_unknown(void);
  2171. * @x: Variable to store result.
  2172. * @ptr: Source address, in user space.
  2173. *
  2174. - * Context: User context only. This function may sleep.
  2175. + * Context: User context only. This function may sleep if pagefaults are
  2176. + * enabled.
  2177. *
  2178. * This macro copies a single simple variable from user space to kernel
  2179. * space. It supports simple types like char and int, but not larger
  2180. @@ -816,7 +825,8 @@ extern size_t __copy_user(void *__to, const void *__from, size_t __n);
  2181. * @from: Source address, in kernel space.
  2182. * @n: Number of bytes to copy.
  2183. *
  2184. - * Context: User context only. This function may sleep.
  2185. + * Context: User context only. This function may sleep if pagefaults are
  2186. + * enabled.
  2187. *
  2188. * Copy data from kernel space to user space. Caller must check
  2189. * the specified block with access_ok() before calling this function.
  2190. @@ -889,7 +899,8 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n);
  2191. * @from: Source address, in kernel space.
  2192. * @n: Number of bytes to copy.
  2193. *
  2194. - * Context: User context only. This function may sleep.
  2195. + * Context: User context only. This function may sleep if pagefaults are
  2196. + * enabled.
  2197. *
  2198. * Copy data from kernel space to user space.
  2199. *
  2200. @@ -1076,7 +1087,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n);
  2201. * @from: Source address, in user space.
  2202. * @n: Number of bytes to copy.
  2203. *
  2204. - * Context: User context only. This function may sleep.
  2205. + * Context: User context only. This function may sleep if pagefaults are
  2206. + * enabled.
  2207. *
  2208. * Copy data from user space to kernel space. Caller must check
  2209. * the specified block with access_ok() before calling this function.
  2210. @@ -1108,7 +1120,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n);
  2211. * @from: Source address, in user space.
  2212. * @n: Number of bytes to copy.
  2213. *
  2214. - * Context: User context only. This function may sleep.
  2215. + * Context: User context only. This function may sleep if pagefaults are
  2216. + * enabled.
  2217. *
  2218. * Copy data from user space to kernel space.
  2219. *
  2220. @@ -1332,7 +1345,8 @@ strncpy_from_user(char *__to, const char __user *__from, long __len)
  2221. * strlen_user: - Get the size of a string in user space.
  2222. * @str: The string to measure.
  2223. *
  2224. - * Context: User context only. This function may sleep.
  2225. + * Context: User context only. This function may sleep if pagefaults are
  2226. + * enabled.
  2227. *
  2228. * Get the size of a NUL-terminated string in user space.
  2229. *
  2230. @@ -1401,7 +1415,8 @@ static inline long __strnlen_user(const char __user *s, long n)
  2231. * strnlen_user: - Get the size of a string in user space.
  2232. * @str: The string to measure.
  2233. *
  2234. - * Context: User context only. This function may sleep.
  2235. + * Context: User context only. This function may sleep if pagefaults are
  2236. + * enabled.
  2237. *
  2238. * Get the size of a NUL-terminated string in user space.
  2239. *
  2240. diff --git a/arch/mips/kernel/signal-common.h b/arch/mips/kernel/signal-common.h
  2241. index 06805e09bcd3..0b85f827cd18 100644
  2242. --- a/arch/mips/kernel/signal-common.h
  2243. +++ b/arch/mips/kernel/signal-common.h
  2244. @@ -28,12 +28,7 @@ extern void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
  2245. extern int fpcsr_pending(unsigned int __user *fpcsr);
  2246. /* Make sure we will not lose FPU ownership */
  2247. -#ifdef CONFIG_PREEMPT
  2248. -#define lock_fpu_owner() preempt_disable()
  2249. -#define unlock_fpu_owner() preempt_enable()
  2250. -#else
  2251. -#define lock_fpu_owner() pagefault_disable()
  2252. -#define unlock_fpu_owner() pagefault_enable()
  2253. -#endif
  2254. +#define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); })
  2255. +#define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); })
  2256. #endif /* __SIGNAL_COMMON_H */
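[Editorial note, not part of the patch] Callers of lock_fpu_owner() are unchanged; the difference is that both preemption and pagefaults are now off inside the critical section regardless of CONFIG_PREEMPT. An illustrative caller, with the save helper name used only as a placeholder:

	lock_fpu_owner();			/* preempt_disable() + pagefault_disable() */
	err = save_fp_context_to_sigframe(sc);	/* placeholder: must neither fault nor migrate */
	unlock_fpu_owner();			/* pagefault_enable() + preempt_enable() */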
  2257. diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
  2258. index 7ff8637e530d..36c0f26fac6b 100644
  2259. --- a/arch/mips/mm/fault.c
  2260. +++ b/arch/mips/mm/fault.c
  2261. @@ -21,10 +21,10 @@
  2262. #include <linux/module.h>
  2263. #include <linux/kprobes.h>
  2264. #include <linux/perf_event.h>
  2265. +#include <linux/uaccess.h>
  2266. #include <asm/branch.h>
  2267. #include <asm/mmu_context.h>
  2268. -#include <asm/uaccess.h>
  2269. #include <asm/ptrace.h>
  2270. #include <asm/highmem.h> /* For VMALLOC_END */
  2271. #include <linux/kdebug.h>
  2272. @@ -94,7 +94,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
  2273. * If we're in an interrupt or have no user
  2274. * context, we must not take the fault..
  2275. */
  2276. - if (in_atomic() || !mm)
  2277. + if (faulthandler_disabled() || !mm)
  2278. goto bad_area_nosemaphore;
  2279. if (user_mode(regs))
  2280. diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c
  2281. index da815d295239..11661cbc11a8 100644
  2282. --- a/arch/mips/mm/highmem.c
  2283. +++ b/arch/mips/mm/highmem.c
  2284. @@ -47,7 +47,7 @@ void *kmap_atomic(struct page *page)
  2285. unsigned long vaddr;
  2286. int idx, type;
  2287. - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
  2288. + preempt_disable();
  2289. pagefault_disable();
  2290. if (!PageHighMem(page))
  2291. return page_address(page);
  2292. @@ -72,6 +72,7 @@ void __kunmap_atomic(void *kvaddr)
  2293. if (vaddr < FIXADDR_START) { // FIXME
  2294. pagefault_enable();
  2295. + preempt_enable();
  2296. return;
  2297. }
  2298. @@ -92,6 +93,7 @@ void __kunmap_atomic(void *kvaddr)
  2299. #endif
  2300. kmap_atomic_idx_pop();
  2301. pagefault_enable();
  2302. + preempt_enable();
  2303. }
  2304. EXPORT_SYMBOL(__kunmap_atomic);
  2305. @@ -104,6 +106,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
  2306. unsigned long vaddr;
  2307. int idx, type;
  2308. + preempt_disable();
  2309. pagefault_disable();
  2310. type = kmap_atomic_idx_push();
  2311. diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
  2312. index faa5c9822ecc..198a3147dd7d 100644
  2313. --- a/arch/mips/mm/init.c
  2314. +++ b/arch/mips/mm/init.c
  2315. @@ -90,6 +90,7 @@ static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot)
  2316. BUG_ON(Page_dcache_dirty(page));
  2317. + preempt_disable();
  2318. pagefault_disable();
  2319. idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
  2320. idx += in_interrupt() ? FIX_N_COLOURS : 0;
  2321. @@ -152,6 +153,7 @@ void kunmap_coherent(void)
  2322. write_c0_entryhi(old_ctx);
  2323. local_irq_restore(flags);
  2324. pagefault_enable();
  2325. + preempt_enable();
  2326. }
  2327. void copy_user_highpage(struct page *to, struct page *from,
  2328. diff --git a/arch/mn10300/include/asm/highmem.h b/arch/mn10300/include/asm/highmem.h
  2329. index 2fbbe4d920aa..1ddea5afba09 100644
  2330. --- a/arch/mn10300/include/asm/highmem.h
  2331. +++ b/arch/mn10300/include/asm/highmem.h
  2332. @@ -75,6 +75,7 @@ static inline void *kmap_atomic(struct page *page)
  2333. unsigned long vaddr;
  2334. int idx, type;
  2335. + preempt_disable();
  2336. pagefault_disable();
  2337. if (page < highmem_start_page)
  2338. return page_address(page);
  2339. @@ -98,6 +99,7 @@ static inline void __kunmap_atomic(unsigned long vaddr)
  2340. if (vaddr < FIXADDR_START) { /* FIXME */
  2341. pagefault_enable();
  2342. + preempt_enable();
  2343. return;
  2344. }
  2345. @@ -122,6 +124,7 @@ static inline void __kunmap_atomic(unsigned long vaddr)
  2346. kmap_atomic_idx_pop();
  2347. pagefault_enable();
  2348. + preempt_enable();
  2349. }
  2350. #endif /* __KERNEL__ */
  2351. diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c
  2352. index 0c2cc5d39c8e..4a1d181ed32f 100644
  2353. --- a/arch/mn10300/mm/fault.c
  2354. +++ b/arch/mn10300/mm/fault.c
  2355. @@ -23,8 +23,8 @@
  2356. #include <linux/interrupt.h>
  2357. #include <linux/init.h>
  2358. #include <linux/vt_kern.h> /* For unblank_screen() */
  2359. +#include <linux/uaccess.h>
  2360. -#include <asm/uaccess.h>
  2361. #include <asm/pgalloc.h>
  2362. #include <asm/hardirq.h>
  2363. #include <asm/cpu-regs.h>
  2364. @@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code,
  2365. * If we're in an interrupt or have no user
  2366. * context, we must not take the fault..
  2367. */
  2368. - if (in_atomic() || !mm)
  2369. + if (faulthandler_disabled() || !mm)
  2370. goto no_context;
  2371. if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
  2372. diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c
  2373. index 0c9b6afe69e9..b51878b0c6b8 100644
  2374. --- a/arch/nios2/mm/fault.c
  2375. +++ b/arch/nios2/mm/fault.c
  2376. @@ -77,7 +77,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause,
  2377. * If we're in an interrupt or have no user
  2378. * context, we must not take the fault..
  2379. */
  2380. - if (in_atomic() || !mm)
  2381. + if (faulthandler_disabled() || !mm)
  2382. goto bad_area_nosemaphore;
  2383. if (user_mode(regs))
  2384. diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
  2385. index de65f66ea64e..ec2df4bab302 100644
  2386. --- a/arch/parisc/include/asm/cacheflush.h
  2387. +++ b/arch/parisc/include/asm/cacheflush.h
  2388. @@ -142,6 +142,7 @@ static inline void kunmap(struct page *page)
  2389. static inline void *kmap_atomic(struct page *page)
  2390. {
  2391. + preempt_disable();
  2392. pagefault_disable();
  2393. return page_address(page);
  2394. }
  2395. @@ -150,6 +151,7 @@ static inline void __kunmap_atomic(void *addr)
  2396. {
  2397. flush_kernel_dcache_page_addr(addr);
  2398. pagefault_enable();
  2399. + preempt_enable();
  2400. }
  2401. #define kmap_atomic_prot(page, prot) kmap_atomic(page)
  2402. diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
  2403. index bbf22658d1a3..341966889a51 100644
  2404. --- a/arch/parisc/kernel/traps.c
  2405. +++ b/arch/parisc/kernel/traps.c
  2406. @@ -26,9 +26,9 @@
  2407. #include <linux/console.h>
  2408. #include <linux/bug.h>
  2409. #include <linux/ratelimit.h>
  2410. +#include <linux/uaccess.h>
  2411. #include <asm/assembly.h>
  2412. -#include <asm/uaccess.h>
  2413. #include <asm/io.h>
  2414. #include <asm/irq.h>
  2415. #include <asm/traps.h>
  2416. @@ -796,7 +796,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
  2417. * unless pagefault_disable() was called before.
  2418. */
  2419. - if (fault_space == 0 && !in_atomic())
  2420. + if (fault_space == 0 && !faulthandler_disabled())
  2421. {
  2422. /* Clean up and return if in exception table. */
  2423. if (fixup_exception(regs))
  2424. diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
  2425. index 50d64a7fc672..3bc9db1ad19a 100644
  2426. --- a/arch/parisc/mm/fault.c
  2427. +++ b/arch/parisc/mm/fault.c
  2428. @@ -15,8 +15,8 @@
  2429. #include <linux/sched.h>
  2430. #include <linux/interrupt.h>
  2431. #include <linux/module.h>
  2432. +#include <linux/uaccess.h>
  2433. -#include <asm/uaccess.h>
  2434. #include <asm/traps.h>
  2435. /* Various important other fields */
  2436. @@ -208,7 +208,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
  2437. int fault;
  2438. unsigned int flags;
  2439. - if (in_atomic())
  2440. + if (pagefault_disabled())
  2441. goto no_context;
  2442. tsk = current;
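[Editorial note, not part of the patch] Two related predicates appear across these handlers. The definitions below are this note's paraphrase of the uaccess helpers the series relies on, included for orientation only: pagefault_disabled() reflects the per-task pagefault_disable() depth, while faulthandler_disabled() additionally treats any atomic context as "do not handle faults here".

/* paraphrased for reference; see linux/uaccess.h in the patched tree */
static inline int pagefault_disabled(void)
{
	return current->pagefault_disabled;
}

#define faulthandler_disabled()	(pagefault_disabled() || in_atomic())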
  2443. diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
  2444. index 190cc48abc0c..7b70a5754e34 100644
  2445. --- a/arch/powerpc/Kconfig
  2446. +++ b/arch/powerpc/Kconfig
  2447. @@ -60,10 +60,11 @@ config LOCKDEP_SUPPORT
  2448. config RWSEM_GENERIC_SPINLOCK
  2449. bool
  2450. + default y if PREEMPT_RT_FULL
  2451. config RWSEM_XCHGADD_ALGORITHM
  2452. bool
  2453. - default y
  2454. + default y if !PREEMPT_RT_FULL
  2455. config GENERIC_LOCKBREAK
  2456. bool
  2457. @@ -138,6 +139,7 @@ config PPC
  2458. select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
  2459. select GENERIC_STRNCPY_FROM_USER
  2460. select GENERIC_STRNLEN_USER
  2461. + select HAVE_PREEMPT_LAZY
  2462. select HAVE_MOD_ARCH_SPECIFIC
  2463. select MODULES_USE_ELF_RELA
  2464. select CLONE_BACKWARDS
  2465. @@ -312,7 +314,7 @@ menu "Kernel options"
  2466. config HIGHMEM
  2467. bool "High memory support"
  2468. - depends on PPC32
  2469. + depends on PPC32 && !PREEMPT_RT_FULL
  2470. source kernel/Kconfig.hz
  2471. source kernel/Kconfig.preempt
  2472. diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
  2473. index a193a13cf08b..a1ddf4080e1a 100644
  2474. --- a/arch/powerpc/include/asm/kvm_host.h
  2475. +++ b/arch/powerpc/include/asm/kvm_host.h
  2476. @@ -280,7 +280,7 @@ struct kvmppc_vcore {
  2477. u8 in_guest;
  2478. struct list_head runnable_threads;
  2479. spinlock_t lock;
  2480. - wait_queue_head_t wq;
  2481. + struct swait_head wq;
  2482. spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
  2483. u64 stolen_tb;
  2484. u64 preempt_tb;
  2485. @@ -613,7 +613,7 @@ struct kvm_vcpu_arch {
  2486. u8 prodded;
  2487. u32 last_inst;
  2488. - wait_queue_head_t *wqp;
  2489. + struct swait_head *wqp;
  2490. struct kvmppc_vcore *vcore;
  2491. int ret;
  2492. int trap;
  2493. diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
  2494. index 7efee4a3240b..40e6fa1b85b2 100644
  2495. --- a/arch/powerpc/include/asm/thread_info.h
  2496. +++ b/arch/powerpc/include/asm/thread_info.h
  2497. @@ -42,6 +42,8 @@ struct thread_info {
  2498. int cpu; /* cpu we're on */
  2499. int preempt_count; /* 0 => preemptable,
  2500. <0 => BUG */
  2501. + int preempt_lazy_count; /* 0 => preemptable,
  2502. + <0 => BUG */
  2503. unsigned long local_flags; /* private flags for thread */
  2504. /* low level flags - has atomic operations done on it */
  2505. @@ -82,8 +84,7 @@ static inline struct thread_info *current_thread_info(void)
  2506. #define TIF_SYSCALL_TRACE 0 /* syscall trace active */
  2507. #define TIF_SIGPENDING 1 /* signal pending */
  2508. #define TIF_NEED_RESCHED 2 /* rescheduling necessary */
  2509. -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
  2510. - TIF_NEED_RESCHED */
  2511. +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */
  2512. #define TIF_32BIT 4 /* 32 bit binary */
  2513. #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */
  2514. #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
  2515. @@ -101,6 +102,8 @@ static inline struct thread_info *current_thread_info(void)
  2516. #if defined(CONFIG_PPC64)
  2517. #define TIF_ELF2ABI 18 /* function descriptors must die! */
  2518. #endif
  2519. +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling
  2520. + TIF_NEED_RESCHED */
  2521. /* as above, but as bit values */
  2522. #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
  2523. @@ -119,14 +122,16 @@ static inline struct thread_info *current_thread_info(void)
  2524. #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
  2525. #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
  2526. #define _TIF_NOHZ (1<<TIF_NOHZ)
  2527. +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
  2528. #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
  2529. _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
  2530. _TIF_NOHZ)
  2531. #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
  2532. _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
  2533. - _TIF_RESTORE_TM)
  2534. + _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY)
  2535. #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
  2536. +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  2537. /* Bits in local_flags */
  2538. /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
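[Editorial note, not part of the patch] The decision the new entry_32.S/entry_64.S code implements can be summarised in C roughly as follows. This is a sketch of the intent only (the real checks are in assembly and the helper name is invented): a hard NEED_RESCHED preempts whenever preempt_count is zero, while the lazy variant additionally requires the new preempt_lazy_count to be zero.

static bool kernel_should_preempt(struct thread_info *ti)
{
	if (ti->preempt_count)			/* preemption disabled: never */
		return false;
	if (ti->flags & _TIF_NEED_RESCHED)	/* hard request: yes */
		return true;
	if (ti->flags & _TIF_NEED_RESCHED_LAZY)	/* lazy request: only when the */
		return ti->preempt_lazy_count == 0;	/* lazy count is also zero */
	return false;
}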
  2539. diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
  2540. index 0034b6b3556a..65cc771661c4 100644
  2541. --- a/arch/powerpc/kernel/asm-offsets.c
  2542. +++ b/arch/powerpc/kernel/asm-offsets.c
  2543. @@ -160,6 +160,7 @@ int main(void)
  2544. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  2545. DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
  2546. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  2547. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  2548. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  2549. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  2550. diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
  2551. index 46fc0f4d8982..3d390ac490d9 100644
  2552. --- a/arch/powerpc/kernel/entry_32.S
  2553. +++ b/arch/powerpc/kernel/entry_32.S
  2554. @@ -813,7 +813,14 @@ resume_kernel:
  2555. cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
  2556. bne restore
  2557. andi. r8,r8,_TIF_NEED_RESCHED
  2558. + bne+ 1f
  2559. + lwz r0,TI_PREEMPT_LAZY(r9)
  2560. + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
  2561. + bne restore
  2562. + lwz r0,TI_FLAGS(r9)
  2563. + andi. r0,r0,_TIF_NEED_RESCHED_LAZY
  2564. beq+ restore
  2565. +1:
  2566. lwz r3,_MSR(r1)
  2567. andi. r0,r3,MSR_EE /* interrupts off? */
  2568. beq restore /* don't schedule if so */
  2569. @@ -824,11 +831,11 @@ resume_kernel:
  2570. */
  2571. bl trace_hardirqs_off
  2572. #endif
  2573. -1: bl preempt_schedule_irq
  2574. +2: bl preempt_schedule_irq
  2575. CURRENT_THREAD_INFO(r9, r1)
  2576. lwz r3,TI_FLAGS(r9)
  2577. - andi. r0,r3,_TIF_NEED_RESCHED
  2578. - bne- 1b
  2579. + andi. r0,r3,_TIF_NEED_RESCHED_MASK
  2580. + bne- 2b
  2581. #ifdef CONFIG_TRACE_IRQFLAGS
  2582. /* And now, to properly rebalance the above, we tell lockdep they
  2583. * are being turned back on, which will happen when we return
  2584. @@ -1149,7 +1156,7 @@ global_dbcr0:
  2585. #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
  2586. do_work: /* r10 contains MSR_KERNEL here */
  2587. - andi. r0,r9,_TIF_NEED_RESCHED
  2588. + andi. r0,r9,_TIF_NEED_RESCHED_MASK
  2589. beq do_user_signal
  2590. do_resched: /* r10 contains MSR_KERNEL here */
  2591. @@ -1170,7 +1177,7 @@ recheck:
  2592. MTMSRD(r10) /* disable interrupts */
  2593. CURRENT_THREAD_INFO(r9, r1)
  2594. lwz r9,TI_FLAGS(r9)
  2595. - andi. r0,r9,_TIF_NEED_RESCHED
  2596. + andi. r0,r9,_TIF_NEED_RESCHED_MASK
  2597. bne- do_resched
  2598. andi. r0,r9,_TIF_USER_WORK_MASK
  2599. beq restore_user
  2600. diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
  2601. index afbc20019c2e..5e2d2645d1e0 100644
  2602. --- a/arch/powerpc/kernel/entry_64.S
  2603. +++ b/arch/powerpc/kernel/entry_64.S
  2604. @@ -636,7 +636,7 @@ _GLOBAL(ret_from_except_lite)
  2605. #else
  2606. beq restore
  2607. #endif
  2608. -1: andi. r0,r4,_TIF_NEED_RESCHED
  2609. +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK
  2610. beq 2f
  2611. bl restore_interrupts
  2612. SCHEDULE_USER
  2613. @@ -698,10 +698,18 @@ resume_kernel:
  2614. #ifdef CONFIG_PREEMPT
  2615. /* Check if we need to preempt */
  2616. + lwz r8,TI_PREEMPT(r9)
  2617. + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */
  2618. + bne restore
  2619. andi. r0,r4,_TIF_NEED_RESCHED
  2620. + bne+ check_count
  2621. +
  2622. + andi. r0,r4,_TIF_NEED_RESCHED_LAZY
  2623. beq+ restore
  2624. + lwz r8,TI_PREEMPT_LAZY(r9)
  2625. +
  2626. /* Check that preempt_count() == 0 and interrupts are enabled */
  2627. - lwz r8,TI_PREEMPT(r9)
  2628. +check_count:
  2629. cmpwi cr1,r8,0
  2630. ld r0,SOFTE(r1)
  2631. cmpdi r0,0
  2632. @@ -718,7 +726,7 @@ resume_kernel:
  2633. /* Re-test flags and eventually loop */
  2634. CURRENT_THREAD_INFO(r9, r1)
  2635. ld r4,TI_FLAGS(r9)
  2636. - andi. r0,r4,_TIF_NEED_RESCHED
  2637. + andi. r0,r4,_TIF_NEED_RESCHED_MASK
  2638. bne 1b
  2639. /*
  2640. diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
  2641. index 45096033d37b..6a8e55a17683 100644
  2642. --- a/arch/powerpc/kernel/irq.c
  2643. +++ b/arch/powerpc/kernel/irq.c
  2644. @@ -614,6 +614,7 @@ void irq_ctx_init(void)
  2645. }
  2646. }
  2647. +#ifndef CONFIG_PREEMPT_RT_FULL
  2648. void do_softirq_own_stack(void)
  2649. {
  2650. struct thread_info *curtp, *irqtp;
  2651. @@ -631,6 +632,7 @@ void do_softirq_own_stack(void)
  2652. if (irqtp->flags)
  2653. set_bits(irqtp->flags, &curtp->flags);
  2654. }
  2655. +#endif
  2656. irq_hw_number_t virq_to_hw(unsigned int virq)
  2657. {
  2658. diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
  2659. index 7c6bb4b17b49..e9dfe2270e93 100644
  2660. --- a/arch/powerpc/kernel/misc_32.S
  2661. +++ b/arch/powerpc/kernel/misc_32.S
  2662. @@ -40,6 +40,7 @@
  2663. * We store the saved ksp_limit in the unused part
  2664. * of the STACK_FRAME_OVERHEAD
  2665. */
  2666. +#ifndef CONFIG_PREEMPT_RT_FULL
  2667. _GLOBAL(call_do_softirq)
  2668. mflr r0
  2669. stw r0,4(r1)
  2670. @@ -56,6 +57,7 @@ _GLOBAL(call_do_softirq)
  2671. stw r10,THREAD+KSP_LIMIT(r2)
  2672. mtlr r0
  2673. blr
  2674. +#endif
  2675. /*
  2676. * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
  2677. diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
  2678. index 4e314b90c75d..8a7238dd2f4b 100644
  2679. --- a/arch/powerpc/kernel/misc_64.S
  2680. +++ b/arch/powerpc/kernel/misc_64.S
  2681. @@ -29,6 +29,7 @@
  2682. .text
  2683. +#ifndef CONFIG_PREEMPT_RT_FULL
  2684. _GLOBAL(call_do_softirq)
  2685. mflr r0
  2686. std r0,16(r1)
  2687. @@ -39,6 +40,7 @@ _GLOBAL(call_do_softirq)
  2688. ld r0,16(r1)
  2689. mtlr r0
  2690. blr
  2691. +#endif
  2692. _GLOBAL(call_do_irq)
  2693. mflr r0
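[Editorial note, not part of the patch] The same #ifndef guard appears three times above (irq.c plus the 32- and 64-bit asm thunks) and again for sh and sparc further down: on PREEMPT_RT_FULL softirqs always run in task context, so the switch-to-softirq-stack entry points are simply compiled out. Schematically (body elided in this sketch):

#ifndef CONFIG_PREEMPT_RT_FULL
void do_softirq_own_stack(void)
{
	/* switch to the per-CPU softirq stack and run __do_softirq() there */
}
#endif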
  2694. diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
  2695. index 3caec2c42105..d4c48506ea1b 100644
  2696. --- a/arch/powerpc/kvm/Kconfig
  2697. +++ b/arch/powerpc/kvm/Kconfig
  2698. @@ -172,6 +172,7 @@ config KVM_E500MC
  2699. config KVM_MPIC
  2700. bool "KVM in-kernel MPIC emulation"
  2701. depends on KVM && E500
  2702. + depends on !PREEMPT_RT_FULL
  2703. select HAVE_KVM_IRQCHIP
  2704. select HAVE_KVM_IRQFD
  2705. select HAVE_KVM_IRQ_ROUTING
  2706. diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
  2707. index f5b3de7f7fa2..c3f43a405b58 100644
  2708. --- a/arch/powerpc/kvm/book3s_hv.c
  2709. +++ b/arch/powerpc/kvm/book3s_hv.c
  2710. @@ -115,11 +115,11 @@ static bool kvmppc_ipi_thread(int cpu)
  2711. static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  2712. {
  2713. int cpu = vcpu->cpu;
  2714. - wait_queue_head_t *wqp;
  2715. + struct swait_head *wqp;
  2716. wqp = kvm_arch_vcpu_wq(vcpu);
  2717. - if (waitqueue_active(wqp)) {
  2718. - wake_up_interruptible(wqp);
  2719. + if (swaitqueue_active(wqp)) {
  2720. + swait_wake_interruptible(wqp);
  2721. ++vcpu->stat.halt_wakeup;
  2722. }
  2723. @@ -692,8 +692,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
  2724. tvcpu->arch.prodded = 1;
  2725. smp_mb();
  2726. if (vcpu->arch.ceded) {
  2727. - if (waitqueue_active(&vcpu->wq)) {
  2728. - wake_up_interruptible(&vcpu->wq);
  2729. + if (swaitqueue_active(&vcpu->wq)) {
  2730. + swait_wake_interruptible(&vcpu->wq);
  2731. vcpu->stat.halt_wakeup++;
  2732. }
  2733. }
  2734. @@ -1432,7 +1432,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
  2735. INIT_LIST_HEAD(&vcore->runnable_threads);
  2736. spin_lock_init(&vcore->lock);
  2737. spin_lock_init(&vcore->stoltb_lock);
  2738. - init_waitqueue_head(&vcore->wq);
  2739. + init_swait_head(&vcore->wq);
  2740. vcore->preempt_tb = TB_NIL;
  2741. vcore->lpcr = kvm->arch.lpcr;
  2742. vcore->first_vcpuid = core * threads_per_subcore;
  2743. @@ -2079,10 +2079,9 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
  2744. {
  2745. struct kvm_vcpu *vcpu;
  2746. int do_sleep = 1;
  2747. + DEFINE_SWAITER(wait);
  2748. - DEFINE_WAIT(wait);
  2749. -
  2750. - prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
  2751. + swait_prepare(&vc->wq, &wait, TASK_INTERRUPTIBLE);
  2752. /*
  2753. * Check one last time for pending exceptions and ceded state after
  2754. @@ -2096,7 +2095,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
  2755. }
  2756. if (!do_sleep) {
  2757. - finish_wait(&vc->wq, &wait);
  2758. + swait_finish(&vc->wq, &wait);
  2759. return;
  2760. }
  2761. @@ -2104,7 +2103,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
  2762. trace_kvmppc_vcore_blocked(vc, 0);
  2763. spin_unlock(&vc->lock);
  2764. schedule();
  2765. - finish_wait(&vc->wq, &wait);
  2766. + swait_finish(&vc->wq, &wait);
  2767. spin_lock(&vc->lock);
  2768. vc->vcore_state = VCORE_INACTIVE;
  2769. trace_kvmppc_vcore_blocked(vc, 1);
  2770. @@ -2148,7 +2147,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
  2771. kvmppc_start_thread(vcpu);
  2772. trace_kvm_guest_enter(vcpu);
  2773. } else if (vc->vcore_state == VCORE_SLEEPING) {
  2774. - wake_up(&vc->wq);
  2775. + swait_wake(&vc->wq);
  2776. }
  2777. }
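[Editorial note, not part of the patch] The waitqueue-to-swait conversion above reduces to one idiom on the sleeper side and one on the waker side. Condensed from the hunks (sketch; the re-check condition is a placeholder for the vcore's real "still need to sleep" test):

	DEFINE_SWAITER(wait);

	/* sleeper */
	swait_prepare(&vc->wq, &wait, TASK_INTERRUPTIBLE);
	if (still_need_to_sleep(vc))		/* placeholder re-check */
		schedule();
	swait_finish(&vc->wq, &wait);

	/* waker */
	if (swaitqueue_active(&vc->wq))
		swait_wake_interruptible(&vc->wq);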
  2778. diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
  2779. index b396868d2aa7..6d535973b200 100644
  2780. --- a/arch/powerpc/mm/fault.c
  2781. +++ b/arch/powerpc/mm/fault.c
  2782. @@ -33,13 +33,13 @@
  2783. #include <linux/ratelimit.h>
  2784. #include <linux/context_tracking.h>
  2785. #include <linux/hugetlb.h>
  2786. +#include <linux/uaccess.h>
  2787. #include <asm/firmware.h>
  2788. #include <asm/page.h>
  2789. #include <asm/pgtable.h>
  2790. #include <asm/mmu.h>
  2791. #include <asm/mmu_context.h>
  2792. -#include <asm/uaccess.h>
  2793. #include <asm/tlbflush.h>
  2794. #include <asm/siginfo.h>
  2795. #include <asm/debug.h>
  2796. @@ -272,15 +272,16 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
  2797. if (!arch_irq_disabled_regs(regs))
  2798. local_irq_enable();
  2799. - if (in_atomic() || mm == NULL) {
  2800. + if (faulthandler_disabled() || mm == NULL) {
  2801. if (!user_mode(regs)) {
  2802. rc = SIGSEGV;
  2803. goto bail;
  2804. }
  2805. - /* in_atomic() in user mode is really bad,
  2806. + /* faulthandler_disabled() in user mode is really bad,
  2807. as is current->mm == NULL. */
  2808. printk(KERN_EMERG "Page fault in user mode with "
  2809. - "in_atomic() = %d mm = %p\n", in_atomic(), mm);
  2810. + "faulthandler_disabled() = %d mm = %p\n",
  2811. + faulthandler_disabled(), mm);
  2812. printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
  2813. regs->nip, regs->msr);
  2814. die("Weird page fault", regs, SIGSEGV);
  2815. diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c
  2816. index e7450bdbe83a..e292c8a60952 100644
  2817. --- a/arch/powerpc/mm/highmem.c
  2818. +++ b/arch/powerpc/mm/highmem.c
  2819. @@ -34,7 +34,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
  2820. unsigned long vaddr;
  2821. int idx, type;
  2822. - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
  2823. + preempt_disable();
  2824. pagefault_disable();
  2825. if (!PageHighMem(page))
  2826. return page_address(page);
  2827. @@ -59,6 +59,7 @@ void __kunmap_atomic(void *kvaddr)
  2828. if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
  2829. pagefault_enable();
  2830. + preempt_enable();
  2831. return;
  2832. }
  2833. @@ -82,5 +83,6 @@ void __kunmap_atomic(void *kvaddr)
  2834. kmap_atomic_idx_pop();
  2835. pagefault_enable();
  2836. + preempt_enable();
  2837. }
  2838. EXPORT_SYMBOL(__kunmap_atomic);
  2839. diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
  2840. index 3f175e8aedb4..c4c02f91904c 100644
  2841. --- a/arch/powerpc/platforms/ps3/device-init.c
  2842. +++ b/arch/powerpc/platforms/ps3/device-init.c
  2843. @@ -752,7 +752,7 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev,
  2844. }
  2845. pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op);
  2846. - res = wait_event_interruptible(dev->done.wait,
  2847. + res = swait_event_interruptible(dev->done.wait,
  2848. dev->done.done || kthread_should_stop());
  2849. if (kthread_should_stop())
  2850. res = -EINTR;
  2851. diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
  2852. index d01fc588b5c3..905007eead88 100644
  2853. --- a/arch/s390/include/asm/kvm_host.h
  2854. +++ b/arch/s390/include/asm/kvm_host.h
  2855. @@ -419,7 +419,7 @@ struct kvm_s390_irq_payload {
  2856. struct kvm_s390_local_interrupt {
  2857. spinlock_t lock;
  2858. struct kvm_s390_float_interrupt *float_int;
  2859. - wait_queue_head_t *wq;
  2860. + struct swait_head *wq;
  2861. atomic_t *cpuflags;
  2862. DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
  2863. struct kvm_s390_irq_payload irq;
  2864. diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
  2865. index f6ac1d7e7ed8..5c7381c5ad7f 100644
  2866. --- a/arch/s390/include/asm/uaccess.h
  2867. +++ b/arch/s390/include/asm/uaccess.h
  2868. @@ -98,7 +98,8 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x)
  2869. * @from: Source address, in user space.
  2870. * @n: Number of bytes to copy.
  2871. *
  2872. - * Context: User context only. This function may sleep.
  2873. + * Context: User context only. This function may sleep if pagefaults are
  2874. + * enabled.
  2875. *
  2876. * Copy data from user space to kernel space. Caller must check
  2877. * the specified block with access_ok() before calling this function.
  2878. @@ -118,7 +119,8 @@ unsigned long __must_check __copy_from_user(void *to, const void __user *from,
  2879. * @from: Source address, in kernel space.
  2880. * @n: Number of bytes to copy.
  2881. *
  2882. - * Context: User context only. This function may sleep.
  2883. + * Context: User context only. This function may sleep if pagefaults are
  2884. + * enabled.
  2885. *
  2886. * Copy data from kernel space to user space. Caller must check
  2887. * the specified block with access_ok() before calling this function.
  2888. @@ -264,7 +266,8 @@ int __get_user_bad(void) __attribute__((noreturn));
  2889. * @from: Source address, in kernel space.
  2890. * @n: Number of bytes to copy.
  2891. *
  2892. - * Context: User context only. This function may sleep.
  2893. + * Context: User context only. This function may sleep if pagefaults are
  2894. + * enabled.
  2895. *
  2896. * Copy data from kernel space to user space.
  2897. *
  2898. @@ -290,7 +293,8 @@ __compiletime_warning("copy_from_user() buffer size is not provably correct")
  2899. * @from: Source address, in user space.
  2900. * @n: Number of bytes to copy.
  2901. *
  2902. - * Context: User context only. This function may sleep.
  2903. + * Context: User context only. This function may sleep if pagefaults are
  2904. + * enabled.
  2905. *
  2906. * Copy data from user space to kernel space.
  2907. *
  2908. @@ -348,7 +352,8 @@ static inline unsigned long strnlen_user(const char __user *src, unsigned long n
  2909. * strlen_user: - Get the size of a string in user space.
  2910. * @str: The string to measure.
  2911. *
  2912. - * Context: User context only. This function may sleep.
  2913. + * Context: User context only. This function may sleep if pagefaults are
  2914. + * enabled.
  2915. *
  2916. * Get the size of a NUL-terminated string in user space.
  2917. *
  2918. diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
  2919. index 3dbba9a2bb0f..15016703b4bf 100644
  2920. --- a/arch/s390/kvm/interrupt.c
  2921. +++ b/arch/s390/kvm/interrupt.c
  2922. @@ -875,13 +875,13 @@ no_timer:
  2923. void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
  2924. {
  2925. - if (waitqueue_active(&vcpu->wq)) {
  2926. + if (swaitqueue_active(&vcpu->wq)) {
  2927. /*
  2928. * The vcpu gave up the cpu voluntarily, mark it as a good
  2929. * yield-candidate.
  2930. */
  2931. vcpu->preempted = true;
  2932. - wake_up_interruptible(&vcpu->wq);
  2933. + swait_wake_interruptible(&vcpu->wq);
  2934. vcpu->stat.halt_wakeup++;
  2935. }
  2936. }
  2937. @@ -987,7 +987,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
  2938. spin_lock(&li->lock);
  2939. irq.u.pgm.code = code;
  2940. __inject_prog(vcpu, &irq);
  2941. - BUG_ON(waitqueue_active(li->wq));
  2942. + BUG_ON(swaitqueue_active(li->wq));
  2943. spin_unlock(&li->lock);
  2944. return 0;
  2945. }
  2946. @@ -1006,7 +1006,7 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
  2947. spin_lock(&li->lock);
  2948. irq.u.pgm = *pgm_info;
  2949. rc = __inject_prog(vcpu, &irq);
  2950. - BUG_ON(waitqueue_active(li->wq));
  2951. + BUG_ON(swaitqueue_active(li->wq));
  2952. spin_unlock(&li->lock);
  2953. return rc;
  2954. }
  2955. diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
  2956. index 76515bcea2f1..4c8f5d7f9c23 100644
  2957. --- a/arch/s390/mm/fault.c
  2958. +++ b/arch/s390/mm/fault.c
  2959. @@ -399,7 +399,7 @@ static inline int do_exception(struct pt_regs *regs, int access)
  2960. * user context.
  2961. */
  2962. fault = VM_FAULT_BADCONTEXT;
  2963. - if (unlikely(!user_space_fault(regs) || in_atomic() || !mm))
  2964. + if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm))
  2965. goto out;
  2966. address = trans_exc_code & __FAIL_ADDR_MASK;
  2967. diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h
  2968. index 69326dfb894d..01aec8ccde83 100644
  2969. --- a/arch/score/include/asm/uaccess.h
  2970. +++ b/arch/score/include/asm/uaccess.h
  2971. @@ -36,7 +36,8 @@
  2972. * @addr: User space pointer to start of block to check
  2973. * @size: Size of block to check
  2974. *
  2975. - * Context: User context only. This function may sleep.
  2976. + * Context: User context only. This function may sleep if pagefaults are
  2977. + * enabled.
  2978. *
  2979. * Checks if a pointer to a block of memory in user space is valid.
  2980. *
  2981. @@ -61,7 +62,8 @@
  2982. * @x: Value to copy to user space.
  2983. * @ptr: Destination address, in user space.
  2984. *
  2985. - * Context: User context only. This function may sleep.
  2986. + * Context: User context only. This function may sleep if pagefaults are
  2987. + * enabled.
  2988. *
  2989. * This macro copies a single simple value from kernel space to user
  2990. * space. It supports simple types like char and int, but not larger
  2991. @@ -79,7 +81,8 @@
  2992. * @x: Variable to store result.
  2993. * @ptr: Source address, in user space.
  2994. *
  2995. - * Context: User context only. This function may sleep.
  2996. + * Context: User context only. This function may sleep if pagefaults are
  2997. + * enabled.
  2998. *
  2999. * This macro copies a single simple variable from user space to kernel
  3000. * space. It supports simple types like char and int, but not larger
  3001. @@ -98,7 +101,8 @@
  3002. * @x: Value to copy to user space.
  3003. * @ptr: Destination address, in user space.
  3004. *
  3005. - * Context: User context only. This function may sleep.
  3006. + * Context: User context only. This function may sleep if pagefaults are
  3007. + * enabled.
  3008. *
  3009. * This macro copies a single simple value from kernel space to user
  3010. * space. It supports simple types like char and int, but not larger
  3011. @@ -119,7 +123,8 @@
  3012. * @x: Variable to store result.
  3013. * @ptr: Source address, in user space.
  3014. *
  3015. - * Context: User context only. This function may sleep.
  3016. + * Context: User context only. This function may sleep if pagefaults are
  3017. + * enabled.
  3018. *
  3019. * This macro copies a single simple variable from user space to kernel
  3020. * space. It supports simple types like char and int, but not larger
  3021. diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c
  3022. index 6860beb2a280..37a6c2e0e969 100644
  3023. --- a/arch/score/mm/fault.c
  3024. +++ b/arch/score/mm/fault.c
  3025. @@ -34,6 +34,7 @@
  3026. #include <linux/string.h>
  3027. #include <linux/types.h>
  3028. #include <linux/ptrace.h>
  3029. +#include <linux/uaccess.h>
  3030. /*
  3031. * This routine handles page faults. It determines the address,
  3032. @@ -73,7 +74,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
  3033. * If we're in an interrupt or have no user
  3034. * context, we must not take the fault..
  3035. */
  3036. - if (in_atomic() || !mm)
  3037. + if (pagefault_disabled() || !mm)
  3038. goto bad_area_nosemaphore;
  3039. if (user_mode(regs))
  3040. diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
  3041. index eb10ff84015c..6fe8089e63fa 100644
  3042. --- a/arch/sh/kernel/irq.c
  3043. +++ b/arch/sh/kernel/irq.c
  3044. @@ -147,6 +147,7 @@ void irq_ctx_exit(int cpu)
  3045. hardirq_ctx[cpu] = NULL;
  3046. }
  3047. +#ifndef CONFIG_PREEMPT_RT_FULL
  3048. void do_softirq_own_stack(void)
  3049. {
  3050. struct thread_info *curctx;
  3051. @@ -174,6 +175,7 @@ void do_softirq_own_stack(void)
  3052. "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
  3053. );
  3054. }
  3055. +#endif
  3056. #else
  3057. static inline void handle_one_irq(unsigned int irq)
  3058. {
  3059. diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
  3060. index a58fec9b55e0..79d8276377d1 100644
  3061. --- a/arch/sh/mm/fault.c
  3062. +++ b/arch/sh/mm/fault.c
  3063. @@ -17,6 +17,7 @@
  3064. #include <linux/kprobes.h>
  3065. #include <linux/perf_event.h>
  3066. #include <linux/kdebug.h>
  3067. +#include <linux/uaccess.h>
  3068. #include <asm/io_trapped.h>
  3069. #include <asm/mmu_context.h>
  3070. #include <asm/tlbflush.h>
  3071. @@ -438,9 +439,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
  3072. /*
  3073. * If we're in an interrupt, have no user context or are running
  3074. - * in an atomic region then we must not take the fault:
  3075. + * with pagefaults disabled then we must not take the fault:
  3076. */
  3077. - if (unlikely(in_atomic() || !mm)) {
  3078. + if (unlikely(faulthandler_disabled() || !mm)) {
  3079. bad_area_nosemaphore(regs, error_code, address);
  3080. return;
  3081. }
  3082. diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
  3083. index e49502acbab4..85cb0c621283 100644
  3084. --- a/arch/sparc/Kconfig
  3085. +++ b/arch/sparc/Kconfig
  3086. @@ -189,12 +189,10 @@ config NR_CPUS
  3087. source kernel/Kconfig.hz
  3088. config RWSEM_GENERIC_SPINLOCK
  3089. - bool
  3090. - default y if SPARC32
  3091. + def_bool PREEMPT_RT_FULL
  3092. config RWSEM_XCHGADD_ALGORITHM
  3093. - bool
  3094. - default y if SPARC64
  3095. + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
  3096. config GENERIC_HWEIGHT
  3097. bool
  3098. diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
  3099. index 4033c23bdfa6..763cd88b4e92 100644
  3100. --- a/arch/sparc/kernel/irq_64.c
  3101. +++ b/arch/sparc/kernel/irq_64.c
  3102. @@ -849,6 +849,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs)
  3103. set_irq_regs(old_regs);
  3104. }
  3105. +#ifndef CONFIG_PREEMPT_RT_FULL
  3106. void do_softirq_own_stack(void)
  3107. {
  3108. void *orig_sp, *sp = softirq_stack[smp_processor_id()];
  3109. @@ -863,6 +864,7 @@ void do_softirq_own_stack(void)
  3110. __asm__ __volatile__("mov %0, %%sp"
  3111. : : "r" (orig_sp));
  3112. }
  3113. +#endif
  3114. #ifdef CONFIG_HOTPLUG_CPU
  3115. void fixup_irqs(void)
  3116. diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
  3117. index 70d817154fe8..c399e7b3b035 100644
  3118. --- a/arch/sparc/mm/fault_32.c
  3119. +++ b/arch/sparc/mm/fault_32.c
  3120. @@ -21,6 +21,7 @@
  3121. #include <linux/perf_event.h>
  3122. #include <linux/interrupt.h>
  3123. #include <linux/kdebug.h>
  3124. +#include <linux/uaccess.h>
  3125. #include <asm/page.h>
  3126. #include <asm/pgtable.h>
  3127. @@ -29,7 +30,6 @@
  3128. #include <asm/setup.h>
  3129. #include <asm/smp.h>
  3130. #include <asm/traps.h>
  3131. -#include <asm/uaccess.h>
  3132. #include "mm_32.h"
  3133. @@ -196,7 +196,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
  3134. * If we're in an interrupt or have no user
  3135. * context, we must not take the fault..
  3136. */
  3137. - if (in_atomic() || !mm)
  3138. + if (pagefault_disabled() || !mm)
  3139. goto no_context;
  3140. perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
  3141. diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
  3142. index 479823249429..e9268ea1a68d 100644
  3143. --- a/arch/sparc/mm/fault_64.c
  3144. +++ b/arch/sparc/mm/fault_64.c
  3145. @@ -22,12 +22,12 @@
  3146. #include <linux/kdebug.h>
  3147. #include <linux/percpu.h>
  3148. #include <linux/context_tracking.h>
  3149. +#include <linux/uaccess.h>
  3150. #include <asm/page.h>
  3151. #include <asm/pgtable.h>
  3152. #include <asm/openprom.h>
  3153. #include <asm/oplib.h>
  3154. -#include <asm/uaccess.h>
  3155. #include <asm/asi.h>
  3156. #include <asm/lsu.h>
  3157. #include <asm/sections.h>
  3158. @@ -330,7 +330,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
  3159. * If we're in an interrupt or have no user
  3160. * context, we must not take the fault..
  3161. */
  3162. - if (in_atomic() || !mm)
  3163. + if (faulthandler_disabled() || !mm)
  3164. goto intr_or_no_mm;
  3165. perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
  3166. diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c
  3167. index 449f864f0cef..a454ec5ff07a 100644
  3168. --- a/arch/sparc/mm/highmem.c
  3169. +++ b/arch/sparc/mm/highmem.c
  3170. @@ -53,7 +53,7 @@ void *kmap_atomic(struct page *page)
  3171. unsigned long vaddr;
  3172. long idx, type;
  3173. - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
  3174. + preempt_disable();
  3175. pagefault_disable();
  3176. if (!PageHighMem(page))
  3177. return page_address(page);
  3178. @@ -91,6 +91,7 @@ void __kunmap_atomic(void *kvaddr)
  3179. if (vaddr < FIXADDR_START) { // FIXME
  3180. pagefault_enable();
  3181. + preempt_enable();
  3182. return;
  3183. }
  3184. @@ -126,5 +127,6 @@ void __kunmap_atomic(void *kvaddr)
  3185. kmap_atomic_idx_pop();
  3186. pagefault_enable();
  3187. + preempt_enable();
  3188. }
  3189. EXPORT_SYMBOL(__kunmap_atomic);
  3190. diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
  3191. index 1d71181dcc04..1a55c8481272 100644
  3192. --- a/arch/sparc/mm/init_64.c
  3193. +++ b/arch/sparc/mm/init_64.c
  3194. @@ -2738,7 +2738,7 @@ void hugetlb_setup(struct pt_regs *regs)
  3195. struct mm_struct *mm = current->mm;
  3196. struct tsb_config *tp;
  3197. - if (in_atomic() || !mm) {
  3198. + if (faulthandler_disabled() || !mm) {
  3199. const struct exception_table_entry *entry;
  3200. entry = search_exception_tables(regs->tpc);
  3201. diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
  3202. index f41cb53cf645..a33276bf5ca1 100644
  3203. --- a/arch/tile/include/asm/uaccess.h
  3204. +++ b/arch/tile/include/asm/uaccess.h
  3205. @@ -78,7 +78,8 @@ int __range_ok(unsigned long addr, unsigned long size);
  3206. * @addr: User space pointer to start of block to check
  3207. * @size: Size of block to check
  3208. *
  3209. - * Context: User context only. This function may sleep.
  3210. + * Context: User context only. This function may sleep if pagefaults are
  3211. + * enabled.
  3212. *
  3213. * Checks if a pointer to a block of memory in user space is valid.
  3214. *
  3215. @@ -192,7 +193,8 @@ extern int __get_user_bad(void)
  3216. * @x: Variable to store result.
  3217. * @ptr: Source address, in user space.
  3218. *
  3219. - * Context: User context only. This function may sleep.
  3220. + * Context: User context only. This function may sleep if pagefaults are
  3221. + * enabled.
  3222. *
  3223. * This macro copies a single simple variable from user space to kernel
  3224. * space. It supports simple types like char and int, but not larger
  3225. @@ -274,7 +276,8 @@ extern int __put_user_bad(void)
  3226. * @x: Value to copy to user space.
  3227. * @ptr: Destination address, in user space.
  3228. *
  3229. - * Context: User context only. This function may sleep.
  3230. + * Context: User context only. This function may sleep if pagefaults are
  3231. + * enabled.
  3232. *
  3233. * This macro copies a single simple value from kernel space to user
  3234. * space. It supports simple types like char and int, but not larger
  3235. @@ -330,7 +333,8 @@ extern int __put_user_bad(void)
  3236. * @from: Source address, in kernel space.
  3237. * @n: Number of bytes to copy.
  3238. *
  3239. - * Context: User context only. This function may sleep.
  3240. + * Context: User context only. This function may sleep if pagefaults are
  3241. + * enabled.
  3242. *
  3243. * Copy data from kernel space to user space. Caller must check
  3244. * the specified block with access_ok() before calling this function.
  3245. @@ -366,7 +370,8 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
  3246. * @from: Source address, in user space.
  3247. * @n: Number of bytes to copy.
  3248. *
  3249. - * Context: User context only. This function may sleep.
  3250. + * Context: User context only. This function may sleep if pagefaults are
  3251. + * enabled.
  3252. *
  3253. * Copy data from user space to kernel space. Caller must check
  3254. * the specified block with access_ok() before calling this function.
  3255. @@ -437,7 +442,8 @@ static inline unsigned long __must_check copy_from_user(void *to,
  3256. * @from: Source address, in user space.
  3257. * @n: Number of bytes to copy.
  3258. *
  3259. - * Context: User context only. This function may sleep.
  3260. + * Context: User context only. This function may sleep if pagefaults are
  3261. + * enabled.
  3262. *
  3263. * Copy data from user space to user space. Caller must check
  3264. * the specified blocks with access_ok() before calling this function.
  3265. diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
  3266. index e83cc999da02..3f4f58d34a92 100644
  3267. --- a/arch/tile/mm/fault.c
  3268. +++ b/arch/tile/mm/fault.c
  3269. @@ -354,9 +354,9 @@ static int handle_page_fault(struct pt_regs *regs,
  3270. /*
  3271. * If we're in an interrupt, have no user context or are running in an
  3272. - * atomic region then we must not take the fault.
  3273. + * region with pagefaults disabled then we must not take the fault.
  3274. */
  3275. - if (in_atomic() || !mm) {
  3276. + if (pagefault_disabled() || !mm) {
  3277. vma = NULL; /* happy compiler */
  3278. goto bad_area_nosemaphore;
  3279. }
  3280. diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c
  3281. index 6aa2f2625447..fcd545014e79 100644
  3282. --- a/arch/tile/mm/highmem.c
  3283. +++ b/arch/tile/mm/highmem.c
  3284. @@ -201,7 +201,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
  3285. int idx, type;
  3286. pte_t *pte;
  3287. - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
  3288. + preempt_disable();
  3289. pagefault_disable();
  3290. /* Avoid icache flushes by disallowing atomic executable mappings. */
  3291. @@ -259,6 +259,7 @@ void __kunmap_atomic(void *kvaddr)
  3292. }
  3293. pagefault_enable();
  3294. + preempt_enable();
  3295. }
  3296. EXPORT_SYMBOL(__kunmap_atomic);
  3297. diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
  3298. index 8e4daf44e980..f9c9e5a6beba 100644
  3299. --- a/arch/um/kernel/trap.c
  3300. +++ b/arch/um/kernel/trap.c
  3301. @@ -35,10 +35,10 @@ int handle_page_fault(unsigned long address, unsigned long ip,
  3302. *code_out = SEGV_MAPERR;
  3303. /*
  3304. - * If the fault was during atomic operation, don't take the fault, just
  3305. + * If the fault was with pagefaults disabled, don't take the fault, just
  3306. * fail.
  3307. */
  3308. - if (in_atomic())
  3309. + if (faulthandler_disabled())
  3310. goto out_nosemaphore;
  3311. if (is_user)
  3312. diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c
  3313. index 0dc922dba915..afccef5529cc 100644
  3314. --- a/arch/unicore32/mm/fault.c
  3315. +++ b/arch/unicore32/mm/fault.c
  3316. @@ -218,7 +218,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
  3317. * If we're in an interrupt or have no user
  3318. * context, we must not take the fault..
  3319. */
  3320. - if (in_atomic() || !mm)
  3321. + if (faulthandler_disabled() || !mm)
  3322. goto no_context;
  3323. if (user_mode(regs))
  3324. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
  3325. index 226d5696e1d1..aac357a4cd5c 100644
  3326. --- a/arch/x86/Kconfig
  3327. +++ b/arch/x86/Kconfig
  3328. @@ -22,6 +22,7 @@ config X86_64
  3329. ### Arch settings
  3330. config X86
  3331. def_bool y
  3332. + select HAVE_PREEMPT_LAZY
  3333. select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
  3334. select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
  3335. select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
  3336. @@ -203,8 +204,11 @@ config ARCH_MAY_HAVE_PC_FDC
  3337. def_bool y
  3338. depends on ISA_DMA_API
  3339. +config RWSEM_GENERIC_SPINLOCK
  3340. + def_bool PREEMPT_RT_FULL
  3341. +
  3342. config RWSEM_XCHGADD_ALGORITHM
  3343. - def_bool y
  3344. + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
  3345. config GENERIC_CALIBRATE_DELAY
  3346. def_bool y
  3347. @@ -838,7 +842,7 @@ config IOMMU_HELPER
  3348. config MAXSMP
  3349. bool "Enable Maximum number of SMP Processors and NUMA Nodes"
  3350. depends on X86_64 && SMP && DEBUG_KERNEL
  3351. - select CPUMASK_OFFSTACK
  3352. + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
  3353. ---help---
  3354. Enable maximum number of CPUS and NUMA Nodes for this architecture.
  3355. If unsure, say N.
  3356. diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
  3357. index 112cefacf2af..3fd3b16349ae 100644
  3358. --- a/arch/x86/crypto/aesni-intel_glue.c
  3359. +++ b/arch/x86/crypto/aesni-intel_glue.c
  3360. @@ -382,14 +382,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc,
  3361. err = blkcipher_walk_virt(desc, &walk);
  3362. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  3363. - kernel_fpu_begin();
  3364. while ((nbytes = walk.nbytes)) {
  3365. + kernel_fpu_begin();
  3366. aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  3367. - nbytes & AES_BLOCK_MASK);
  3368. + nbytes & AES_BLOCK_MASK);
  3369. + kernel_fpu_end();
  3370. nbytes &= AES_BLOCK_SIZE - 1;
  3371. err = blkcipher_walk_done(desc, &walk, nbytes);
  3372. }
  3373. - kernel_fpu_end();
  3374. return err;
  3375. }
  3376. @@ -406,14 +406,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc,
  3377. err = blkcipher_walk_virt(desc, &walk);
  3378. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  3379. - kernel_fpu_begin();
  3380. while ((nbytes = walk.nbytes)) {
  3381. + kernel_fpu_begin();
  3382. aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  3383. nbytes & AES_BLOCK_MASK);
  3384. + kernel_fpu_end();
  3385. nbytes &= AES_BLOCK_SIZE - 1;
  3386. err = blkcipher_walk_done(desc, &walk, nbytes);
  3387. }
  3388. - kernel_fpu_end();
  3389. return err;
  3390. }
  3391. @@ -430,14 +430,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc,
  3392. err = blkcipher_walk_virt(desc, &walk);
  3393. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  3394. - kernel_fpu_begin();
  3395. while ((nbytes = walk.nbytes)) {
  3396. + kernel_fpu_begin();
  3397. aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  3398. nbytes & AES_BLOCK_MASK, walk.iv);
  3399. + kernel_fpu_end();
  3400. nbytes &= AES_BLOCK_SIZE - 1;
  3401. err = blkcipher_walk_done(desc, &walk, nbytes);
  3402. }
  3403. - kernel_fpu_end();
  3404. return err;
  3405. }
  3406. @@ -454,14 +454,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc,
  3407. err = blkcipher_walk_virt(desc, &walk);
  3408. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  3409. - kernel_fpu_begin();
  3410. while ((nbytes = walk.nbytes)) {
  3411. + kernel_fpu_begin();
  3412. aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  3413. nbytes & AES_BLOCK_MASK, walk.iv);
  3414. + kernel_fpu_end();
  3415. nbytes &= AES_BLOCK_SIZE - 1;
  3416. err = blkcipher_walk_done(desc, &walk, nbytes);
  3417. }
  3418. - kernel_fpu_end();
  3419. return err;
  3420. }
  3421. @@ -513,18 +513,20 @@ static int ctr_crypt(struct blkcipher_desc *desc,
  3422. err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
  3423. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  3424. - kernel_fpu_begin();
  3425. while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
  3426. + kernel_fpu_begin();
  3427. aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  3428. nbytes & AES_BLOCK_MASK, walk.iv);
  3429. + kernel_fpu_end();
  3430. nbytes &= AES_BLOCK_SIZE - 1;
  3431. err = blkcipher_walk_done(desc, &walk, nbytes);
  3432. }
  3433. if (walk.nbytes) {
  3434. + kernel_fpu_begin();
  3435. ctr_crypt_final(ctx, &walk);
  3436. + kernel_fpu_end();
  3437. err = blkcipher_walk_done(desc, &walk, 0);
  3438. }
  3439. - kernel_fpu_end();
  3440. return err;
  3441. }
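The aesni hunks above move kernel_fpu_begin()/kernel_fpu_end() from around the whole request into the per-block walk loop, so the preempt-disabled FPU section only ever covers one block's worth of work. Below is a minimal userspace sketch (not kernel code) of why that matters for latency; critical_enter()/critical_exit() and all other names are illustrative stand-ins, not the kernel API.

/*
 * Userspace analogue of the kernel_fpu_begin()/kernel_fpu_end() change:
 * the non-preemptible window now covers one chunk, not the whole request.
 */
#include <stdio.h>
#include <stddef.h>

#define CHUNK 4096

static unsigned long max_critical_bytes;

static void critical_enter(void) { /* stands in for kernel_fpu_begin() */ }
static void critical_exit(void)  { /* stands in for kernel_fpu_end()   */ }

static void process_chunk(const unsigned char *src, size_t len)
{
	(void)src; (void)len;		/* cipher work would happen here */
}

static void encrypt_request(const unsigned char *buf, size_t nbytes)
{
	while (nbytes) {
		size_t n = nbytes < CHUNK ? nbytes : CHUNK;

		critical_enter();	/* was: once, before the loop */
		process_chunk(buf, n);
		critical_exit();	/* was: once, after the loop  */

		if (n > max_critical_bytes)
			max_critical_bytes = n;
		buf += n;
		nbytes -= n;
	}
}

int main(void)
{
	unsigned char buf[64 * 1024] = { 0 };

	encrypt_request(buf, sizeof(buf));
	/* worst-case non-preemptible work is now bounded by CHUNK */
	printf("max bytes per critical section: %lu\n", max_critical_bytes);
	return 0;
}

The cast5 and glue_helper hunks that follow apply the same pattern: acquire the FPU immediately before each walk step and release it before blkcipher_walk_done(), instead of holding it across the entire walk.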
  3442. diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
  3443. index 236c80974457..f799ec36bfa7 100644
  3444. --- a/arch/x86/crypto/cast5_avx_glue.c
  3445. +++ b/arch/x86/crypto/cast5_avx_glue.c
  3446. @@ -60,7 +60,7 @@ static inline void cast5_fpu_end(bool fpu_enabled)
  3447. static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
  3448. bool enc)
  3449. {
  3450. - bool fpu_enabled = false;
  3451. + bool fpu_enabled;
  3452. struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  3453. const unsigned int bsize = CAST5_BLOCK_SIZE;
  3454. unsigned int nbytes;
  3455. @@ -76,7 +76,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
  3456. u8 *wsrc = walk->src.virt.addr;
  3457. u8 *wdst = walk->dst.virt.addr;
  3458. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  3459. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  3460. /* Process multi-block batch */
  3461. if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
  3462. @@ -104,10 +104,9 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
  3463. } while (nbytes >= bsize);
  3464. done:
  3465. + cast5_fpu_end(fpu_enabled);
  3466. err = blkcipher_walk_done(desc, walk, nbytes);
  3467. }
  3468. -
  3469. - cast5_fpu_end(fpu_enabled);
  3470. return err;
  3471. }
  3472. @@ -228,7 +227,7 @@ done:
  3473. static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  3474. struct scatterlist *src, unsigned int nbytes)
  3475. {
  3476. - bool fpu_enabled = false;
  3477. + bool fpu_enabled;
  3478. struct blkcipher_walk walk;
  3479. int err;
  3480. @@ -237,12 +236,11 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  3481. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  3482. while ((nbytes = walk.nbytes)) {
  3483. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  3484. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  3485. nbytes = __cbc_decrypt(desc, &walk);
  3486. + cast5_fpu_end(fpu_enabled);
  3487. err = blkcipher_walk_done(desc, &walk, nbytes);
  3488. }
  3489. -
  3490. - cast5_fpu_end(fpu_enabled);
  3491. return err;
  3492. }
  3493. @@ -312,7 +310,7 @@ done:
  3494. static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  3495. struct scatterlist *src, unsigned int nbytes)
  3496. {
  3497. - bool fpu_enabled = false;
  3498. + bool fpu_enabled;
  3499. struct blkcipher_walk walk;
  3500. int err;
  3501. @@ -321,13 +319,12 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  3502. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  3503. while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
  3504. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  3505. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  3506. nbytes = __ctr_crypt(desc, &walk);
  3507. + cast5_fpu_end(fpu_enabled);
  3508. err = blkcipher_walk_done(desc, &walk, nbytes);
  3509. }
  3510. - cast5_fpu_end(fpu_enabled);
  3511. -
  3512. if (walk.nbytes) {
  3513. ctr_crypt_final(desc, &walk);
  3514. err = blkcipher_walk_done(desc, &walk, 0);
  3515. diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
  3516. index 6a85598931b5..3a506ce7ed93 100644
  3517. --- a/arch/x86/crypto/glue_helper.c
  3518. +++ b/arch/x86/crypto/glue_helper.c
  3519. @@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
  3520. void *ctx = crypto_blkcipher_ctx(desc->tfm);
  3521. const unsigned int bsize = 128 / 8;
  3522. unsigned int nbytes, i, func_bytes;
  3523. - bool fpu_enabled = false;
  3524. + bool fpu_enabled;
  3525. int err;
  3526. err = blkcipher_walk_virt(desc, walk);
  3527. @@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
  3528. u8 *wdst = walk->dst.virt.addr;
  3529. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  3530. - desc, fpu_enabled, nbytes);
  3531. + desc, false, nbytes);
  3532. for (i = 0; i < gctx->num_funcs; i++) {
  3533. func_bytes = bsize * gctx->funcs[i].num_blocks;
  3534. @@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
  3535. }
  3536. done:
  3537. + glue_fpu_end(fpu_enabled);
  3538. err = blkcipher_walk_done(desc, walk, nbytes);
  3539. }
  3540. - glue_fpu_end(fpu_enabled);
  3541. return err;
  3542. }
  3543. @@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
  3544. struct scatterlist *src, unsigned int nbytes)
  3545. {
  3546. const unsigned int bsize = 128 / 8;
  3547. - bool fpu_enabled = false;
  3548. + bool fpu_enabled;
  3549. struct blkcipher_walk walk;
  3550. int err;
  3551. @@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
  3552. while ((nbytes = walk.nbytes)) {
  3553. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  3554. - desc, fpu_enabled, nbytes);
  3555. + desc, false, nbytes);
  3556. nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
  3557. + glue_fpu_end(fpu_enabled);
  3558. err = blkcipher_walk_done(desc, &walk, nbytes);
  3559. }
  3560. - glue_fpu_end(fpu_enabled);
  3561. return err;
  3562. }
  3563. EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
  3564. @@ -277,7 +277,7 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
  3565. struct scatterlist *src, unsigned int nbytes)
  3566. {
  3567. const unsigned int bsize = 128 / 8;
  3568. - bool fpu_enabled = false;
  3569. + bool fpu_enabled;
  3570. struct blkcipher_walk walk;
  3571. int err;
  3572. @@ -286,13 +286,12 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
  3573. while ((nbytes = walk.nbytes) >= bsize) {
  3574. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  3575. - desc, fpu_enabled, nbytes);
  3576. + desc, false, nbytes);
  3577. nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
  3578. + glue_fpu_end(fpu_enabled);
  3579. err = blkcipher_walk_done(desc, &walk, nbytes);
  3580. }
  3581. - glue_fpu_end(fpu_enabled);
  3582. -
  3583. if (walk.nbytes) {
  3584. glue_ctr_crypt_final_128bit(
  3585. gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
  3586. @@ -347,7 +346,7 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
  3587. void *tweak_ctx, void *crypt_ctx)
  3588. {
  3589. const unsigned int bsize = 128 / 8;
  3590. - bool fpu_enabled = false;
  3591. + bool fpu_enabled;
  3592. struct blkcipher_walk walk;
  3593. int err;
  3594. @@ -360,21 +359,21 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
  3595. /* set minimum length to bsize, for tweak_fn */
  3596. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  3597. - desc, fpu_enabled,
  3598. + desc, false,
  3599. nbytes < bsize ? bsize : nbytes);
  3600. -
  3601. /* calculate first value of T */
  3602. tweak_fn(tweak_ctx, walk.iv, walk.iv);
  3603. + glue_fpu_end(fpu_enabled);
  3604. while (nbytes) {
  3605. + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  3606. + desc, false, nbytes);
  3607. nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
  3608. + glue_fpu_end(fpu_enabled);
  3609. err = blkcipher_walk_done(desc, &walk, nbytes);
  3610. nbytes = walk.nbytes;
  3611. }
  3612. -
  3613. - glue_fpu_end(fpu_enabled);
  3614. -
  3615. return err;
  3616. }
  3617. EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
  3618. diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
  3619. index 67b6cd00a44f..eff1b8609f77 100644
  3620. --- a/arch/x86/include/asm/preempt.h
  3621. +++ b/arch/x86/include/asm/preempt.h
  3622. @@ -82,17 +82,46 @@ static __always_inline void __preempt_count_sub(int val)
  3623. * a decrement which hits zero means we have no preempt_count and should
  3624. * reschedule.
  3625. */
  3626. -static __always_inline bool __preempt_count_dec_and_test(void)
  3627. +static __always_inline bool ____preempt_count_dec_and_test(void)
  3628. {
  3629. GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
  3630. }
  3631. +static __always_inline bool __preempt_count_dec_and_test(void)
  3632. +{
  3633. + if (____preempt_count_dec_and_test())
  3634. + return true;
  3635. +#ifdef CONFIG_PREEMPT_LAZY
  3636. + if (current_thread_info()->preempt_lazy_count)
  3637. + return false;
  3638. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  3639. +#else
  3640. + return false;
  3641. +#endif
  3642. +}
  3643. +
  3644. /*
  3645. * Returns true when we need to resched and can (barring IRQ state).
  3646. */
  3647. static __always_inline bool should_resched(int preempt_offset)
  3648. {
  3649. +#ifdef CONFIG_PREEMPT_LAZY
  3650. + u32 tmp;
  3651. +
  3652. + tmp = raw_cpu_read_4(__preempt_count);
  3653. + if (tmp == preempt_offset)
  3654. + return true;
  3655. +
  3656. + /* preempt count == 0 ? */
  3657. + tmp &= ~PREEMPT_NEED_RESCHED;
  3658. + if (tmp)
  3659. + return false;
  3660. + if (current_thread_info()->preempt_lazy_count)
  3661. + return false;
  3662. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  3663. +#else
  3664. return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
  3665. +#endif
  3666. }
  3667. #ifdef CONFIG_PREEMPT
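The new __preempt_count_dec_and_test()/should_resched() only fall back to the lazy flag once both the per-CPU preempt count and the per-task preempt_lazy_count are zero. The standalone C sketch below mirrors that decision order; plain variables stand in for the per-CPU count and thread flags, and the inverted PREEMPT_NEED_RESCHED encoding is simplified, so treat the names as illustrative rather than the kernel API.

#include <stdbool.h>
#include <stdio.h>

#define PREEMPT_NEED_RESCHED	0x80000000u	/* folded resched bit, as on x86 */

/* Stand-ins for the per-CPU preempt count and per-thread state. */
static unsigned int preempt_count_raw;	/* includes the inverted resched bit */
static int preempt_lazy_count;
static bool tif_need_resched_lazy;

/* Decision order used by should_resched() in the CONFIG_PREEMPT_LAZY hunk. */
static bool should_resched_lazy(unsigned int preempt_offset)
{
	unsigned int tmp = preempt_count_raw;

	if (tmp == preempt_offset)	/* count == offset and an immediate resched is pending */
		return true;

	tmp &= ~PREEMPT_NEED_RESCHED;	/* ignore the folded resched bit */
	if (tmp)			/* still inside a preempt-off section */
		return false;
	if (preempt_lazy_count)		/* lazy preemption explicitly held off */
		return false;
	return tif_need_resched_lazy;	/* only the lazy flag is left to honour */
}

int main(void)
{
	preempt_count_raw = PREEMPT_NEED_RESCHED;	/* count 0, no immediate resched */
	tif_need_resched_lazy = true;
	printf("lazy resched allowed: %d\n", should_resched_lazy(0));

	preempt_lazy_count = 1;				/* inside a lazy-preempt-off region */
	printf("lazy resched allowed: %d\n", should_resched_lazy(0));
	return 0;
}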
  3668. diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
  3669. index 31eab867e6d3..0e7bfe98e1d1 100644
  3670. --- a/arch/x86/include/asm/signal.h
  3671. +++ b/arch/x86/include/asm/signal.h
  3672. @@ -23,6 +23,19 @@ typedef struct {
  3673. unsigned long sig[_NSIG_WORDS];
  3674. } sigset_t;
  3675. +/*
  3676. + * Because some traps use the IST stack, we must keep preemption
  3677. + * disabled while calling do_trap(), but do_trap() may call
  3678. + * force_sig_info() which will grab the signal spin_locks for the
  3679. + * task, which in PREEMPT_RT_FULL are mutexes. By defining
  3680. + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
  3681. + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
  3682. + * trap.
  3683. + */
  3684. +#if defined(CONFIG_PREEMPT_RT_FULL)
  3685. +#define ARCH_RT_DELAYS_SIGNAL_SEND
  3686. +#endif
  3687. +
  3688. #ifndef CONFIG_COMPAT
  3689. typedef sigset_t compat_sigset_t;
  3690. #endif
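ARCH_RT_DELAYS_SIGNAL_SEND defers force_sig_info() from the preemption-disabled trap path to exit-to-user, where sleeping locks are safe; the matching delivery hunk appears in arch/x86/kernel/signal.c further down. A small userspace-style sketch of the "record now, deliver at a safe point" pattern, with all names illustrative:

#include <stdbool.h>
#include <stdio.h>

/* Minimal stand-in for the per-task forced_info slot used by the patch. */
struct task {
	int pending_signo;	/* 0 means nothing deferred */
};

/* Called from a context where taking sleeping locks is forbidden. */
static void trap_path(struct task *t, int signo)
{
	t->pending_signo = signo;	/* just record it; no locks taken here */
}

/* Called on the way back to user space, where sleeping is fine. */
static void exit_to_user(struct task *t)
{
	if (t->pending_signo) {
		printf("delivering signal %d at a safe point\n", t->pending_signo);
		t->pending_signo = 0;	/* mirrors clearing forced_info.si_signo */
	}
}

int main(void)
{
	struct task t = { 0 };

	trap_path(&t, 11);	/* e.g. a fault raised from an IST trap */
	exit_to_user(&t);
	return 0;
}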
  3691. diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
  3692. index 6a998598f172..64fb5cbe54fa 100644
  3693. --- a/arch/x86/include/asm/stackprotector.h
  3694. +++ b/arch/x86/include/asm/stackprotector.h
  3695. @@ -57,7 +57,7 @@
  3696. */
  3697. static __always_inline void boot_init_stack_canary(void)
  3698. {
  3699. - u64 canary;
  3700. + u64 uninitialized_var(canary);
  3701. u64 tsc;
  3702. #ifdef CONFIG_X86_64
  3703. @@ -68,8 +68,16 @@ static __always_inline void boot_init_stack_canary(void)
  3704. * of randomness. The TSC only matters for very early init,
  3705. * there it already has some randomness on most systems. Later
  3706. * on during the bootup the random pool has true entropy too.
  3707. + *
  3708. + * For preempt-rt we need to weaken the randomness a bit, as
  3709. + * we can't call into the random generator from atomic context
  3710. + * due to locking constraints. We just leave canary
  3711. + * uninitialized and use the TSC based randomness on top of
  3712. + * it.
  3713. */
  3714. +#ifndef CONFIG_PREEMPT_RT_FULL
  3715. get_random_bytes(&canary, sizeof(canary));
  3716. +#endif
  3717. tsc = __native_read_tsc();
  3718. canary += tsc + (tsc << 32UL);
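With get_random_bytes() compiled out on PREEMPT_RT_FULL, the canary is seeded only from whatever happens to be on the stack plus the TSC mix in the hunk above. A tiny standalone sketch of that arithmetic (the uninitialized stack value is modelled as zero here, and read_tsc() is a fixed stand-in for __native_read_tsc()):

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

/* Stand-in for __native_read_tsc(); any varying counter would do. */
static uint64_t read_tsc(void)
{
	return 0x1122334455667788ULL;	/* fixed value just for the demo */
}

int main(void)
{
	uint64_t canary = 0;		/* RT path: left "uninitialized" */
	uint64_t tsc = read_tsc();

	/* Same mixing step as the patched boot_init_stack_canary(). */
	canary += tsc + (tsc << 32);

	printf("canary = 0x%016" PRIx64 "\n", canary);
	return 0;
}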
  3719. diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
  3720. index b4bdec3e9523..606144afb990 100644
  3721. --- a/arch/x86/include/asm/thread_info.h
  3722. +++ b/arch/x86/include/asm/thread_info.h
  3723. @@ -55,6 +55,8 @@ struct thread_info {
  3724. __u32 status; /* thread synchronous flags */
  3725. __u32 cpu; /* current CPU */
  3726. int saved_preempt_count;
  3727. + int preempt_lazy_count; /* 0 => lazy preemptable
  3728. + <0 => BUG */
  3729. mm_segment_t addr_limit;
  3730. void __user *sysenter_return;
  3731. unsigned int sig_on_uaccess_error:1;
  3732. @@ -95,6 +97,7 @@ struct thread_info {
  3733. #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
  3734. #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
  3735. #define TIF_SECCOMP 8 /* secure computing */
  3736. +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
  3737. #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
  3738. #define TIF_UPROBE 12 /* breakpointed or singlestepping */
  3739. #define TIF_NOTSC 16 /* TSC is not accessible in userland */
  3740. @@ -119,6 +122,7 @@ struct thread_info {
  3741. #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
  3742. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  3743. #define _TIF_SECCOMP (1 << TIF_SECCOMP)
  3744. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  3745. #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
  3746. #define _TIF_UPROBE (1 << TIF_UPROBE)
  3747. #define _TIF_NOTSC (1 << TIF_NOTSC)
  3748. @@ -168,6 +172,8 @@ struct thread_info {
  3749. #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
  3750. #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
  3751. +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  3752. +
  3753. #define STACK_WARN (THREAD_SIZE/8)
  3754. /*
  3755. diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
  3756. index d081e7e42fb3..705e3d89d84d 100644
  3757. --- a/arch/x86/include/asm/uaccess.h
  3758. +++ b/arch/x86/include/asm/uaccess.h
  3759. @@ -74,7 +74,8 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
  3760. * @addr: User space pointer to start of block to check
  3761. * @size: Size of block to check
  3762. *
  3763. - * Context: User context only. This function may sleep.
  3764. + * Context: User context only. This function may sleep if pagefaults are
  3765. + * enabled.
  3766. *
  3767. * Checks if a pointer to a block of memory in user space is valid.
  3768. *
  3769. @@ -145,7 +146,8 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
  3770. * @x: Variable to store result.
  3771. * @ptr: Source address, in user space.
  3772. *
  3773. - * Context: User context only. This function may sleep.
  3774. + * Context: User context only. This function may sleep if pagefaults are
  3775. + * enabled.
  3776. *
  3777. * This macro copies a single simple variable from user space to kernel
  3778. * space. It supports simple types like char and int, but not larger
  3779. @@ -240,7 +242,8 @@ extern void __put_user_8(void);
  3780. * @x: Value to copy to user space.
  3781. * @ptr: Destination address, in user space.
  3782. *
  3783. - * Context: User context only. This function may sleep.
  3784. + * Context: User context only. This function may sleep if pagefaults are
  3785. + * enabled.
  3786. *
  3787. * This macro copies a single simple value from kernel space to user
  3788. * space. It supports simple types like char and int, but not larger
  3789. @@ -459,7 +462,8 @@ struct __large_struct { unsigned long buf[100]; };
  3790. * @x: Variable to store result.
  3791. * @ptr: Source address, in user space.
  3792. *
  3793. - * Context: User context only. This function may sleep.
  3794. + * Context: User context only. This function may sleep if pagefaults are
  3795. + * enabled.
  3796. *
  3797. * This macro copies a single simple variable from user space to kernel
  3798. * space. It supports simple types like char and int, but not larger
  3799. @@ -483,7 +487,8 @@ struct __large_struct { unsigned long buf[100]; };
  3800. * @x: Value to copy to user space.
  3801. * @ptr: Destination address, in user space.
  3802. *
  3803. - * Context: User context only. This function may sleep.
  3804. + * Context: User context only. This function may sleep if pagefaults are
  3805. + * enabled.
  3806. *
  3807. * This macro copies a single simple value from kernel space to user
  3808. * space. It supports simple types like char and int, but not larger
  3809. diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
  3810. index 3c03a5de64d3..7c8ad3451988 100644
  3811. --- a/arch/x86/include/asm/uaccess_32.h
  3812. +++ b/arch/x86/include/asm/uaccess_32.h
  3813. @@ -70,7 +70,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
  3814. * @from: Source address, in kernel space.
  3815. * @n: Number of bytes to copy.
  3816. *
  3817. - * Context: User context only. This function may sleep.
  3818. + * Context: User context only. This function may sleep if pagefaults are
  3819. + * enabled.
  3820. *
  3821. * Copy data from kernel space to user space. Caller must check
  3822. * the specified block with access_ok() before calling this function.
  3823. @@ -117,7 +118,8 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
  3824. * @from: Source address, in user space.
  3825. * @n: Number of bytes to copy.
  3826. *
  3827. - * Context: User context only. This function may sleep.
  3828. + * Context: User context only. This function may sleep if pagefaults are
  3829. + * enabled.
  3830. *
  3831. * Copy data from user space to kernel space. Caller must check
  3832. * the specified block with access_ok() before calling this function.
  3833. diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
  3834. index fc808b83fccb..ebb40118abf5 100644
  3835. --- a/arch/x86/include/asm/uv/uv_bau.h
  3836. +++ b/arch/x86/include/asm/uv/uv_bau.h
  3837. @@ -615,9 +615,9 @@ struct bau_control {
  3838. cycles_t send_message;
  3839. cycles_t period_end;
  3840. cycles_t period_time;
  3841. - spinlock_t uvhub_lock;
  3842. - spinlock_t queue_lock;
  3843. - spinlock_t disable_lock;
  3844. + raw_spinlock_t uvhub_lock;
  3845. + raw_spinlock_t queue_lock;
  3846. + raw_spinlock_t disable_lock;
  3847. /* tunables */
  3848. int max_concurr;
  3849. int max_concurr_const;
  3850. @@ -776,15 +776,15 @@ static inline int atom_asr(short i, struct atomic_short *v)
  3851. * to be lowered below the current 'v'. atomic_add_unless can only stop
  3852. * on equal.
  3853. */
  3854. -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
  3855. +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u)
  3856. {
  3857. - spin_lock(lock);
  3858. + raw_spin_lock(lock);
  3859. if (atomic_read(v) >= u) {
  3860. - spin_unlock(lock);
  3861. + raw_spin_unlock(lock);
  3862. return 0;
  3863. }
  3864. atomic_inc(v);
  3865. - spin_unlock(lock);
  3866. + raw_spin_unlock(lock);
  3867. return 1;
  3868. }
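atomic_inc_unless_ge() now takes a raw_spinlock_t because it runs in contexts that must never block, and on RT a plain spinlock_t becomes a sleeping lock. A userspace analogue of the compare-then-increment-under-a-lock pattern, using a pthread mutex purely as an illustration:

#include <pthread.h>
#include <stdio.h>

/* Userspace analogue of atomic_inc_unless_ge(): increment v only while v < u. */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int v;

static int inc_unless_ge(int u)
{
	int done = 0;

	pthread_mutex_lock(&lock);	/* kernel: raw_spin_lock(lock) */
	if (v < u) {			/* kernel bails when atomic_read(v) >= u */
		v++;
		done = 1;
	}
	pthread_mutex_unlock(&lock);	/* kernel: raw_spin_unlock(lock) */
	return done;
}

int main(void)
{
	int i, ok = 0;

	for (i = 0; i < 5; i++)
		ok += inc_unless_ge(3);	/* only three increments may succeed */

	printf("succeeded %d times, v = %d\n", ok, v);
	return 0;
}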
  3869. diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
  3870. index a00ad8f2a657..c2729abe02bc 100644
  3871. --- a/arch/x86/include/asm/uv/uv_hub.h
  3872. +++ b/arch/x86/include/asm/uv/uv_hub.h
  3873. @@ -492,7 +492,7 @@ struct uv_blade_info {
  3874. unsigned short nr_online_cpus;
  3875. unsigned short pnode;
  3876. short memory_nid;
  3877. - spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
  3878. + raw_spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
  3879. unsigned long nmi_count; /* obsolete, see uv_hub_nmi */
  3880. };
  3881. extern struct uv_blade_info *uv_blade_info;
  3882. diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
  3883. index f4dc2462a1ac..07c6aba75aa0 100644
  3884. --- a/arch/x86/kernel/apic/io_apic.c
  3885. +++ b/arch/x86/kernel/apic/io_apic.c
  3886. @@ -1891,7 +1891,8 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
  3887. static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
  3888. {
  3889. /* If we are moving the irq we need to mask it */
  3890. - if (unlikely(irqd_is_setaffinity_pending(data))) {
  3891. + if (unlikely(irqd_is_setaffinity_pending(data) &&
  3892. + !irqd_irq_inprogress(data))) {
  3893. mask_ioapic(cfg);
  3894. return true;
  3895. }
  3896. diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
  3897. index c8d92950bc04..3d2fbca33b73 100644
  3898. --- a/arch/x86/kernel/apic/x2apic_uv_x.c
  3899. +++ b/arch/x86/kernel/apic/x2apic_uv_x.c
  3900. @@ -949,7 +949,7 @@ void __init uv_system_init(void)
  3901. uv_blade_info[blade].pnode = pnode;
  3902. uv_blade_info[blade].nr_possible_cpus = 0;
  3903. uv_blade_info[blade].nr_online_cpus = 0;
  3904. - spin_lock_init(&uv_blade_info[blade].nmi_lock);
  3905. + raw_spin_lock_init(&uv_blade_info[blade].nmi_lock);
  3906. min_pnode = min(pnode, min_pnode);
  3907. max_pnode = max(pnode, max_pnode);
  3908. blade++;
  3909. diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
  3910. index 9f6b9341950f..5701b507510b 100644
  3911. --- a/arch/x86/kernel/asm-offsets.c
  3912. +++ b/arch/x86/kernel/asm-offsets.c
  3913. @@ -32,6 +32,7 @@ void common(void) {
  3914. OFFSET(TI_flags, thread_info, flags);
  3915. OFFSET(TI_status, thread_info, status);
  3916. OFFSET(TI_addr_limit, thread_info, addr_limit);
  3917. + OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count);
  3918. BLANK();
  3919. OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
  3920. @@ -71,4 +72,5 @@ void common(void) {
  3921. BLANK();
  3922. DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
  3923. + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED);
  3924. }
  3925. diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
  3926. index 20190bdac9d5..9d46f9a133e1 100644
  3927. --- a/arch/x86/kernel/cpu/mcheck/mce.c
  3928. +++ b/arch/x86/kernel/cpu/mcheck/mce.c
  3929. @@ -41,6 +41,8 @@
  3930. #include <linux/debugfs.h>
  3931. #include <linux/irq_work.h>
  3932. #include <linux/export.h>
  3933. +#include <linux/jiffies.h>
  3934. +#include <linux/work-simple.h>
  3935. #include <asm/processor.h>
  3936. #include <asm/traps.h>
  3937. @@ -1267,7 +1269,7 @@ void mce_log_therm_throt_event(__u64 status)
  3938. static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
  3939. static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
  3940. -static DEFINE_PER_CPU(struct timer_list, mce_timer);
  3941. +static DEFINE_PER_CPU(struct hrtimer, mce_timer);
  3942. static unsigned long mce_adjust_timer_default(unsigned long interval)
  3943. {
  3944. @@ -1276,32 +1278,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
  3945. static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
  3946. -static void __restart_timer(struct timer_list *t, unsigned long interval)
  3947. +static enum hrtimer_restart __restart_timer(struct hrtimer *timer, unsigned long interval)
  3948. {
  3949. - unsigned long when = jiffies + interval;
  3950. - unsigned long flags;
  3951. -
  3952. - local_irq_save(flags);
  3953. -
  3954. - if (timer_pending(t)) {
  3955. - if (time_before(when, t->expires))
  3956. - mod_timer_pinned(t, when);
  3957. - } else {
  3958. - t->expires = round_jiffies(when);
  3959. - add_timer_on(t, smp_processor_id());
  3960. - }
  3961. -
  3962. - local_irq_restore(flags);
  3963. + if (!interval)
  3964. + return HRTIMER_NORESTART;
  3965. + hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(interval)));
  3966. + return HRTIMER_RESTART;
  3967. }
  3968. -static void mce_timer_fn(unsigned long data)
  3969. +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)
  3970. {
  3971. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  3972. - int cpu = smp_processor_id();
  3973. unsigned long iv;
  3974. - WARN_ON(cpu != data);
  3975. -
  3976. iv = __this_cpu_read(mce_next_interval);
  3977. if (mce_available(this_cpu_ptr(&cpu_info))) {
  3978. @@ -1324,7 +1312,7 @@ static void mce_timer_fn(unsigned long data)
  3979. done:
  3980. __this_cpu_write(mce_next_interval, iv);
  3981. - __restart_timer(t, iv);
  3982. + return __restart_timer(timer, iv);
  3983. }
  3984. /*
  3985. @@ -1332,7 +1320,7 @@ done:
  3986. */
  3987. void mce_timer_kick(unsigned long interval)
  3988. {
  3989. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  3990. + struct hrtimer *t = this_cpu_ptr(&mce_timer);
  3991. unsigned long iv = __this_cpu_read(mce_next_interval);
  3992. __restart_timer(t, interval);
  3993. @@ -1347,7 +1335,7 @@ static void mce_timer_delete_all(void)
  3994. int cpu;
  3995. for_each_online_cpu(cpu)
  3996. - del_timer_sync(&per_cpu(mce_timer, cpu));
  3997. + hrtimer_cancel(&per_cpu(mce_timer, cpu));
  3998. }
  3999. static void mce_do_trigger(struct work_struct *work)
  4000. @@ -1357,6 +1345,56 @@ static void mce_do_trigger(struct work_struct *work)
  4001. static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
  4002. +static void __mce_notify_work(struct swork_event *event)
  4003. +{
  4004. + /* Not more than two messages every minute */
  4005. + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
  4006. +
  4007. + /* wake processes polling /dev/mcelog */
  4008. + wake_up_interruptible(&mce_chrdev_wait);
  4009. +
  4010. + /*
  4011. + * There is no risk of missing notifications because
  4012. + * work_pending is always cleared before the function is
  4013. + * executed.
  4014. + */
  4015. + if (mce_helper[0] && !work_pending(&mce_trigger_work))
  4016. + schedule_work(&mce_trigger_work);
  4017. +
  4018. + if (__ratelimit(&ratelimit))
  4019. + pr_info(HW_ERR "Machine check events logged\n");
  4020. +}
  4021. +
  4022. +#ifdef CONFIG_PREEMPT_RT_FULL
  4023. +static bool notify_work_ready __read_mostly;
  4024. +static struct swork_event notify_work;
  4025. +
  4026. +static int mce_notify_work_init(void)
  4027. +{
  4028. + int err;
  4029. +
  4030. + err = swork_get();
  4031. + if (err)
  4032. + return err;
  4033. +
  4034. + INIT_SWORK(&notify_work, __mce_notify_work);
  4035. + notify_work_ready = true;
  4036. + return 0;
  4037. +}
  4038. +
  4039. +static void mce_notify_work(void)
  4040. +{
  4041. + if (notify_work_ready)
  4042. + swork_queue(&notify_work);
  4043. +}
  4044. +#else
  4045. +static void mce_notify_work(void)
  4046. +{
  4047. + __mce_notify_work(NULL);
  4048. +}
  4049. +static inline int mce_notify_work_init(void) { return 0; }
  4050. +#endif
  4051. +
  4052. /*
  4053. * Notify the user(s) about new machine check events.
  4054. * Can be called from interrupt context, but not from machine check/NMI
  4055. @@ -1364,19 +1402,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
  4056. */
  4057. int mce_notify_irq(void)
  4058. {
  4059. - /* Not more than two messages every minute */
  4060. - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
  4061. -
  4062. if (test_and_clear_bit(0, &mce_need_notify)) {
  4063. - /* wake processes polling /dev/mcelog */
  4064. - wake_up_interruptible(&mce_chrdev_wait);
  4065. -
  4066. - if (mce_helper[0])
  4067. - schedule_work(&mce_trigger_work);
  4068. -
  4069. - if (__ratelimit(&ratelimit))
  4070. - pr_info(HW_ERR "Machine check events logged\n");
  4071. -
  4072. + mce_notify_work();
  4073. return 1;
  4074. }
  4075. return 0;
  4076. @@ -1649,7 +1676,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
  4077. }
  4078. }
  4079. -static void mce_start_timer(unsigned int cpu, struct timer_list *t)
  4080. +static void mce_start_timer(unsigned int cpu, struct hrtimer *t)
  4081. {
  4082. unsigned long iv = check_interval * HZ;
  4083. @@ -1658,16 +1685,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
  4084. per_cpu(mce_next_interval, cpu) = iv;
  4085. - t->expires = round_jiffies(jiffies + iv);
  4086. - add_timer_on(t, cpu);
  4087. + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL),
  4088. + 0, HRTIMER_MODE_REL_PINNED);
  4089. }
  4090. static void __mcheck_cpu_init_timer(void)
  4091. {
  4092. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  4093. + struct hrtimer *t = this_cpu_ptr(&mce_timer);
  4094. unsigned int cpu = smp_processor_id();
  4095. - setup_timer(t, mce_timer_fn, cpu);
  4096. + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  4097. + t->function = mce_timer_fn;
  4098. mce_start_timer(cpu, t);
  4099. }
  4100. @@ -2345,6 +2373,8 @@ static void mce_disable_cpu(void *h)
  4101. if (!mce_available(raw_cpu_ptr(&cpu_info)))
  4102. return;
  4103. + hrtimer_cancel(this_cpu_ptr(&mce_timer));
  4104. +
  4105. if (!(action & CPU_TASKS_FROZEN))
  4106. cmci_clear();
  4107. for (i = 0; i < mca_cfg.banks; i++) {
  4108. @@ -2371,6 +2401,7 @@ static void mce_reenable_cpu(void *h)
  4109. if (b->init)
  4110. wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
  4111. }
  4112. + __mcheck_cpu_init_timer();
  4113. }
  4114. /* Get notified when a cpu comes on/off. Be hotplug friendly. */
  4115. @@ -2378,7 +2409,6 @@ static int
  4116. mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
  4117. {
  4118. unsigned int cpu = (unsigned long)hcpu;
  4119. - struct timer_list *t = &per_cpu(mce_timer, cpu);
  4120. switch (action & ~CPU_TASKS_FROZEN) {
  4121. case CPU_ONLINE:
  4122. @@ -2398,11 +2428,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
  4123. break;
  4124. case CPU_DOWN_PREPARE:
  4125. smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
  4126. - del_timer_sync(t);
  4127. break;
  4128. case CPU_DOWN_FAILED:
  4129. smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
  4130. - mce_start_timer(cpu, t);
  4131. break;
  4132. }
  4133. @@ -2441,6 +2469,10 @@ static __init int mcheck_init_device(void)
  4134. goto err_out;
  4135. }
  4136. + err = mce_notify_work_init();
  4137. + if (err)
  4138. + goto err_out;
  4139. +
  4140. if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) {
  4141. err = -ENOMEM;
  4142. goto err_out;
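Because the MCE poller now runs from an hrtimer, the jiffies-based interval has to be converted to nanoseconds: mce_start_timer() uses ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL) and __restart_timer() uses jiffies_to_nsecs(interval). A small worked sketch of that conversion; HZ=250 and the 300-second default interval are assumed purely for the example (the relationships are exact whenever HZ divides the unit):

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

#define HZ 250U				/* assumed config value, example only */

/* Same relationships the kernel helpers encode for this HZ. */
static uint64_t jiffies_to_usecs(uint64_t j) { return j * (1000000ULL / HZ); }
static uint64_t jiffies_to_nsecs(uint64_t j) { return j * (1000000000ULL / HZ); }

int main(void)
{
	uint64_t check_interval = 300;		/* seconds, the MCE default */
	uint64_t iv = check_interval * HZ;	/* interval in jiffies, as in the patch */

	/* The two expressions used in mce_start_timer()/__restart_timer() agree. */
	printf("interval: %" PRIu64 " jiffies\n", iv);
	printf("as ns via usecs*1000: %" PRIu64 "\n", jiffies_to_usecs(iv) * 1000ULL);
	printf("as ns directly:       %" PRIu64 "\n", jiffies_to_nsecs(iv));
	return 0;
}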
  4143. diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
  4144. index 358c54ad20d4..94689f19ad92 100644
  4145. --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
  4146. +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
  4147. @@ -119,7 +119,7 @@ static struct perf_pmu_events_attr event_attr_##v = { \
  4148. };
  4149. struct rapl_pmu {
  4150. - spinlock_t lock;
  4151. + raw_spinlock_t lock;
  4152. int n_active; /* number of active events */
  4153. struct list_head active_list;
  4154. struct pmu *pmu; /* pointer to rapl_pmu_class */
  4155. @@ -223,13 +223,13 @@ static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
  4156. if (!pmu->n_active)
  4157. return HRTIMER_NORESTART;
  4158. - spin_lock_irqsave(&pmu->lock, flags);
  4159. + raw_spin_lock_irqsave(&pmu->lock, flags);
  4160. list_for_each_entry(event, &pmu->active_list, active_entry) {
  4161. rapl_event_update(event);
  4162. }
  4163. - spin_unlock_irqrestore(&pmu->lock, flags);
  4164. + raw_spin_unlock_irqrestore(&pmu->lock, flags);
  4165. hrtimer_forward_now(hrtimer, pmu->timer_interval);
  4166. @@ -266,9 +266,9 @@ static void rapl_pmu_event_start(struct perf_event *event, int mode)
  4167. struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
  4168. unsigned long flags;
  4169. - spin_lock_irqsave(&pmu->lock, flags);
  4170. + raw_spin_lock_irqsave(&pmu->lock, flags);
  4171. __rapl_pmu_event_start(pmu, event);
  4172. - spin_unlock_irqrestore(&pmu->lock, flags);
  4173. + raw_spin_unlock_irqrestore(&pmu->lock, flags);
  4174. }
  4175. static void rapl_pmu_event_stop(struct perf_event *event, int mode)
  4176. @@ -277,7 +277,7 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode)
  4177. struct hw_perf_event *hwc = &event->hw;
  4178. unsigned long flags;
  4179. - spin_lock_irqsave(&pmu->lock, flags);
  4180. + raw_spin_lock_irqsave(&pmu->lock, flags);
  4181. /* mark event as deactivated and stopped */
  4182. if (!(hwc->state & PERF_HES_STOPPED)) {
  4183. @@ -302,7 +302,7 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode)
  4184. hwc->state |= PERF_HES_UPTODATE;
  4185. }
  4186. - spin_unlock_irqrestore(&pmu->lock, flags);
  4187. + raw_spin_unlock_irqrestore(&pmu->lock, flags);
  4188. }
  4189. static int rapl_pmu_event_add(struct perf_event *event, int mode)
  4190. @@ -311,14 +311,14 @@ static int rapl_pmu_event_add(struct perf_event *event, int mode)
  4191. struct hw_perf_event *hwc = &event->hw;
  4192. unsigned long flags;
  4193. - spin_lock_irqsave(&pmu->lock, flags);
  4194. + raw_spin_lock_irqsave(&pmu->lock, flags);
  4195. hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
  4196. if (mode & PERF_EF_START)
  4197. __rapl_pmu_event_start(pmu, event);
  4198. - spin_unlock_irqrestore(&pmu->lock, flags);
  4199. + raw_spin_unlock_irqrestore(&pmu->lock, flags);
  4200. return 0;
  4201. }
  4202. @@ -594,7 +594,7 @@ static int rapl_cpu_prepare(int cpu)
  4203. pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
  4204. if (!pmu)
  4205. return -1;
  4206. - spin_lock_init(&pmu->lock);
  4207. + raw_spin_lock_init(&pmu->lock);
  4208. INIT_LIST_HEAD(&pmu->active_list);
  4209. diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
  4210. index 464ffd69b92e..00db1aad1548 100644
  4211. --- a/arch/x86/kernel/dumpstack_32.c
  4212. +++ b/arch/x86/kernel/dumpstack_32.c
  4213. @@ -42,7 +42,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
  4214. unsigned long *stack, unsigned long bp,
  4215. const struct stacktrace_ops *ops, void *data)
  4216. {
  4217. - const unsigned cpu = get_cpu();
  4218. + const unsigned cpu = get_cpu_light();
  4219. int graph = 0;
  4220. u32 *prev_esp;
  4221. @@ -86,7 +86,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
  4222. break;
  4223. touch_nmi_watchdog();
  4224. }
  4225. - put_cpu();
  4226. + put_cpu_light();
  4227. }
  4228. EXPORT_SYMBOL(dump_trace);
  4229. diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
  4230. index 5f1c6266eb30..c331e3fef465 100644
  4231. --- a/arch/x86/kernel/dumpstack_64.c
  4232. +++ b/arch/x86/kernel/dumpstack_64.c
  4233. @@ -152,7 +152,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
  4234. unsigned long *stack, unsigned long bp,
  4235. const struct stacktrace_ops *ops, void *data)
  4236. {
  4237. - const unsigned cpu = get_cpu();
  4238. + const unsigned cpu = get_cpu_light();
  4239. struct thread_info *tinfo;
  4240. unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
  4241. unsigned long dummy;
  4242. @@ -241,7 +241,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
  4243. * This handles the process stack:
  4244. */
  4245. bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
  4246. - put_cpu();
  4247. + put_cpu_light();
  4248. }
  4249. EXPORT_SYMBOL(dump_trace);
  4250. @@ -255,7 +255,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
  4251. int cpu;
  4252. int i;
  4253. - preempt_disable();
  4254. + migrate_disable();
  4255. cpu = smp_processor_id();
  4256. irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
  4257. @@ -291,7 +291,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
  4258. pr_cont(" %016lx", *stack++);
  4259. touch_nmi_watchdog();
  4260. }
  4261. - preempt_enable();
  4262. + migrate_enable();
  4263. pr_cont("\n");
  4264. show_trace_log_lvl(task, regs, sp, bp, log_lvl);
  4265. diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
  4266. index 1c309763e321..8612b314af92 100644
  4267. --- a/arch/x86/kernel/entry_32.S
  4268. +++ b/arch/x86/kernel/entry_32.S
  4269. @@ -359,8 +359,24 @@ END(ret_from_exception)
  4270. ENTRY(resume_kernel)
  4271. DISABLE_INTERRUPTS(CLBR_ANY)
  4272. need_resched:
  4273. + # preempt count == 0 + NEED_RS set?
  4274. cmpl $0,PER_CPU_VAR(__preempt_count)
  4275. +#ifndef CONFIG_PREEMPT_LAZY
  4276. jnz restore_all
  4277. +#else
  4278. + jz test_int_off
  4279. +
  4280. + # atleast preempt count == 0 ?
  4281. + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
  4282. + jne restore_all
  4283. +
  4284. + cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ?
  4285. + jnz restore_all
  4286. +
  4287. + testl $_TIF_NEED_RESCHED_LAZY, TI_flags(%ebp)
  4288. + jz restore_all
  4289. +test_int_off:
  4290. +#endif
  4291. testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
  4292. jz restore_all
  4293. call preempt_schedule_irq
  4294. @@ -594,7 +610,7 @@ ENDPROC(system_call)
  4295. ALIGN
  4296. RING0_PTREGS_FRAME # can't unwind into user space anyway
  4297. work_pending:
  4298. - testb $_TIF_NEED_RESCHED, %cl
  4299. + testl $_TIF_NEED_RESCHED_MASK, %ecx
  4300. jz work_notifysig
  4301. work_resched:
  4302. call schedule
  4303. @@ -607,7 +623,7 @@ work_resched:
  4304. andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
  4305. # than syscall tracing?
  4306. jz restore_all
  4307. - testb $_TIF_NEED_RESCHED, %cl
  4308. + testl $_TIF_NEED_RESCHED_MASK, %ecx
  4309. jnz work_resched
  4310. work_notifysig: # deal with pending signals and
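The switch from "testb $_TIF_NEED_RESCHED, %cl" to "testl $_TIF_NEED_RESCHED_MASK, %ecx" is needed because TIF_NEED_RESCHED_LAZY is bit 9, which a byte-wide test of %cl (bits 0-7) can never observe. A small C sketch of the two masks; the bit numbers are taken from the thread_info.h hunk above, with NEED_RESCHED at bit 3 in this tree:

#include <stdio.h>

#define TIF_NEED_RESCHED	3
#define TIF_NEED_RESCHED_LAZY	9

#define _TIF_NEED_RESCHED	(1u << TIF_NEED_RESCHED)
#define _TIF_NEED_RESCHED_LAZY	(1u << TIF_NEED_RESCHED_LAZY)
#define _TIF_NEED_RESCHED_MASK	(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)

int main(void)
{
	unsigned int flags = _TIF_NEED_RESCHED_LAZY;	/* only the lazy bit set */

	/* "testb %cl" can only see bits 0-7 of the flags word. */
	printf("byte-wide test sees it: %d\n", !!(flags & 0xff & _TIF_NEED_RESCHED_MASK));
	/* "testl %ecx" tests the full 32-bit word, so bit 9 is visible. */
	printf("word-wide test sees it: %d\n", !!(flags & _TIF_NEED_RESCHED_MASK));
	return 0;
}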
  4311. diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
  4312. index 6c9cb6073832..db2a15c91a65 100644
  4313. --- a/arch/x86/kernel/entry_64.S
  4314. +++ b/arch/x86/kernel/entry_64.S
  4315. @@ -370,8 +370,8 @@ GLOBAL(int_with_check)
  4316. /* First do a reschedule test. */
  4317. /* edx: work, edi: workmask */
  4318. int_careful:
  4319. - bt $TIF_NEED_RESCHED,%edx
  4320. - jnc int_very_careful
  4321. + testl $_TIF_NEED_RESCHED_MASK,%edx
  4322. + jz int_very_careful
  4323. TRACE_IRQS_ON
  4324. ENABLE_INTERRUPTS(CLBR_NONE)
  4325. pushq_cfi %rdi
  4326. @@ -776,7 +776,23 @@ retint_kernel:
  4327. bt $9,EFLAGS(%rsp) /* interrupts were off? */
  4328. jnc 1f
  4329. 0: cmpl $0,PER_CPU_VAR(__preempt_count)
  4330. +#ifndef CONFIG_PREEMPT_LAZY
  4331. jnz 1f
  4332. +#else
  4333. + jz do_preempt_schedule_irq
  4334. +
  4335. + # atleast preempt count == 0 ?
  4336. + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
  4337. + jnz 1f
  4338. +
  4339. + GET_THREAD_INFO(%rcx)
  4340. + cmpl $0, TI_preempt_lazy_count(%rcx)
  4341. + jnz 1f
  4342. +
  4343. + bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx)
  4344. + jnc 1f
  4345. +do_preempt_schedule_irq:
  4346. +#endif
  4347. call preempt_schedule_irq
  4348. jmp 0b
  4349. 1:
  4350. @@ -844,8 +860,8 @@ native_irq_return_ldt:
  4351. /* edi: workmask, edx: work */
  4352. retint_careful:
  4353. CFI_RESTORE_STATE
  4354. - bt $TIF_NEED_RESCHED,%edx
  4355. - jnc retint_signal
  4356. + testl $_TIF_NEED_RESCHED_MASK,%edx
  4357. + jz retint_signal
  4358. TRACE_IRQS_ON
  4359. ENABLE_INTERRUPTS(CLBR_NONE)
  4360. pushq_cfi %rdi
  4361. @@ -1118,6 +1134,7 @@ bad_gs:
  4362. jmp 2b
  4363. .previous
  4364. +#ifndef CONFIG_PREEMPT_RT_FULL
  4365. /* Call softirq on interrupt stack. Interrupts are off. */
  4366. ENTRY(do_softirq_own_stack)
  4367. CFI_STARTPROC
  4368. @@ -1137,6 +1154,7 @@ ENTRY(do_softirq_own_stack)
  4369. ret
  4370. CFI_ENDPROC
  4371. END(do_softirq_own_stack)
  4372. +#endif
  4373. #ifdef CONFIG_XEN
  4374. idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
  4375. diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
  4376. index f9fd86a7fcc7..521ef3cc8066 100644
  4377. --- a/arch/x86/kernel/irq_32.c
  4378. +++ b/arch/x86/kernel/irq_32.c
  4379. @@ -135,6 +135,7 @@ void irq_ctx_init(int cpu)
  4380. cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
  4381. }
  4382. +#ifndef CONFIG_PREEMPT_RT_FULL
  4383. void do_softirq_own_stack(void)
  4384. {
  4385. struct thread_info *curstk;
  4386. @@ -153,6 +154,7 @@ void do_softirq_own_stack(void)
  4387. call_on_stack(__do_softirq, isp);
  4388. }
  4389. +#endif
  4390. bool handle_irq(unsigned irq, struct pt_regs *regs)
  4391. {
  4392. diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
  4393. index 9435620062df..ba97b5b45879 100644
  4394. --- a/arch/x86/kernel/kvm.c
  4395. +++ b/arch/x86/kernel/kvm.c
  4396. @@ -36,6 +36,7 @@
  4397. #include <linux/kprobes.h>
  4398. #include <linux/debugfs.h>
  4399. #include <linux/nmi.h>
  4400. +#include <linux/wait-simple.h>
  4401. #include <asm/timer.h>
  4402. #include <asm/cpu.h>
  4403. #include <asm/traps.h>
  4404. @@ -91,14 +92,14 @@ static void kvm_io_delay(void)
  4405. struct kvm_task_sleep_node {
  4406. struct hlist_node link;
  4407. - wait_queue_head_t wq;
  4408. + struct swait_head wq;
  4409. u32 token;
  4410. int cpu;
  4411. bool halted;
  4412. };
  4413. static struct kvm_task_sleep_head {
  4414. - spinlock_t lock;
  4415. + raw_spinlock_t lock;
  4416. struct hlist_head list;
  4417. } async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
  4418. @@ -122,17 +123,17 @@ void kvm_async_pf_task_wait(u32 token)
  4419. u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
  4420. struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
  4421. struct kvm_task_sleep_node n, *e;
  4422. - DEFINE_WAIT(wait);
  4423. + DEFINE_SWAITER(wait);
  4424. rcu_irq_enter();
  4425. - spin_lock(&b->lock);
  4426. + raw_spin_lock(&b->lock);
  4427. e = _find_apf_task(b, token);
  4428. if (e) {
  4429. /* dummy entry exist -> wake up was delivered ahead of PF */
  4430. hlist_del(&e->link);
  4431. kfree(e);
  4432. - spin_unlock(&b->lock);
  4433. + raw_spin_unlock(&b->lock);
  4434. rcu_irq_exit();
  4435. return;
  4436. @@ -141,13 +142,13 @@ void kvm_async_pf_task_wait(u32 token)
  4437. n.token = token;
  4438. n.cpu = smp_processor_id();
  4439. n.halted = is_idle_task(current) || preempt_count() > 1;
  4440. - init_waitqueue_head(&n.wq);
  4441. + init_swait_head(&n.wq);
  4442. hlist_add_head(&n.link, &b->list);
  4443. - spin_unlock(&b->lock);
  4444. + raw_spin_unlock(&b->lock);
  4445. for (;;) {
  4446. if (!n.halted)
  4447. - prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
  4448. + swait_prepare(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
  4449. if (hlist_unhashed(&n.link))
  4450. break;
  4451. @@ -166,7 +167,7 @@ void kvm_async_pf_task_wait(u32 token)
  4452. }
  4453. }
  4454. if (!n.halted)
  4455. - finish_wait(&n.wq, &wait);
  4456. + swait_finish(&n.wq, &wait);
  4457. rcu_irq_exit();
  4458. return;
  4459. @@ -178,8 +179,8 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n)
  4460. hlist_del_init(&n->link);
  4461. if (n->halted)
  4462. smp_send_reschedule(n->cpu);
  4463. - else if (waitqueue_active(&n->wq))
  4464. - wake_up(&n->wq);
  4465. + else if (swaitqueue_active(&n->wq))
  4466. + swait_wake(&n->wq);
  4467. }
  4468. static void apf_task_wake_all(void)
  4469. @@ -189,14 +190,14 @@ static void apf_task_wake_all(void)
  4470. for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
  4471. struct hlist_node *p, *next;
  4472. struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
  4473. - spin_lock(&b->lock);
  4474. + raw_spin_lock(&b->lock);
  4475. hlist_for_each_safe(p, next, &b->list) {
  4476. struct kvm_task_sleep_node *n =
  4477. hlist_entry(p, typeof(*n), link);
  4478. if (n->cpu == smp_processor_id())
  4479. apf_task_wake_one(n);
  4480. }
  4481. - spin_unlock(&b->lock);
  4482. + raw_spin_unlock(&b->lock);
  4483. }
  4484. }
  4485. @@ -212,7 +213,7 @@ void kvm_async_pf_task_wake(u32 token)
  4486. }
  4487. again:
  4488. - spin_lock(&b->lock);
  4489. + raw_spin_lock(&b->lock);
  4490. n = _find_apf_task(b, token);
  4491. if (!n) {
  4492. /*
  4493. @@ -225,17 +226,17 @@ again:
  4494. * Allocation failed! Busy wait while other cpu
  4495. * handles async PF.
  4496. */
  4497. - spin_unlock(&b->lock);
  4498. + raw_spin_unlock(&b->lock);
  4499. cpu_relax();
  4500. goto again;
  4501. }
  4502. n->token = token;
  4503. n->cpu = smp_processor_id();
  4504. - init_waitqueue_head(&n->wq);
  4505. + init_swait_head(&n->wq);
  4506. hlist_add_head(&n->link, &b->list);
  4507. } else
  4508. apf_task_wake_one(n);
  4509. - spin_unlock(&b->lock);
  4510. + raw_spin_unlock(&b->lock);
  4511. return;
  4512. }
  4513. EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
  4514. @@ -486,7 +487,7 @@ void __init kvm_guest_init(void)
  4515. paravirt_ops_setup();
  4516. register_reboot_notifier(&kvm_pv_reboot_nb);
  4517. for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
  4518. - spin_lock_init(&async_pf_sleepers[i].lock);
  4519. + raw_spin_lock_init(&async_pf_sleepers[i].lock);
  4520. if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
  4521. x86_init.irqs.trap_init = kvm_apf_trap_init;
  4522. diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
  4523. index 8ed2106b06da..3a70713079c5 100644
  4524. --- a/arch/x86/kernel/process_32.c
  4525. +++ b/arch/x86/kernel/process_32.c
  4526. @@ -35,6 +35,7 @@
  4527. #include <linux/uaccess.h>
  4528. #include <linux/io.h>
  4529. #include <linux/kdebug.h>
  4530. +#include <linux/highmem.h>
  4531. #include <asm/pgtable.h>
  4532. #include <asm/ldt.h>
  4533. @@ -210,6 +211,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
  4534. }
  4535. EXPORT_SYMBOL_GPL(start_thread);
  4536. +#ifdef CONFIG_PREEMPT_RT_FULL
  4537. +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
  4538. +{
  4539. + int i;
  4540. +
  4541. + /*
  4542. + * Clear @prev's kmap_atomic mappings
  4543. + */
  4544. + for (i = 0; i < prev_p->kmap_idx; i++) {
  4545. + int idx = i + KM_TYPE_NR * smp_processor_id();
  4546. + pte_t *ptep = kmap_pte - idx;
  4547. +
  4548. + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
  4549. + }
  4550. + /*
  4551. + * Restore @next_p's kmap_atomic mappings
  4552. + */
  4553. + for (i = 0; i < next_p->kmap_idx; i++) {
  4554. + int idx = i + KM_TYPE_NR * smp_processor_id();
  4555. +
  4556. + if (!pte_none(next_p->kmap_pte[i]))
  4557. + set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
  4558. + }
  4559. +}
  4560. +#else
  4561. +static inline void
  4562. +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
  4563. +#endif
  4564. +
  4565. /*
  4566. * switch_to(x,y) should switch tasks from x to y.
  4567. @@ -292,6 +322,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
  4568. task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
  4569. __switch_to_xtra(prev_p, next_p, tss);
  4570. + switch_kmaps(prev_p, next_p);
  4571. +
  4572. /*
  4573. * Leave lazy mode, flushing any hypercalls made here.
  4574. * This must be done before restoring TLS segments so
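switch_kmaps() tears down the outgoing task's kmap_atomic slots and re-installs the incoming task's saved PTEs, because on RT a task holding a kmap_atomic mapping can now be scheduled out. A userspace-style sketch of the save/restore idea, with plain arrays standing in for the per-CPU fixmap PTEs; every name here is illustrative:

#include <stdio.h>

#define KM_SLOTS 16

/* Stand-ins: the per-CPU fixmap slots and each task's saved mappings. */
static unsigned long cpu_slots[KM_SLOTS];

struct task {
	const char *name;
	int kmap_idx;			/* how many slots the task has in use */
	unsigned long kmap_pte[KM_SLOTS];
};

static void switch_kmaps(struct task *prev, struct task *next)
{
	int i;

	for (i = 0; i < prev->kmap_idx; i++)	/* clear @prev's mappings */
		cpu_slots[i] = 0;
	for (i = 0; i < next->kmap_idx; i++)	/* restore @next's mappings */
		if (next->kmap_pte[i])
			cpu_slots[i] = next->kmap_pte[i];
}

int main(void)
{
	struct task a = { "A", 2, { 0x1000, 0x2000 } };
	struct task b = { "B", 1, { 0x3000 } };

	switch_kmaps(&a, &b);	/* A scheduled out while holding two mappings */
	printf("slot0=%#lx slot1=%#lx\n", cpu_slots[0], cpu_slots[1]);
	return 0;
}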
  4575. diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
  4576. index 5d2e2e9af1c4..1c9cc74ba99b 100644
  4577. --- a/arch/x86/kernel/signal.c
  4578. +++ b/arch/x86/kernel/signal.c
  4579. @@ -726,6 +726,14 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
  4580. {
  4581. user_exit();
  4582. +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
  4583. + if (unlikely(current->forced_info.si_signo)) {
  4584. + struct task_struct *t = current;
  4585. + force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
  4586. + t->forced_info.si_signo = 0;
  4587. + }
  4588. +#endif
  4589. +
  4590. if (thread_info_flags & _TIF_UPROBE)
  4591. uprobe_notify_resume(regs);
  4592. diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
  4593. index 7dd9a8d3911a..192de5908083 100644
  4594. --- a/arch/x86/kvm/lapic.c
  4595. +++ b/arch/x86/kvm/lapic.c
  4596. @@ -1106,7 +1106,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
  4597. static void apic_timer_expired(struct kvm_lapic *apic)
  4598. {
  4599. struct kvm_vcpu *vcpu = apic->vcpu;
  4600. - wait_queue_head_t *q = &vcpu->wq;
  4601. + struct swait_head *q = &vcpu->wq;
  4602. struct kvm_timer *ktimer = &apic->lapic_timer;
  4603. if (atomic_read(&apic->lapic_timer.pending))
  4604. @@ -1115,8 +1115,8 @@ static void apic_timer_expired(struct kvm_lapic *apic)
  4605. atomic_inc(&apic->lapic_timer.pending);
  4606. kvm_set_pending_timer(vcpu);
  4607. - if (waitqueue_active(q))
  4608. - wake_up_interruptible(q);
  4609. + if (swaitqueue_active(q))
  4610. + swait_wake_interruptible(q);
  4611. if (apic_lvtt_tscdeadline(apic))
  4612. ktimer->expired_tscdeadline = ktimer->tscdeadline;
  4613. @@ -1169,8 +1169,36 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
  4614. __delay(tsc_deadline - guest_tsc);
  4615. }
  4616. +static enum hrtimer_restart apic_timer_fn(struct hrtimer *data);
  4617. +
  4618. +static void __apic_timer_expired(struct hrtimer *data)
  4619. +{
  4620. + int ret, i = 0;
  4621. + enum hrtimer_restart r;
  4622. + struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
  4623. +
  4624. + r = apic_timer_fn(data);
  4625. +
  4626. + if (r == HRTIMER_RESTART) {
  4627. + do {
  4628. + ret = hrtimer_start_expires(data, HRTIMER_MODE_ABS);
  4629. + if (ret == -ETIME)
  4630. + hrtimer_add_expires_ns(&ktimer->timer,
  4631. + ktimer->period);
  4632. + i++;
  4633. + } while (ret == -ETIME && i < 10);
  4634. +
  4635. + if (ret == -ETIME) {
  4636. + printk_once(KERN_ERR "%s: failed to reprogram timer\n",
  4637. + __func__);
  4638. + WARN_ON_ONCE(1);
  4639. + }
  4640. + }
  4641. +}
  4642. +
  4643. static void start_apic_timer(struct kvm_lapic *apic)
  4644. {
  4645. + int ret;
  4646. ktime_t now;
  4647. atomic_set(&apic->lapic_timer.pending, 0);
  4648. @@ -1201,9 +1229,11 @@ static void start_apic_timer(struct kvm_lapic *apic)
  4649. }
  4650. }
  4651. - hrtimer_start(&apic->lapic_timer.timer,
  4652. + ret = hrtimer_start(&apic->lapic_timer.timer,
  4653. ktime_add_ns(now, apic->lapic_timer.period),
  4654. HRTIMER_MODE_ABS);
  4655. + if (ret == -ETIME)
  4656. + __apic_timer_expired(&apic->lapic_timer.timer);
  4657. apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
  4658. PRIx64 ", "
  4659. @@ -1235,8 +1265,10 @@ static void start_apic_timer(struct kvm_lapic *apic)
  4660. do_div(ns, this_tsc_khz);
  4661. expire = ktime_add_ns(now, ns);
  4662. expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
  4663. - hrtimer_start(&apic->lapic_timer.timer,
  4664. + ret = hrtimer_start(&apic->lapic_timer.timer,
  4665. expire, HRTIMER_MODE_ABS);
  4666. + if (ret == -ETIME)
  4667. + __apic_timer_expired(&apic->lapic_timer.timer);
  4668. } else
  4669. apic_timer_expired(apic);
  4670. @@ -1709,6 +1741,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
  4671. hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
  4672. HRTIMER_MODE_ABS);
  4673. apic->lapic_timer.timer.function = apic_timer_fn;
  4674. + apic->lapic_timer.timer.irqsafe = 1;
  4675. /*
  4676. * APIC is created enabled. This will prevent kvm_lapic_set_base from
  4677. @@ -1836,7 +1869,8 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
  4678. timer = &vcpu->arch.apic->lapic_timer.timer;
  4679. if (hrtimer_cancel(timer))
  4680. - hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
  4681. + if (hrtimer_start_expires(timer, HRTIMER_MODE_ABS) == -ETIME)
  4682. + __apic_timer_expired(timer);
  4683. }
  4684. /*
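__apic_timer_expired() retries hrtimer_start_expires() a bounded number of times because, with the timer marked irqsafe, restarting can race against an already-passed expiry and return -ETIME. A generic bounded-retry sketch of that control flow; the retry limit of ten mirrors the hunk, while timer_start() and the rest are illustrative stand-ins:

#include <stdio.h>

#define ETIME 62	/* Linux asm-generic errno value, used here only for the demo */

static int attempts_needed = 3;		/* pretend the first two restarts race */

/* Stand-in for hrtimer_start_expires(): fails with -ETIME until it catches up. */
static int timer_start(void)
{
	if (--attempts_needed > 0)
		return -ETIME;
	return 0;
}

static void restart_expired_timer(void)
{
	int ret, i = 0;

	do {
		ret = timer_start();
		/* kernel pushes the expiry forward by one period on -ETIME */
		i++;
	} while (ret == -ETIME && i < 10);

	if (ret == -ETIME)
		printf("failed to reprogram timer\n");	/* the WARN_ON_ONCE case */
	else
		printf("timer reprogrammed after %d attempts\n", i);
}

int main(void)
{
	restart_expired_timer();
	return 0;
}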
  4685. diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
  4686. index bd84d2226ca1..ba639dd5f09d 100644
  4687. --- a/arch/x86/kvm/x86.c
  4688. +++ b/arch/x86/kvm/x86.c
  4689. @@ -5815,6 +5815,13 @@ int kvm_arch_init(void *opaque)
  4690. goto out;
  4691. }
  4692. +#ifdef CONFIG_PREEMPT_RT_FULL
  4693. + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
  4694. + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
  4695. + return -EOPNOTSUPP;
  4696. + }
  4697. +#endif
  4698. +
  4699. r = kvm_mmu_module_init();
  4700. if (r)
  4701. goto out_free_percpu;
  4702. diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
  4703. index e2f5e21c03b3..91d93b95bd86 100644
  4704. --- a/arch/x86/lib/usercopy_32.c
  4705. +++ b/arch/x86/lib/usercopy_32.c
  4706. @@ -647,7 +647,8 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
  4707. * @from: Source address, in kernel space.
  4708. * @n: Number of bytes to copy.
  4709. *
  4710. - * Context: User context only. This function may sleep.
  4711. + * Context: User context only. This function may sleep if pagefaults are
  4712. + * enabled.
  4713. *
  4714. * Copy data from kernel space to user space.
  4715. *
  4716. @@ -668,7 +669,8 @@ EXPORT_SYMBOL(_copy_to_user);
  4717. * @from: Source address, in user space.
  4718. * @n: Number of bytes to copy.
  4719. *
  4720. - * Context: User context only. This function may sleep.
  4721. + * Context: User context only. This function may sleep if pagefaults are
  4722. + * enabled.
  4723. *
  4724. * Copy data from user space to kernel space.
  4725. *
  4726. diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
  4727. index 62855ac37ab7..1d3beaf7526f 100644
  4728. --- a/arch/x86/mm/fault.c
  4729. +++ b/arch/x86/mm/fault.c
  4730. @@ -13,6 +13,7 @@
  4731. #include <linux/hugetlb.h> /* hstate_index_to_shift */
  4732. #include <linux/prefetch.h> /* prefetchw */
  4733. #include <linux/context_tracking.h> /* exception_enter(), ... */
  4734. +#include <linux/uaccess.h> /* faulthandler_disabled() */
  4735. #include <asm/traps.h> /* dotraplinkage, ... */
  4736. #include <asm/pgalloc.h> /* pgd_*(), ... */
  4737. @@ -1133,9 +1134,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
  4738. /*
  4739. * If we're in an interrupt, have no user context or are running
  4740. - * in an atomic region then we must not take the fault:
  4741. + * in a region with pagefaults disabled then we must not take the fault
  4742. */
  4743. - if (unlikely(in_atomic() || !mm)) {
  4744. + if (unlikely(faulthandler_disabled() || !mm)) {
  4745. bad_area_nosemaphore(regs, error_code, address);
  4746. return;
  4747. }
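faulthandler_disabled() replaces the open-coded in_atomic() check so the fault handler also honours the explicit pagefault_disable() counter introduced by this series. The sketch below shows the predicate it is meant to implement; the exact composition in <linux/uaccess.h> is an assumption here, not a quotation, and the state variables are illustrative stand-ins:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the real per-task / per-CPU state. */
static int pagefault_disable_count;	/* bumped by pagefault_disable() */
static int preempt_count_val;		/* non-zero inside atomic sections */

static bool pagefault_disabled(void) { return pagefault_disable_count != 0; }
static bool in_atomic_ctx(void)      { return preempt_count_val != 0; }

/*
 * Sketch of the test __do_page_fault() now performs: refuse to handle a
 * fault when page faults were explicitly disabled or we are in atomic
 * context. The real helper may differ in detail.
 */
static bool faulthandler_disabled(void)
{
	return pagefault_disabled() || in_atomic_ctx();
}

int main(void)
{
	pagefault_disable_count = 1;	/* e.g. inside a kmap_atomic section */
	printf("must not take the fault: %d\n", faulthandler_disabled());

	pagefault_disable_count = 0;
	printf("must not take the fault: %d\n", faulthandler_disabled());
	return 0;
}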
  4748. diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
  4749. index 4500142bc4aa..0d1cbcf47f80 100644
  4750. --- a/arch/x86/mm/highmem_32.c
  4751. +++ b/arch/x86/mm/highmem_32.c
  4752. @@ -32,10 +32,11 @@ EXPORT_SYMBOL(kunmap);
  4753. */
  4754. void *kmap_atomic_prot(struct page *page, pgprot_t prot)
  4755. {
  4756. + pte_t pte = mk_pte(page, prot);
  4757. unsigned long vaddr;
  4758. int idx, type;
  4759. - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
  4760. + preempt_disable_nort();
  4761. pagefault_disable();
  4762. if (!PageHighMem(page))
  4763. @@ -45,7 +46,10 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
  4764. idx = type + KM_TYPE_NR*smp_processor_id();
  4765. vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
  4766. BUG_ON(!pte_none(*(kmap_pte-idx)));
  4767. - set_pte(kmap_pte-idx, mk_pte(page, prot));
  4768. +#ifdef CONFIG_PREEMPT_RT_FULL
  4769. + current->kmap_pte[type] = pte;
  4770. +#endif
  4771. + set_pte(kmap_pte-idx, pte);
  4772. arch_flush_lazy_mmu_mode();
  4773. return (void *)vaddr;
  4774. @@ -88,6 +92,9 @@ void __kunmap_atomic(void *kvaddr)
  4775. * is a bad idea also, in case the page changes cacheability
  4776. * attributes or becomes a protected page in a hypervisor.
  4777. */
  4778. +#ifdef CONFIG_PREEMPT_RT_FULL
  4779. + current->kmap_pte[type] = __pte(0);
  4780. +#endif
  4781. kpte_clear_flush(kmap_pte-idx, vaddr);
  4782. kmap_atomic_idx_pop();
  4783. arch_flush_lazy_mmu_mode();
  4784. @@ -100,6 +107,7 @@ void __kunmap_atomic(void *kvaddr)
  4785. #endif
  4786. pagefault_enable();
  4787. + preempt_enable_nort();
  4788. }
  4789. EXPORT_SYMBOL(__kunmap_atomic);
  4790. diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
  4791. index 9ca35fc60cfe..b2ffa5c7d3d3 100644
  4792. --- a/arch/x86/mm/iomap_32.c
  4793. +++ b/arch/x86/mm/iomap_32.c
  4794. @@ -56,15 +56,22 @@ EXPORT_SYMBOL_GPL(iomap_free);
  4795. void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
  4796. {
  4797. + pte_t pte = pfn_pte(pfn, prot);
  4798. unsigned long vaddr;
  4799. int idx, type;
  4800. + preempt_disable();
  4801. pagefault_disable();
  4802. type = kmap_atomic_idx_push();
  4803. idx = type + KM_TYPE_NR * smp_processor_id();
  4804. vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
  4805. - set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
  4806. + WARN_ON(!pte_none(*(kmap_pte - idx)));
  4807. +
  4808. +#ifdef CONFIG_PREEMPT_RT_FULL
  4809. + current->kmap_pte[type] = pte;
  4810. +#endif
  4811. + set_pte(kmap_pte - idx, pte);
  4812. arch_flush_lazy_mmu_mode();
  4813. return (void *)vaddr;
  4814. @@ -112,10 +119,14 @@ iounmap_atomic(void __iomem *kvaddr)
  4815. * is a bad idea also, in case the page changes cacheability
  4816. * attributes or becomes a protected page in a hypervisor.
  4817. */
  4818. +#ifdef CONFIG_PREEMPT_RT_FULL
  4819. + current->kmap_pte[type] = __pte(0);
  4820. +#endif
  4821. kpte_clear_flush(kmap_pte-idx, vaddr);
  4822. kmap_atomic_idx_pop();
  4823. }
  4824. pagefault_enable();
  4825. + preempt_enable();
  4826. }
  4827. EXPORT_SYMBOL_GPL(iounmap_atomic);
  4828. diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
  4829. index 3b6ec42718e4..7871083de089 100644
  4830. --- a/arch/x86/platform/uv/tlb_uv.c
  4831. +++ b/arch/x86/platform/uv/tlb_uv.c
  4832. @@ -714,9 +714,9 @@ static void destination_plugged(struct bau_desc *bau_desc,
  4833. quiesce_local_uvhub(hmaster);
  4834. - spin_lock(&hmaster->queue_lock);
  4835. + raw_spin_lock(&hmaster->queue_lock);
  4836. reset_with_ipi(&bau_desc->distribution, bcp);
  4837. - spin_unlock(&hmaster->queue_lock);
  4838. + raw_spin_unlock(&hmaster->queue_lock);
  4839. end_uvhub_quiesce(hmaster);
  4840. @@ -736,9 +736,9 @@ static void destination_timeout(struct bau_desc *bau_desc,
  4841. quiesce_local_uvhub(hmaster);
  4842. - spin_lock(&hmaster->queue_lock);
  4843. + raw_spin_lock(&hmaster->queue_lock);
  4844. reset_with_ipi(&bau_desc->distribution, bcp);
  4845. - spin_unlock(&hmaster->queue_lock);
  4846. + raw_spin_unlock(&hmaster->queue_lock);
  4847. end_uvhub_quiesce(hmaster);
  4848. @@ -759,7 +759,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
  4849. cycles_t tm1;
  4850. hmaster = bcp->uvhub_master;
  4851. - spin_lock(&hmaster->disable_lock);
  4852. + raw_spin_lock(&hmaster->disable_lock);
  4853. if (!bcp->baudisabled) {
  4854. stat->s_bau_disabled++;
  4855. tm1 = get_cycles();
  4856. @@ -772,7 +772,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
  4857. }
  4858. }
  4859. }
  4860. - spin_unlock(&hmaster->disable_lock);
  4861. + raw_spin_unlock(&hmaster->disable_lock);
  4862. }
  4863. static void count_max_concurr(int stat, struct bau_control *bcp,
  4864. @@ -835,7 +835,7 @@ static void record_send_stats(cycles_t time1, cycles_t time2,
  4865. */
  4866. static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
  4867. {
  4868. - spinlock_t *lock = &hmaster->uvhub_lock;
  4869. + raw_spinlock_t *lock = &hmaster->uvhub_lock;
  4870. atomic_t *v;
  4871. v = &hmaster->active_descriptor_count;
  4872. @@ -968,7 +968,7 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
  4873. struct bau_control *hmaster;
  4874. hmaster = bcp->uvhub_master;
  4875. - spin_lock(&hmaster->disable_lock);
  4876. + raw_spin_lock(&hmaster->disable_lock);
  4877. if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
  4878. stat->s_bau_reenabled++;
  4879. for_each_present_cpu(tcpu) {
  4880. @@ -980,10 +980,10 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
  4881. tbcp->period_giveups = 0;
  4882. }
  4883. }
  4884. - spin_unlock(&hmaster->disable_lock);
  4885. + raw_spin_unlock(&hmaster->disable_lock);
  4886. return 0;
  4887. }
  4888. - spin_unlock(&hmaster->disable_lock);
  4889. + raw_spin_unlock(&hmaster->disable_lock);
  4890. return -1;
  4891. }
  4892. @@ -1901,9 +1901,9 @@ static void __init init_per_cpu_tunables(void)
  4893. bcp->cong_reps = congested_reps;
  4894. bcp->disabled_period = sec_2_cycles(disabled_period);
  4895. bcp->giveup_limit = giveup_limit;
  4896. - spin_lock_init(&bcp->queue_lock);
  4897. - spin_lock_init(&bcp->uvhub_lock);
  4898. - spin_lock_init(&bcp->disable_lock);
  4899. + raw_spin_lock_init(&bcp->queue_lock);
  4900. + raw_spin_lock_init(&bcp->uvhub_lock);
  4901. + raw_spin_lock_init(&bcp->disable_lock);
  4902. }
  4903. }
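The spinlock_t to raw_spinlock_t conversions above matter because, on PREEMPT_RT, a plain spinlock_t becomes a sleeping lock; locks taken from contexts that genuinely cannot sleep (here, the BAU paths running with interrupts disabled) therefore have to use the raw variant. A minimal illustration of the split, on a made-up structure:

#include <linux/spinlock.h>

struct bau_example {			/* hypothetical container */
	raw_spinlock_t hw_lock;		/* always a spinning lock, even on RT */
	spinlock_t stats_lock;		/* may become a sleeping rtmutex on RT */
};

static void bau_example_touch_hw(struct bau_example *b)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&b->hw_lock, flags);
	/* short, bounded hardware access only */
	raw_spin_unlock_irqrestore(&b->hw_lock, flags);
}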
  4904. diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
  4905. index a244237f3cfa..a718fe0d2e73 100644
  4906. --- a/arch/x86/platform/uv/uv_time.c
  4907. +++ b/arch/x86/platform/uv/uv_time.c
  4908. @@ -58,7 +58,7 @@ static DEFINE_PER_CPU(struct clock_event_device, cpu_ced);
  4909. /* There is one of these allocated per node */
  4910. struct uv_rtc_timer_head {
  4911. - spinlock_t lock;
  4912. + raw_spinlock_t lock;
  4913. /* next cpu waiting for timer, local node relative: */
  4914. int next_cpu;
  4915. /* number of cpus on this node: */
  4916. @@ -178,7 +178,7 @@ static __init int uv_rtc_allocate_timers(void)
  4917. uv_rtc_deallocate_timers();
  4918. return -ENOMEM;
  4919. }
  4920. - spin_lock_init(&head->lock);
  4921. + raw_spin_lock_init(&head->lock);
  4922. head->ncpus = uv_blade_nr_possible_cpus(bid);
  4923. head->next_cpu = -1;
  4924. blade_info[bid] = head;
  4925. @@ -232,7 +232,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires)
  4926. unsigned long flags;
  4927. int next_cpu;
  4928. - spin_lock_irqsave(&head->lock, flags);
  4929. + raw_spin_lock_irqsave(&head->lock, flags);
  4930. next_cpu = head->next_cpu;
  4931. *t = expires;
  4932. @@ -244,12 +244,12 @@ static int uv_rtc_set_timer(int cpu, u64 expires)
  4933. if (uv_setup_intr(cpu, expires)) {
  4934. *t = ULLONG_MAX;
  4935. uv_rtc_find_next_timer(head, pnode);
  4936. - spin_unlock_irqrestore(&head->lock, flags);
  4937. + raw_spin_unlock_irqrestore(&head->lock, flags);
  4938. return -ETIME;
  4939. }
  4940. }
  4941. - spin_unlock_irqrestore(&head->lock, flags);
  4942. + raw_spin_unlock_irqrestore(&head->lock, flags);
  4943. return 0;
  4944. }
  4945. @@ -268,7 +268,7 @@ static int uv_rtc_unset_timer(int cpu, int force)
  4946. unsigned long flags;
  4947. int rc = 0;
  4948. - spin_lock_irqsave(&head->lock, flags);
  4949. + raw_spin_lock_irqsave(&head->lock, flags);
  4950. if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
  4951. rc = 1;
  4952. @@ -280,7 +280,7 @@ static int uv_rtc_unset_timer(int cpu, int force)
  4953. uv_rtc_find_next_timer(head, pnode);
  4954. }
  4955. - spin_unlock_irqrestore(&head->lock, flags);
  4956. + raw_spin_unlock_irqrestore(&head->lock, flags);
  4957. return rc;
  4958. }
  4959. @@ -300,13 +300,18 @@ static int uv_rtc_unset_timer(int cpu, int force)
  4960. static cycle_t uv_read_rtc(struct clocksource *cs)
  4961. {
  4962. unsigned long offset;
  4963. + cycle_t cycles;
  4964. + preempt_disable();
  4965. if (uv_get_min_hub_revision_id() == 1)
  4966. offset = 0;
  4967. else
  4968. offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
  4969. - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
  4970. + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
  4971. + preempt_enable();
  4972. +
  4973. + return cycles;
  4974. }
  4975. /*
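uv_read_rtc() now brackets the MMR read with preempt_disable()/preempt_enable() because uv_blade_processor_id() and the register read must happen on the same CPU; with preemptible callers the task could otherwise migrate between the two steps. The same idiom in isolation (the per-CPU variable is hypothetical):

#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(u64, example_counter);	/* hypothetical */

static u64 read_local_counter(void)
{
	u64 val;

	preempt_disable();		/* pin to this CPU for both steps */
	val = per_cpu(example_counter, smp_processor_id());
	preempt_enable();

	return val;
}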
  4976. diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
  4977. index 9e3571a6535c..83a44a33cfa1 100644
  4978. --- a/arch/xtensa/mm/fault.c
  4979. +++ b/arch/xtensa/mm/fault.c
  4980. @@ -15,10 +15,10 @@
  4981. #include <linux/mm.h>
  4982. #include <linux/module.h>
  4983. #include <linux/hardirq.h>
  4984. +#include <linux/uaccess.h>
  4985. #include <asm/mmu_context.h>
  4986. #include <asm/cacheflush.h>
  4987. #include <asm/hardirq.h>
  4988. -#include <asm/uaccess.h>
  4989. #include <asm/pgalloc.h>
  4990. DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST;
  4991. @@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs)
  4992. /* If we're in an interrupt or have no user
  4993. * context, we must not take the fault..
  4994. */
  4995. - if (in_atomic() || !mm) {
  4996. + if (faulthandler_disabled() || !mm) {
  4997. bad_page_fault(regs, address, SIGSEGV);
  4998. return;
  4999. }
  5000. diff --git a/arch/xtensa/mm/highmem.c b/arch/xtensa/mm/highmem.c
  5001. index 8cfb71ec0937..184ceadccc1a 100644
  5002. --- a/arch/xtensa/mm/highmem.c
  5003. +++ b/arch/xtensa/mm/highmem.c
  5004. @@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page)
  5005. enum fixed_addresses idx;
  5006. unsigned long vaddr;
  5007. + preempt_disable();
  5008. pagefault_disable();
  5009. if (!PageHighMem(page))
  5010. return page_address(page);
  5011. @@ -79,6 +80,7 @@ void __kunmap_atomic(void *kvaddr)
  5012. }
  5013. pagefault_enable();
  5014. + preempt_enable();
  5015. }
  5016. EXPORT_SYMBOL(__kunmap_atomic);
  5017. diff --git a/block/blk-core.c b/block/blk-core.c
  5018. index bbbf36e6066b..24935f6ca5bb 100644
  5019. --- a/block/blk-core.c
  5020. +++ b/block/blk-core.c
  5021. @@ -100,6 +100,9 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
  5022. INIT_LIST_HEAD(&rq->queuelist);
  5023. INIT_LIST_HEAD(&rq->timeout_list);
  5024. +#ifdef CONFIG_PREEMPT_RT_FULL
  5025. + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
  5026. +#endif
  5027. rq->cpu = -1;
  5028. rq->q = q;
  5029. rq->__sector = (sector_t) -1;
  5030. @@ -194,7 +197,7 @@ EXPORT_SYMBOL(blk_delay_queue);
  5031. **/
  5032. void blk_start_queue(struct request_queue *q)
  5033. {
  5034. - WARN_ON(!irqs_disabled());
  5035. + WARN_ON_NONRT(!irqs_disabled());
  5036. queue_flag_clear(QUEUE_FLAG_STOPPED, q);
  5037. __blk_run_queue(q);
  5038. @@ -663,7 +666,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
  5039. q->bypass_depth = 1;
  5040. __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
  5041. - init_waitqueue_head(&q->mq_freeze_wq);
  5042. + init_swait_head(&q->mq_freeze_wq);
  5043. if (blkcg_init_queue(q))
  5044. goto fail_bdi;
  5045. @@ -3079,7 +3082,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
  5046. blk_run_queue_async(q);
  5047. else
  5048. __blk_run_queue(q);
  5049. - spin_unlock(q->queue_lock);
  5050. + spin_unlock_irq(q->queue_lock);
  5051. }
  5052. static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
  5053. @@ -3127,7 +3130,6 @@ EXPORT_SYMBOL(blk_check_plugged);
  5054. void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  5055. {
  5056. struct request_queue *q;
  5057. - unsigned long flags;
  5058. struct request *rq;
  5059. LIST_HEAD(list);
  5060. unsigned int depth;
  5061. @@ -3147,11 +3149,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  5062. q = NULL;
  5063. depth = 0;
  5064. - /*
  5065. - * Save and disable interrupts here, to avoid doing it for every
  5066. - * queue lock we have to take.
  5067. - */
  5068. - local_irq_save(flags);
  5069. while (!list_empty(&list)) {
  5070. rq = list_entry_rq(list.next);
  5071. list_del_init(&rq->queuelist);
  5072. @@ -3164,7 +3161,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  5073. queue_unplugged(q, depth, from_schedule);
  5074. q = rq->q;
  5075. depth = 0;
  5076. - spin_lock(q->queue_lock);
  5077. + spin_lock_irq(q->queue_lock);
  5078. }
  5079. /*
  5080. @@ -3191,8 +3188,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  5081. */
  5082. if (q)
  5083. queue_unplugged(q, depth, from_schedule);
  5084. -
  5085. - local_irq_restore(flags);
  5086. }
  5087. void blk_finish_plug(struct blk_plug *plug)
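The blk_flush_plug_list() change trades one long local_irq_save() region for taking the queue lock with spin_lock_irq() per queue. That is equivalent on mainline and, on RT, avoids hard-disabling interrupts around a lock that may sleep. The transformation in miniature, on a made-up list/lock pair:

#include <linux/list.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(work_lock);	/* hypothetical */
static LIST_HEAD(work_items);

static void flush_work_items(void)
{
	/* Previously: local_irq_save(flags); spin_lock(&work_lock); ...
	 * Now the lock primitive manages the interrupt state itself. */
	spin_lock_irq(&work_lock);
	while (!list_empty(&work_items))
		list_del_init(work_items.next);
	spin_unlock_irq(&work_lock);
}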
  5088. diff --git a/block/blk-ioc.c b/block/blk-ioc.c
  5089. index 1a27f45ec776..28f467e636cc 100644
  5090. --- a/block/blk-ioc.c
  5091. +++ b/block/blk-ioc.c
  5092. @@ -7,6 +7,7 @@
  5093. #include <linux/bio.h>
  5094. #include <linux/blkdev.h>
  5095. #include <linux/slab.h>
  5096. +#include <linux/delay.h>
  5097. #include "blk.h"
  5098. @@ -109,7 +110,7 @@ static void ioc_release_fn(struct work_struct *work)
  5099. spin_unlock(q->queue_lock);
  5100. } else {
  5101. spin_unlock_irqrestore(&ioc->lock, flags);
  5102. - cpu_relax();
  5103. + cpu_chill();
  5104. spin_lock_irqsave_nested(&ioc->lock, flags, 1);
  5105. }
  5106. }
  5107. @@ -187,7 +188,7 @@ retry:
  5108. spin_unlock(icq->q->queue_lock);
  5109. } else {
  5110. spin_unlock_irqrestore(&ioc->lock, flags);
  5111. - cpu_relax();
  5112. + cpu_chill();
  5113. goto retry;
  5114. }
  5115. }
  5116. diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
  5117. index 0736729d6494..3e21e31d0d7e 100644
  5118. --- a/block/blk-iopoll.c
  5119. +++ b/block/blk-iopoll.c
  5120. @@ -35,6 +35,7 @@ void blk_iopoll_sched(struct blk_iopoll *iop)
  5121. list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
  5122. __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
  5123. local_irq_restore(flags);
  5124. + preempt_check_resched_rt();
  5125. }
  5126. EXPORT_SYMBOL(blk_iopoll_sched);
  5127. @@ -132,6 +133,7 @@ static void blk_iopoll_softirq(struct softirq_action *h)
  5128. __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
  5129. local_irq_enable();
  5130. + preempt_check_resched_rt();
  5131. }
  5132. /**
  5133. @@ -201,6 +203,7 @@ static int blk_iopoll_cpu_notify(struct notifier_block *self,
  5134. this_cpu_ptr(&blk_cpu_iopoll));
  5135. __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
  5136. local_irq_enable();
  5137. + preempt_check_resched_rt();
  5138. }
  5139. return NOTIFY_OK;
  5140. diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c
  5141. index bb3ed488f7b5..628c6c13c482 100644
  5142. --- a/block/blk-mq-cpu.c
  5143. +++ b/block/blk-mq-cpu.c
  5144. @@ -16,7 +16,7 @@
  5145. #include "blk-mq.h"
  5146. static LIST_HEAD(blk_mq_cpu_notify_list);
  5147. -static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
  5148. +static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);
  5149. static int blk_mq_main_cpu_notify(struct notifier_block *self,
  5150. unsigned long action, void *hcpu)
  5151. @@ -25,7 +25,10 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self,
  5152. struct blk_mq_cpu_notifier *notify;
  5153. int ret = NOTIFY_OK;
  5154. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  5155. + if (action != CPU_POST_DEAD)
  5156. + return NOTIFY_OK;
  5157. +
  5158. + spin_lock(&blk_mq_cpu_notify_lock);
  5159. list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) {
  5160. ret = notify->notify(notify->data, action, cpu);
  5161. @@ -33,7 +36,7 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self,
  5162. break;
  5163. }
  5164. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  5165. + spin_unlock(&blk_mq_cpu_notify_lock);
  5166. return ret;
  5167. }
  5168. @@ -41,16 +44,16 @@ void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
  5169. {
  5170. BUG_ON(!notifier->notify);
  5171. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  5172. + spin_lock(&blk_mq_cpu_notify_lock);
  5173. list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
  5174. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  5175. + spin_unlock(&blk_mq_cpu_notify_lock);
  5176. }
  5177. void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
  5178. {
  5179. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  5180. + spin_lock(&blk_mq_cpu_notify_lock);
  5181. list_del(&notifier->list);
  5182. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  5183. + spin_unlock(&blk_mq_cpu_notify_lock);
  5184. }
  5185. void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
  5186. diff --git a/block/blk-mq.c b/block/blk-mq.c
  5187. index 2dc1fd6c5bdb..c473bd192a41 100644
  5188. --- a/block/blk-mq.c
  5189. +++ b/block/blk-mq.c
  5190. @@ -88,7 +88,7 @@ static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
  5191. if (!(gfp & __GFP_WAIT))
  5192. return -EBUSY;
  5193. - ret = wait_event_interruptible(q->mq_freeze_wq,
  5194. + ret = swait_event_interruptible(q->mq_freeze_wq,
  5195. !q->mq_freeze_depth || blk_queue_dying(q));
  5196. if (blk_queue_dying(q))
  5197. return -ENODEV;
  5198. @@ -107,7 +107,7 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref)
  5199. struct request_queue *q =
  5200. container_of(ref, struct request_queue, mq_usage_counter);
  5201. - wake_up_all(&q->mq_freeze_wq);
  5202. + swait_wake_all(&q->mq_freeze_wq);
  5203. }
  5204. void blk_mq_freeze_queue_start(struct request_queue *q)
  5205. @@ -127,7 +127,7 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
  5206. static void blk_mq_freeze_queue_wait(struct request_queue *q)
  5207. {
  5208. - wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
  5209. + swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
  5210. }
  5211. /*
  5212. @@ -151,7 +151,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
  5213. spin_unlock_irq(q->queue_lock);
  5214. if (wake) {
  5215. percpu_ref_reinit(&q->mq_usage_counter);
  5216. - wake_up_all(&q->mq_freeze_wq);
  5217. + swait_wake_all(&q->mq_freeze_wq);
  5218. }
  5219. }
  5220. EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
  5221. @@ -170,7 +170,7 @@ void blk_mq_wake_waiters(struct request_queue *q)
  5222. * dying, we need to ensure that processes currently waiting on
  5223. * the queue are notified as well.
  5224. */
  5225. - wake_up_all(&q->mq_freeze_wq);
  5226. + swait_wake_all(&q->mq_freeze_wq);
  5227. }
  5228. bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
  5229. @@ -217,6 +217,9 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
  5230. rq->resid_len = 0;
  5231. rq->sense = NULL;
  5232. +#ifdef CONFIG_PREEMPT_RT_FULL
  5233. + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
  5234. +#endif
  5235. INIT_LIST_HEAD(&rq->timeout_list);
  5236. rq->timeout = 0;
  5237. @@ -346,6 +349,17 @@ void blk_mq_end_request(struct request *rq, int error)
  5238. }
  5239. EXPORT_SYMBOL(blk_mq_end_request);
  5240. +#ifdef CONFIG_PREEMPT_RT_FULL
  5241. +
  5242. +void __blk_mq_complete_request_remote_work(struct work_struct *work)
  5243. +{
  5244. + struct request *rq = container_of(work, struct request, work);
  5245. +
  5246. + rq->q->softirq_done_fn(rq);
  5247. +}
  5248. +
  5249. +#else
  5250. +
  5251. static void __blk_mq_complete_request_remote(void *data)
  5252. {
  5253. struct request *rq = data;
  5254. @@ -353,6 +367,8 @@ static void __blk_mq_complete_request_remote(void *data)
  5255. rq->q->softirq_done_fn(rq);
  5256. }
  5257. +#endif
  5258. +
  5259. static void blk_mq_ipi_complete_request(struct request *rq)
  5260. {
  5261. struct blk_mq_ctx *ctx = rq->mq_ctx;
  5262. @@ -364,19 +380,23 @@ static void blk_mq_ipi_complete_request(struct request *rq)
  5263. return;
  5264. }
  5265. - cpu = get_cpu();
  5266. + cpu = get_cpu_light();
  5267. if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
  5268. shared = cpus_share_cache(cpu, ctx->cpu);
  5269. if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
  5270. +#ifdef CONFIG_PREEMPT_RT_FULL
  5271. + schedule_work_on(ctx->cpu, &rq->work);
  5272. +#else
  5273. rq->csd.func = __blk_mq_complete_request_remote;
  5274. rq->csd.info = rq;
  5275. rq->csd.flags = 0;
  5276. smp_call_function_single_async(ctx->cpu, &rq->csd);
  5277. +#endif
  5278. } else {
  5279. rq->q->softirq_done_fn(rq);
  5280. }
  5281. - put_cpu();
  5282. + put_cpu_light();
  5283. }
  5284. void __blk_mq_complete_request(struct request *rq)
  5285. @@ -905,14 +925,14 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
  5286. return;
  5287. if (!async) {
  5288. - int cpu = get_cpu();
  5289. + int cpu = get_cpu_light();
  5290. if (cpumask_test_cpu(cpu, hctx->cpumask)) {
  5291. __blk_mq_run_hw_queue(hctx);
  5292. - put_cpu();
  5293. + put_cpu_light();
  5294. return;
  5295. }
  5296. - put_cpu();
  5297. + put_cpu_light();
  5298. }
  5299. kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
  5300. @@ -1589,7 +1609,7 @@ static int blk_mq_hctx_notify(void *data, unsigned long action,
  5301. {
  5302. struct blk_mq_hw_ctx *hctx = data;
  5303. - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
  5304. + if (action == CPU_POST_DEAD)
  5305. return blk_mq_hctx_cpu_offline(hctx, cpu);
  5306. /*
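Two independent RT adaptations are visible in blk-mq.c: the freeze waitqueue moves to the simple-waitqueue implementation (swait_*), and the completion IPI is replaced by a per-request work item under PREEMPT_RT_FULL. The swait side, reduced to its essentials; the helper names come from the hunks, while the type name struct swait_head and the state below are assumptions for illustration:

/* Assumes the simple-waitqueue API this series provides:
 * init_swait_head(), swait_event(), swait_wake_all().  No locking of
 * the flag is shown; this is an outline, not tree contents. */
static struct swait_head example_freeze_wq;
static bool example_frozen;

static void example_init(void)
{
	init_swait_head(&example_freeze_wq);
}

static void example_wait_unfrozen(void)
{
	swait_event(example_freeze_wq, !example_frozen);
}

static void example_unfreeze(void)
{
	example_frozen = false;
	swait_wake_all(&example_freeze_wq);
}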
  5307. diff --git a/block/blk-mq.h b/block/blk-mq.h
  5308. index 6a48c4c0d8a2..4b7cbf0e6e82 100644
  5309. --- a/block/blk-mq.h
  5310. +++ b/block/blk-mq.h
  5311. @@ -76,7 +76,10 @@ struct blk_align_bitmap {
  5312. static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
  5313. unsigned int cpu)
  5314. {
  5315. - return per_cpu_ptr(q->queue_ctx, cpu);
  5316. + struct blk_mq_ctx *ctx;
  5317. +
  5318. + ctx = per_cpu_ptr(q->queue_ctx, cpu);
  5319. + return ctx;
  5320. }
  5321. /*
  5322. @@ -87,12 +90,12 @@ static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
  5323. */
  5324. static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
  5325. {
  5326. - return __blk_mq_get_ctx(q, get_cpu());
  5327. + return __blk_mq_get_ctx(q, get_cpu_light());
  5328. }
  5329. static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
  5330. {
  5331. - put_cpu();
  5332. + put_cpu_light();
  5333. }
  5334. struct blk_mq_alloc_data {
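blk_mq_get_ctx()/blk_mq_put_ctx() now pair get_cpu_light()/put_cpu_light(). Roughly, the _light variants only prevent migration rather than disabling preemption, which is what matters for per-CPU context lookup on RT. A reconstruction of their shape (the real definitions are the ones this series adds; treat this as a sketch):

#include <linux/smp.h>

/* Reconstructed shape of the helpers; in this series migrate_disable()
 * degrades to plain preemption disabling when PREEMPT_RT_FULL is not
 * set, so the non-RT behaviour matches get_cpu()/put_cpu(). */
#define get_cpu_light()		({ migrate_disable(); smp_processor_id(); })
#define put_cpu_light()		migrate_enable()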
  5335. diff --git a/block/blk-softirq.c b/block/blk-softirq.c
  5336. index 53b1737e978d..81c3c0a62edf 100644
  5337. --- a/block/blk-softirq.c
  5338. +++ b/block/blk-softirq.c
  5339. @@ -51,6 +51,7 @@ static void trigger_softirq(void *data)
  5340. raise_softirq_irqoff(BLOCK_SOFTIRQ);
  5341. local_irq_restore(flags);
  5342. + preempt_check_resched_rt();
  5343. }
  5344. /*
  5345. @@ -93,6 +94,7 @@ static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
  5346. this_cpu_ptr(&blk_cpu_done));
  5347. raise_softirq_irqoff(BLOCK_SOFTIRQ);
  5348. local_irq_enable();
  5349. + preempt_check_resched_rt();
  5350. }
  5351. return NOTIFY_OK;
  5352. @@ -150,6 +152,7 @@ do_local:
  5353. goto do_local;
  5354. local_irq_restore(flags);
  5355. + preempt_check_resched_rt();
  5356. }
  5357. /**
  5358. diff --git a/block/bounce.c b/block/bounce.c
  5359. index ed9dd8067120..39d123e0a989 100644
  5360. --- a/block/bounce.c
  5361. +++ b/block/bounce.c
  5362. @@ -54,11 +54,11 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
  5363. unsigned long flags;
  5364. unsigned char *vto;
  5365. - local_irq_save(flags);
  5366. + local_irq_save_nort(flags);
  5367. vto = kmap_atomic(to->bv_page);
  5368. memcpy(vto + to->bv_offset, vfrom, to->bv_len);
  5369. kunmap_atomic(vto);
  5370. - local_irq_restore(flags);
  5371. + local_irq_restore_nort(flags);
  5372. }
  5373. #else /* CONFIG_HIGHMEM */
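bounce_copy_vec() switches to the _nort interrupt helpers. On a non-RT kernel these compile to the ordinary local_irq_save()/local_irq_restore(); on PREEMPT_RT_FULL they leave interrupts enabled, since the kmap_atomic section no longer needs hard interrupt disabling there. A sketch of the intended behaviour (reconstructed, not the tree's exact macros):

#ifndef CONFIG_PREEMPT_RT_FULL
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#else
# define local_irq_save_nort(flags)	local_save_flags(flags)
# define local_irq_restore_nort(flags)	(void)(flags)
#endif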
  5374. diff --git a/crypto/algapi.c b/crypto/algapi.c
  5375. index dda720c6ab08..1629b110dabd 100644
  5376. --- a/crypto/algapi.c
  5377. +++ b/crypto/algapi.c
  5378. @@ -695,13 +695,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2);
  5379. int crypto_register_notifier(struct notifier_block *nb)
  5380. {
  5381. - return blocking_notifier_chain_register(&crypto_chain, nb);
  5382. + return srcu_notifier_chain_register(&crypto_chain, nb);
  5383. }
  5384. EXPORT_SYMBOL_GPL(crypto_register_notifier);
  5385. int crypto_unregister_notifier(struct notifier_block *nb)
  5386. {
  5387. - return blocking_notifier_chain_unregister(&crypto_chain, nb);
  5388. + return srcu_notifier_chain_unregister(&crypto_chain, nb);
  5389. }
  5390. EXPORT_SYMBOL_GPL(crypto_unregister_notifier);
  5391. diff --git a/crypto/api.c b/crypto/api.c
  5392. index bbc147cb5dec..bc1a848f02ec 100644
  5393. --- a/crypto/api.c
  5394. +++ b/crypto/api.c
  5395. @@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(crypto_alg_list);
  5396. DECLARE_RWSEM(crypto_alg_sem);
  5397. EXPORT_SYMBOL_GPL(crypto_alg_sem);
  5398. -BLOCKING_NOTIFIER_HEAD(crypto_chain);
  5399. +SRCU_NOTIFIER_HEAD(crypto_chain);
  5400. EXPORT_SYMBOL_GPL(crypto_chain);
  5401. static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg);
  5402. @@ -236,10 +236,10 @@ int crypto_probing_notify(unsigned long val, void *v)
  5403. {
  5404. int ok;
  5405. - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
  5406. + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
  5407. if (ok == NOTIFY_DONE) {
  5408. request_module("cryptomgr");
  5409. - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
  5410. + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
  5411. }
  5412. return ok;
  5413. diff --git a/crypto/internal.h b/crypto/internal.h
  5414. index bd39bfc92eab..a5db167cba84 100644
  5415. --- a/crypto/internal.h
  5416. +++ b/crypto/internal.h
  5417. @@ -48,7 +48,7 @@ struct crypto_larval {
  5418. extern struct list_head crypto_alg_list;
  5419. extern struct rw_semaphore crypto_alg_sem;
  5420. -extern struct blocking_notifier_head crypto_chain;
  5421. +extern struct srcu_notifier_head crypto_chain;
  5422. #ifdef CONFIG_PROC_FS
  5423. void __init crypto_init_proc(void);
  5424. @@ -142,7 +142,7 @@ static inline int crypto_is_moribund(struct crypto_alg *alg)
  5425. static inline void crypto_notify(unsigned long val, void *v)
  5426. {
  5427. - blocking_notifier_call_chain(&crypto_chain, val, v);
  5428. + srcu_notifier_call_chain(&crypto_chain, val, v);
  5429. }
  5430. #endif /* _CRYPTO_INTERNAL_H */
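The crypto notifier chain is switched from a blocking notifier to an SRCU notifier, dropping the rwsem the blocking variant takes, which is the problematic piece on RT. The SRCU notifier API used by these hunks, exercised in isolation (the chain name and callback below are illustrative):

#include <linux/notifier.h>

SRCU_NOTIFIER_HEAD(example_chain);	/* illustrative chain */

static int example_event(struct notifier_block *nb, unsigned long val, void *v)
{
	return NOTIFY_OK;
}

static struct notifier_block example_nb = {
	.notifier_call = example_event,
};

static void example_use_chain(void)
{
	srcu_notifier_chain_register(&example_chain, &example_nb);
	srcu_notifier_call_chain(&example_chain, 0, NULL);
	srcu_notifier_chain_unregister(&example_chain, &example_nb);
}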
  5431. diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
  5432. index a0c478784314..166ee955405f 100644
  5433. --- a/drivers/acpi/acpica/acglobal.h
  5434. +++ b/drivers/acpi/acpica/acglobal.h
  5435. @@ -112,7 +112,7 @@ ACPI_GLOBAL(u8, acpi_gbl_global_lock_pending);
  5436. * interrupt level
  5437. */
  5438. ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */
  5439. -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
  5440. +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
  5441. ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock);
  5442. /* Mutex for _OSI support */
  5443. diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c
  5444. index 3cf77afd142c..dc32e72132f1 100644
  5445. --- a/drivers/acpi/acpica/hwregs.c
  5446. +++ b/drivers/acpi/acpica/hwregs.c
  5447. @@ -269,14 +269,14 @@ acpi_status acpi_hw_clear_acpi_status(void)
  5448. ACPI_BITMASK_ALL_FIXED_STATUS,
  5449. ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address)));
  5450. - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
  5451. + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
  5452. /* Clear the fixed events in PM1 A/B */
  5453. status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
  5454. ACPI_BITMASK_ALL_FIXED_STATUS);
  5455. - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
  5456. + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
  5457. if (ACPI_FAILURE(status)) {
  5458. goto exit;
  5459. diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c
  5460. index 5f97468df8ff..8c017f15da7d 100644
  5461. --- a/drivers/acpi/acpica/hwxface.c
  5462. +++ b/drivers/acpi/acpica/hwxface.c
  5463. @@ -374,7 +374,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value)
  5464. return_ACPI_STATUS(AE_BAD_PARAMETER);
  5465. }
  5466. - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
  5467. + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
  5468. /*
  5469. * At this point, we know that the parent register is one of the
  5470. @@ -435,7 +435,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value)
  5471. unlock_and_exit:
  5472. - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
  5473. + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
  5474. return_ACPI_STATUS(status);
  5475. }
  5476. diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c
  5477. index 37b8b58fcd56..938795507d87 100644
  5478. --- a/drivers/acpi/acpica/utmutex.c
  5479. +++ b/drivers/acpi/acpica/utmutex.c
  5480. @@ -88,7 +88,7 @@ acpi_status acpi_ut_mutex_initialize(void)
  5481. return_ACPI_STATUS (status);
  5482. }
  5483. - status = acpi_os_create_lock (&acpi_gbl_hardware_lock);
  5484. + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock);
  5485. if (ACPI_FAILURE (status)) {
  5486. return_ACPI_STATUS (status);
  5487. }
  5488. @@ -141,7 +141,7 @@ void acpi_ut_mutex_terminate(void)
  5489. /* Delete the spinlocks */
  5490. acpi_os_delete_lock(acpi_gbl_gpe_lock);
  5491. - acpi_os_delete_lock(acpi_gbl_hardware_lock);
  5492. + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock);
  5493. acpi_os_delete_lock(acpi_gbl_reference_count_lock);
  5494. /* Delete the reader/writer lock */
  5495. diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
  5496. index 7dbba387d12a..65beb7abb4e7 100644
  5497. --- a/drivers/ata/libata-sff.c
  5498. +++ b/drivers/ata/libata-sff.c
  5499. @@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf,
  5500. unsigned long flags;
  5501. unsigned int consumed;
  5502. - local_irq_save(flags);
  5503. + local_irq_save_nort(flags);
  5504. consumed = ata_sff_data_xfer32(dev, buf, buflen, rw);
  5505. - local_irq_restore(flags);
  5506. + local_irq_restore_nort(flags);
  5507. return consumed;
  5508. }
  5509. @@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
  5510. unsigned long flags;
  5511. /* FIXME: use a bounce buffer */
  5512. - local_irq_save(flags);
  5513. + local_irq_save_nort(flags);
  5514. buf = kmap_atomic(page);
  5515. /* do the actual data transfer */
  5516. @@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
  5517. do_write);
  5518. kunmap_atomic(buf);
  5519. - local_irq_restore(flags);
  5520. + local_irq_restore_nort(flags);
  5521. } else {
  5522. buf = page_address(page);
  5523. ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size,
  5524. @@ -864,7 +864,7 @@ next_sg:
  5525. unsigned long flags;
  5526. /* FIXME: use bounce buffer */
  5527. - local_irq_save(flags);
  5528. + local_irq_save_nort(flags);
  5529. buf = kmap_atomic(page);
  5530. /* do the actual data transfer */
  5531. @@ -872,7 +872,7 @@ next_sg:
  5532. count, rw);
  5533. kunmap_atomic(buf);
  5534. - local_irq_restore(flags);
  5535. + local_irq_restore_nort(flags);
  5536. } else {
  5537. buf = page_address(page);
  5538. consumed = ap->ops->sff_data_xfer(dev, buf + offset,
  5539. diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
  5540. index 6e134f4759c0..d2782d492630 100644
  5541. --- a/drivers/block/zram/zram_drv.c
  5542. +++ b/drivers/block/zram/zram_drv.c
  5543. @@ -386,6 +386,8 @@ static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)
  5544. goto out_error;
  5545. }
  5546. + zram_meta_init_table_locks(meta, disksize);
  5547. +
  5548. return meta;
  5549. out_error:
  5550. @@ -484,12 +486,12 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
  5551. unsigned long handle;
  5552. size_t size;
  5553. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  5554. + zram_lock_table(&meta->table[index]);
  5555. handle = meta->table[index].handle;
  5556. size = zram_get_obj_size(meta, index);
  5557. if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
  5558. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5559. + zram_unlock_table(&meta->table[index]);
  5560. clear_page(mem);
  5561. return 0;
  5562. }
  5563. @@ -500,7 +502,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
  5564. else
  5565. ret = zcomp_decompress(zram->comp, cmem, size, mem);
  5566. zs_unmap_object(meta->mem_pool, handle);
  5567. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5568. + zram_unlock_table(&meta->table[index]);
  5569. /* Should NEVER happen. Return bio error if it does. */
  5570. if (unlikely(ret)) {
  5571. @@ -520,14 +522,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
  5572. struct zram_meta *meta = zram->meta;
  5573. page = bvec->bv_page;
  5574. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  5575. + zram_lock_table(&meta->table[index]);
  5576. if (unlikely(!meta->table[index].handle) ||
  5577. zram_test_flag(meta, index, ZRAM_ZERO)) {
  5578. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5579. + zram_unlock_table(&meta->table[index]);
  5580. handle_zero_page(bvec);
  5581. return 0;
  5582. }
  5583. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5584. + zram_unlock_table(&meta->table[index]);
  5585. if (is_partial_io(bvec))
  5586. /* Use a temporary buffer to decompress the page */
  5587. @@ -622,10 +624,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
  5588. if (user_mem)
  5589. kunmap_atomic(user_mem);
  5590. /* Free memory associated with this sector now. */
  5591. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  5592. + zram_lock_table(&meta->table[index]);
  5593. zram_free_page(zram, index);
  5594. zram_set_flag(meta, index, ZRAM_ZERO);
  5595. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5596. + zram_unlock_table(&meta->table[index]);
  5597. atomic64_inc(&zram->stats.zero_pages);
  5598. ret = 0;
  5599. @@ -685,12 +687,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
  5600. * Free memory associated with this sector
  5601. * before overwriting unused sectors.
  5602. */
  5603. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  5604. + zram_lock_table(&meta->table[index]);
  5605. zram_free_page(zram, index);
  5606. meta->table[index].handle = handle;
  5607. zram_set_obj_size(meta, index, clen);
  5608. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5609. + zram_unlock_table(&meta->table[index]);
  5610. /* Update stats */
  5611. atomic64_add(clen, &zram->stats.compr_data_size);
  5612. @@ -762,9 +764,9 @@ static void zram_bio_discard(struct zram *zram, u32 index,
  5613. }
  5614. while (n >= PAGE_SIZE) {
  5615. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  5616. + zram_lock_table(&meta->table[index]);
  5617. zram_free_page(zram, index);
  5618. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5619. + zram_unlock_table(&meta->table[index]);
  5620. atomic64_inc(&zram->stats.notify_free);
  5621. index++;
  5622. n -= PAGE_SIZE;
  5623. @@ -1007,9 +1009,9 @@ static void zram_slot_free_notify(struct block_device *bdev,
  5624. zram = bdev->bd_disk->private_data;
  5625. meta = zram->meta;
  5626. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  5627. + zram_lock_table(&meta->table[index]);
  5628. zram_free_page(zram, index);
  5629. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5630. + zram_unlock_table(&meta->table[index]);
  5631. atomic64_inc(&zram->stats.notify_free);
  5632. }
  5633. diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
  5634. index 570c598f4ce9..22c0173b00e3 100644
  5635. --- a/drivers/block/zram/zram_drv.h
  5636. +++ b/drivers/block/zram/zram_drv.h
  5637. @@ -78,6 +78,9 @@ enum zram_pageflags {
  5638. struct zram_table_entry {
  5639. unsigned long handle;
  5640. unsigned long value;
  5641. +#ifdef CONFIG_PREEMPT_RT_BASE
  5642. + spinlock_t lock;
  5643. +#endif
  5644. };
  5645. struct zram_stats {
  5646. @@ -122,4 +125,42 @@ struct zram {
  5647. u64 disksize; /* bytes */
  5648. char compressor[10];
  5649. };
  5650. +
  5651. +#ifndef CONFIG_PREEMPT_RT_BASE
  5652. +static inline void zram_lock_table(struct zram_table_entry *table)
  5653. +{
  5654. + bit_spin_lock(ZRAM_ACCESS, &table->value);
  5655. +}
  5656. +
  5657. +static inline void zram_unlock_table(struct zram_table_entry *table)
  5658. +{
  5659. + bit_spin_unlock(ZRAM_ACCESS, &table->value);
  5660. +}
  5661. +
  5662. +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) { }
  5663. +#else /* CONFIG_PREEMPT_RT_BASE */
  5664. +static inline void zram_lock_table(struct zram_table_entry *table)
  5665. +{
  5666. + spin_lock(&table->lock);
  5667. + __set_bit(ZRAM_ACCESS, &table->value);
  5668. +}
  5669. +
  5670. +static inline void zram_unlock_table(struct zram_table_entry *table)
  5671. +{
  5672. + __clear_bit(ZRAM_ACCESS, &table->value);
  5673. + spin_unlock(&table->lock);
  5674. +}
  5675. +
  5676. +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize)
  5677. +{
  5678. + size_t num_pages = disksize >> PAGE_SHIFT;
  5679. + size_t index;
  5680. +
  5681. + for (index = 0; index < num_pages; index++) {
  5682. + spinlock_t *lock = &meta->table[index].lock;
  5683. + spin_lock_init(lock);
  5684. + }
  5685. +}
  5686. +#endif /* CONFIG_PREEMPT_RT_BASE */
  5687. +
  5688. #endif
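On non-RT kernels the zram table keeps using the bit spinlock embedded in the value word; with PREEMPT_RT_BASE each entry gains a real spinlock_t, which may sleep and therefore also needs explicit per-slot initialisation. Caller-side, every table access goes through the new helpers, along the lines of this abbreviated sketch (resetting the handle stands in for the real zram_free_page() path):

/* Abbreviated caller pattern for the helpers defined above. */
static void example_reset_slot(struct zram_meta *meta, u32 index)
{
	zram_lock_table(&meta->table[index]);
	meta->table[index].handle = 0;
	zram_unlock_table(&meta->table[index]);
}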
  5689. diff --git a/drivers/char/random.c b/drivers/char/random.c
  5690. index 9cd6968e2f92..eb47efec2506 100644
  5691. --- a/drivers/char/random.c
  5692. +++ b/drivers/char/random.c
  5693. @@ -776,8 +776,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
  5694. } sample;
  5695. long delta, delta2, delta3;
  5696. - preempt_disable();
  5697. -
  5698. sample.jiffies = jiffies;
  5699. sample.cycles = random_get_entropy();
  5700. sample.num = num;
  5701. @@ -818,7 +816,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
  5702. */
  5703. credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
  5704. }
  5705. - preempt_enable();
  5706. }
  5707. void add_input_randomness(unsigned int type, unsigned int code,
  5708. @@ -871,28 +868,27 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
  5709. return *(ptr + f->reg_idx++);
  5710. }
  5711. -void add_interrupt_randomness(int irq, int irq_flags)
  5712. +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip)
  5713. {
  5714. struct entropy_store *r;
  5715. struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
  5716. - struct pt_regs *regs = get_irq_regs();
  5717. unsigned long now = jiffies;
  5718. cycles_t cycles = random_get_entropy();
  5719. __u32 c_high, j_high;
  5720. - __u64 ip;
  5721. unsigned long seed;
  5722. int credit = 0;
  5723. if (cycles == 0)
  5724. - cycles = get_reg(fast_pool, regs);
  5725. + cycles = get_reg(fast_pool, NULL);
  5726. c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
  5727. j_high = (sizeof(now) > 4) ? now >> 32 : 0;
  5728. fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
  5729. fast_pool->pool[1] ^= now ^ c_high;
  5730. - ip = regs ? instruction_pointer(regs) : _RET_IP_;
  5731. + if (!ip)
  5732. + ip = _RET_IP_;
  5733. fast_pool->pool[2] ^= ip;
  5734. fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 :
  5735. - get_reg(fast_pool, regs);
  5736. + get_reg(fast_pool, NULL);
  5737. fast_mix(fast_pool);
  5738. add_interrupt_bench(cycles);
  5739. diff --git a/drivers/clk/at91/pmc.c b/drivers/clk/at91/pmc.c
  5740. index 3f27d21fb729..b83480f599ce 100644
  5741. --- a/drivers/clk/at91/pmc.c
  5742. +++ b/drivers/clk/at91/pmc.c
  5743. @@ -27,21 +27,6 @@
  5744. void __iomem *at91_pmc_base;
  5745. EXPORT_SYMBOL_GPL(at91_pmc_base);
  5746. -void at91rm9200_idle(void)
  5747. -{
  5748. - /*
  5749. - * Disable the processor clock. The processor will be automatically
  5750. - * re-enabled by an interrupt or by a reset.
  5751. - */
  5752. - at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK);
  5753. -}
  5754. -
  5755. -void at91sam9_idle(void)
  5756. -{
  5757. - at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK);
  5758. - cpu_do_idle();
  5759. -}
  5760. -
  5761. int of_at91_get_clk_range(struct device_node *np, const char *propname,
  5762. struct clk_range *range)
  5763. {
  5764. diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
  5765. index 8bdbc45c6dad..43f1c6bc6e28 100644
  5766. --- a/drivers/clocksource/tcb_clksrc.c
  5767. +++ b/drivers/clocksource/tcb_clksrc.c
  5768. @@ -23,8 +23,7 @@
  5769. * this 32 bit free-running counter. the second channel is not used.
  5770. *
  5771. * - The third channel may be used to provide a 16-bit clockevent
  5772. - * source, used in either periodic or oneshot mode. This runs
  5773. - * at 32 KiHZ, and can handle delays of up to two seconds.
  5774. + * source, used in either periodic or oneshot mode.
  5775. *
  5776. * A boot clocksource and clockevent source are also currently needed,
  5777. * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
  5778. @@ -74,6 +73,7 @@ static struct clocksource clksrc = {
  5779. struct tc_clkevt_device {
  5780. struct clock_event_device clkevt;
  5781. struct clk *clk;
  5782. + u32 freq;
  5783. void __iomem *regs;
  5784. };
  5785. @@ -82,13 +82,6 @@ static struct tc_clkevt_device *to_tc_clkevt(struct clock_event_device *clkevt)
  5786. return container_of(clkevt, struct tc_clkevt_device, clkevt);
  5787. }
  5788. -/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
  5789. - * because using one of the divided clocks would usually mean the
  5790. - * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
  5791. - *
  5792. - * A divided clock could be good for high resolution timers, since
  5793. - * 30.5 usec resolution can seem "low".
  5794. - */
  5795. static u32 timer_clock;
  5796. static void tc_mode(enum clock_event_mode m, struct clock_event_device *d)
  5797. @@ -111,11 +104,12 @@ static void tc_mode(enum clock_event_mode m, struct clock_event_device *d)
  5798. case CLOCK_EVT_MODE_PERIODIC:
  5799. clk_enable(tcd->clk);
  5800. - /* slow clock, count up to RC, then irq and restart */
  5801. + /* count up to RC, then irq and restart */
  5802. __raw_writel(timer_clock
  5803. | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
  5804. regs + ATMEL_TC_REG(2, CMR));
  5805. - __raw_writel((32768 + HZ/2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
  5806. + __raw_writel((tcd->freq + HZ / 2) / HZ,
  5807. + tcaddr + ATMEL_TC_REG(2, RC));
  5808. /* Enable clock and interrupts on RC compare */
  5809. __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
  5810. @@ -128,7 +122,7 @@ static void tc_mode(enum clock_event_mode m, struct clock_event_device *d)
  5811. case CLOCK_EVT_MODE_ONESHOT:
  5812. clk_enable(tcd->clk);
  5813. - /* slow clock, count up to RC, then irq and stop */
  5814. + /* count up to RC, then irq and stop */
  5815. __raw_writel(timer_clock | ATMEL_TC_CPCSTOP
  5816. | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
  5817. regs + ATMEL_TC_REG(2, CMR));
  5818. @@ -157,8 +151,12 @@ static struct tc_clkevt_device clkevt = {
  5819. .name = "tc_clkevt",
  5820. .features = CLOCK_EVT_FEAT_PERIODIC
  5821. | CLOCK_EVT_FEAT_ONESHOT,
  5822. +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  5823. /* Should be lower than at91rm9200's system timer */
  5824. .rating = 125,
  5825. +#else
  5826. + .rating = 200,
  5827. +#endif
  5828. .set_next_event = tc_next_event,
  5829. .set_mode = tc_mode,
  5830. },
  5831. @@ -178,8 +176,9 @@ static irqreturn_t ch2_irq(int irq, void *handle)
  5832. return IRQ_NONE;
  5833. }
  5834. -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
  5835. +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
  5836. {
  5837. + unsigned divisor = atmel_tc_divisors[divisor_idx];
  5838. int ret;
  5839. struct clk *t2_clk = tc->clk[2];
  5840. int irq = tc->irq[2];
  5841. @@ -193,7 +192,11 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
  5842. clkevt.regs = tc->regs;
  5843. clkevt.clk = t2_clk;
  5844. - timer_clock = clk32k_divisor_idx;
  5845. + timer_clock = divisor_idx;
  5846. + if (!divisor)
  5847. + clkevt.freq = 32768;
  5848. + else
  5849. + clkevt.freq = clk_get_rate(t2_clk) / divisor;
  5850. clkevt.clkevt.cpumask = cpumask_of(0);
  5851. @@ -203,7 +206,7 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
  5852. return ret;
  5853. }
  5854. - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff);
  5855. + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff);
  5856. return ret;
  5857. }
  5858. @@ -340,7 +343,11 @@ static int __init tcb_clksrc_init(void)
  5859. goto err_disable_t1;
  5860. /* channel 2: periodic and oneshot timer support */
  5861. +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  5862. ret = setup_clkevents(tc, clk32k_divisor_idx);
  5863. +#else
  5864. + ret = setup_clkevents(tc, best_divisor_idx);
  5865. +#endif
  5866. if (ret)
  5867. goto err_unregister_clksrc;
  5868. diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c
  5869. index c0304ff608b0..6eb7bf435d9b 100644
  5870. --- a/drivers/clocksource/timer-atmel-pit.c
  5871. +++ b/drivers/clocksource/timer-atmel-pit.c
  5872. @@ -90,6 +90,7 @@ static cycle_t read_pit_clk(struct clocksource *cs)
  5873. return elapsed;
  5874. }
  5875. +static struct irqaction at91sam926x_pit_irq;
  5876. /*
  5877. * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16)
  5878. */
  5879. @@ -100,6 +101,8 @@ pit_clkevt_mode(enum clock_event_mode mode, struct clock_event_device *dev)
  5880. switch (mode) {
  5881. case CLOCK_EVT_MODE_PERIODIC:
  5882. + /* Set up irq handler */
  5883. + setup_irq(at91sam926x_pit_irq.irq, &at91sam926x_pit_irq);
  5884. /* update clocksource counter */
  5885. data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR));
  5886. pit_write(data->base, AT91_PIT_MR,
  5887. @@ -113,6 +116,7 @@ pit_clkevt_mode(enum clock_event_mode mode, struct clock_event_device *dev)
  5888. /* disable irq, leaving the clocksource active */
  5889. pit_write(data->base, AT91_PIT_MR,
  5890. (data->cycle - 1) | AT91_PIT_PITEN);
  5891. + remove_irq(at91sam926x_pit_irq.irq, &at91sam926x_pit_irq);
  5892. break;
  5893. case CLOCK_EVT_MODE_RESUME:
  5894. break;
  5895. diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c
  5896. index 1692e17e096b..306e2051f112 100644
  5897. --- a/drivers/clocksource/timer-atmel-st.c
  5898. +++ b/drivers/clocksource/timer-atmel-st.c
  5899. @@ -131,6 +131,7 @@ clkevt32k_mode(enum clock_event_mode mode, struct clock_event_device *dev)
  5900. break;
  5901. case CLOCK_EVT_MODE_SHUTDOWN:
  5902. case CLOCK_EVT_MODE_UNUSED:
  5903. + remove_irq(NR_IRQS_LEGACY + AT91_ID_SYS, &at91rm9200_timer_irq);
  5904. case CLOCK_EVT_MODE_RESUME:
  5905. irqmask = 0;
  5906. break;
  5907. diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
  5908. index c59bdcb83217..8f23161d80be 100644
  5909. --- a/drivers/cpufreq/Kconfig.x86
  5910. +++ b/drivers/cpufreq/Kconfig.x86
  5911. @@ -123,7 +123,7 @@ config X86_POWERNOW_K7_ACPI
  5912. config X86_POWERNOW_K8
  5913. tristate "AMD Opteron/Athlon64 PowerNow!"
  5914. - depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ
  5915. + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE
  5916. help
  5917. This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors.
  5918. Support for K10 and newer processors is now in acpi-cpufreq.
  5919. diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
  5920. index 8ae655c364f4..ce1d93e93d1a 100644
  5921. --- a/drivers/cpufreq/cpufreq.c
  5922. +++ b/drivers/cpufreq/cpufreq.c
  5923. @@ -64,12 +64,6 @@ static inline bool has_target(void)
  5924. return cpufreq_driver->target_index || cpufreq_driver->target;
  5925. }
  5926. -/*
  5927. - * rwsem to guarantee that cpufreq driver module doesn't unload during critical
  5928. - * sections
  5929. - */
  5930. -static DECLARE_RWSEM(cpufreq_rwsem);
  5931. -
  5932. /* internal prototypes */
  5933. static int __cpufreq_governor(struct cpufreq_policy *policy,
  5934. unsigned int event);
  5935. @@ -215,9 +209,6 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
  5936. if (cpu >= nr_cpu_ids)
  5937. return NULL;
  5938. - if (!down_read_trylock(&cpufreq_rwsem))
  5939. - return NULL;
  5940. -
  5941. /* get the cpufreq driver */
  5942. read_lock_irqsave(&cpufreq_driver_lock, flags);
  5943. @@ -230,9 +221,6 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
  5944. read_unlock_irqrestore(&cpufreq_driver_lock, flags);
  5945. - if (!policy)
  5946. - up_read(&cpufreq_rwsem);
  5947. -
  5948. return policy;
  5949. }
  5950. EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
  5951. @@ -240,7 +228,6 @@ EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
  5952. void cpufreq_cpu_put(struct cpufreq_policy *policy)
  5953. {
  5954. kobject_put(&policy->kobj);
  5955. - up_read(&cpufreq_rwsem);
  5956. }
  5957. EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
  5958. @@ -765,9 +752,6 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
  5959. struct freq_attr *fattr = to_attr(attr);
  5960. ssize_t ret;
  5961. - if (!down_read_trylock(&cpufreq_rwsem))
  5962. - return -EINVAL;
  5963. -
  5964. down_read(&policy->rwsem);
  5965. if (fattr->show)
  5966. @@ -776,7 +760,6 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
  5967. ret = -EIO;
  5968. up_read(&policy->rwsem);
  5969. - up_read(&cpufreq_rwsem);
  5970. return ret;
  5971. }
  5972. @@ -793,9 +776,6 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
  5973. if (!cpu_online(policy->cpu))
  5974. goto unlock;
  5975. - if (!down_read_trylock(&cpufreq_rwsem))
  5976. - goto unlock;
  5977. -
  5978. down_write(&policy->rwsem);
  5979. if (fattr->store)
  5980. @@ -804,8 +784,6 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
  5981. ret = -EIO;
  5982. up_write(&policy->rwsem);
  5983. -
  5984. - up_read(&cpufreq_rwsem);
  5985. unlock:
  5986. put_online_cpus();
  5987. @@ -1117,16 +1095,12 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
  5988. if (unlikely(policy))
  5989. return 0;
  5990. - if (!down_read_trylock(&cpufreq_rwsem))
  5991. - return 0;
  5992. -
  5993. /* Check if this cpu was hot-unplugged earlier and has siblings */
  5994. read_lock_irqsave(&cpufreq_driver_lock, flags);
  5995. for_each_policy(policy) {
  5996. if (cpumask_test_cpu(cpu, policy->related_cpus)) {
  5997. read_unlock_irqrestore(&cpufreq_driver_lock, flags);
  5998. ret = cpufreq_add_policy_cpu(policy, cpu, dev);
  5999. - up_read(&cpufreq_rwsem);
  6000. return ret;
  6001. }
  6002. }
  6003. @@ -1269,8 +1243,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
  6004. kobject_uevent(&policy->kobj, KOBJ_ADD);
  6005. - up_read(&cpufreq_rwsem);
  6006. -
  6007. /* Callback for handling stuff after policy is ready */
  6008. if (cpufreq_driver->ready)
  6009. cpufreq_driver->ready(policy);
  6010. @@ -1304,8 +1276,6 @@ err_set_policy_cpu:
  6011. cpufreq_policy_free(policy);
  6012. nomem_out:
  6013. - up_read(&cpufreq_rwsem);
  6014. -
  6015. return ret;
  6016. }
  6017. @@ -2499,19 +2469,20 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
  6018. pr_debug("unregistering driver %s\n", driver->name);
  6019. + /* Protect against concurrent cpu hotplug */
  6020. + get_online_cpus();
  6021. subsys_interface_unregister(&cpufreq_interface);
  6022. if (cpufreq_boost_supported())
  6023. cpufreq_sysfs_remove_file(&boost.attr);
  6024. unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
  6025. - down_write(&cpufreq_rwsem);
  6026. write_lock_irqsave(&cpufreq_driver_lock, flags);
  6027. cpufreq_driver = NULL;
  6028. write_unlock_irqrestore(&cpufreq_driver_lock, flags);
  6029. - up_write(&cpufreq_rwsem);
  6030. + put_online_cpus();
  6031. return 0;
  6032. }
  6033. diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
  6034. index c88b01bbf9a3..0fd82b872f63 100644
  6035. --- a/drivers/gpio/Kconfig
  6036. +++ b/drivers/gpio/Kconfig
  6037. @@ -309,7 +309,7 @@ config GPIO_OCTEON
  6038. family of SOCs.
  6039. config GPIO_OMAP
  6040. - bool "TI OMAP GPIO support" if COMPILE_TEST && !ARCH_OMAP2PLUS
  6041. + tristate "TI OMAP GPIO support" if ARCH_OMAP2PLUS || COMPILE_TEST
  6042. default y if ARCH_OMAP
  6043. depends on ARM
  6044. select GENERIC_IRQ_CHIP
  6045. diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
  6046. index b232397ad7ec..4916fd726dce 100644
  6047. --- a/drivers/gpio/gpio-omap.c
  6048. +++ b/drivers/gpio/gpio-omap.c
  6049. @@ -29,6 +29,7 @@
  6050. #include <linux/platform_data/gpio-omap.h>
  6051. #define OFF_MODE 1
  6052. +#define OMAP4_GPIO_DEBOUNCINGTIME_MASK 0xFF
  6053. static LIST_HEAD(omap_gpio_list);
  6054. @@ -50,14 +51,15 @@ struct gpio_regs {
  6055. struct gpio_bank {
  6056. struct list_head node;
  6057. void __iomem *base;
  6058. - u16 irq;
  6059. + int irq;
  6060. u32 non_wakeup_gpios;
  6061. u32 enabled_non_wakeup_gpios;
  6062. struct gpio_regs context;
  6063. u32 saved_datain;
  6064. u32 level_mask;
  6065. u32 toggle_mask;
  6066. - spinlock_t lock;
  6067. + raw_spinlock_t lock;
  6068. + raw_spinlock_t wa_lock;
  6069. struct gpio_chip chip;
  6070. struct clk *dbck;
  6071. u32 mod_usage;
  6072. @@ -67,7 +69,7 @@ struct gpio_bank {
  6073. struct device *dev;
  6074. bool is_mpuio;
  6075. bool dbck_flag;
  6076. - bool loses_context;
  6077. +
  6078. bool context_valid;
  6079. int stride;
  6080. u32 width;
  6081. @@ -175,7 +177,7 @@ static inline void omap_gpio_rmw(void __iomem *base, u32 reg, u32 mask, bool set
  6082. static inline void omap_gpio_dbck_enable(struct gpio_bank *bank)
  6083. {
  6084. if (bank->dbck_enable_mask && !bank->dbck_enabled) {
  6085. - clk_prepare_enable(bank->dbck);
  6086. + clk_enable(bank->dbck);
  6087. bank->dbck_enabled = true;
  6088. writel_relaxed(bank->dbck_enable_mask,
  6089. @@ -193,7 +195,7 @@ static inline void omap_gpio_dbck_disable(struct gpio_bank *bank)
  6090. */
  6091. writel_relaxed(0, bank->base + bank->regs->debounce_en);
  6092. - clk_disable_unprepare(bank->dbck);
  6093. + clk_disable(bank->dbck);
  6094. bank->dbck_enabled = false;
  6095. }
  6096. }
  6097. @@ -204,8 +206,9 @@ static inline void omap_gpio_dbck_disable(struct gpio_bank *bank)
  6098. * @offset: the gpio number on this @bank
  6099. * @debounce: debounce time to use
  6100. *
  6101. - * OMAP's debounce time is in 31us steps so we need
  6102. - * to convert and round up to the closest unit.
  6103. + * OMAP's debounce time is in 31us steps
  6104. + * <debounce time> = (GPIO_DEBOUNCINGTIME[7:0].DEBOUNCETIME + 1) x 31
  6105. + * so we need to convert and round up to the closest unit.
  6106. */
  6107. static void omap2_set_gpio_debounce(struct gpio_bank *bank, unsigned offset,
  6108. unsigned debounce)
  6109. @@ -213,34 +216,33 @@ static void omap2_set_gpio_debounce(struct gpio_bank *bank, unsigned offset,
  6110. void __iomem *reg;
  6111. u32 val;
  6112. u32 l;
  6113. + bool enable = !!debounce;
  6114. if (!bank->dbck_flag)
  6115. return;
  6116. - if (debounce < 32)
  6117. - debounce = 0x01;
  6118. - else if (debounce > 7936)
  6119. - debounce = 0xff;
  6120. - else
  6121. - debounce = (debounce / 0x1f) - 1;
  6122. + if (enable) {
  6123. + debounce = DIV_ROUND_UP(debounce, 31) - 1;
  6124. + debounce &= OMAP4_GPIO_DEBOUNCINGTIME_MASK;
  6125. + }
  6126. l = BIT(offset);
  6127. - clk_prepare_enable(bank->dbck);
  6128. + clk_enable(bank->dbck);
  6129. reg = bank->base + bank->regs->debounce;
  6130. writel_relaxed(debounce, reg);
  6131. reg = bank->base + bank->regs->debounce_en;
  6132. val = readl_relaxed(reg);
  6133. - if (debounce)
  6134. + if (enable)
  6135. val |= l;
  6136. else
  6137. val &= ~l;
  6138. bank->dbck_enable_mask = val;
  6139. writel_relaxed(val, reg);
  6140. - clk_disable_unprepare(bank->dbck);
  6141. + clk_disable(bank->dbck);
  6142. /*
  6143. * Enable debounce clock per module.
  6144. * This call is mandatory because in omap_gpio_request() when
  6145. @@ -285,7 +287,7 @@ static void omap_clear_gpio_debounce(struct gpio_bank *bank, unsigned offset)
  6146. bank->context.debounce = 0;
  6147. writel_relaxed(bank->context.debounce, bank->base +
  6148. bank->regs->debounce);
  6149. - clk_disable_unprepare(bank->dbck);
  6150. + clk_disable(bank->dbck);
  6151. bank->dbck_enabled = false;
  6152. }
  6153. }
  6154. @@ -488,9 +490,6 @@ static int omap_gpio_irq_type(struct irq_data *d, unsigned type)
  6155. unsigned long flags;
  6156. unsigned offset = d->hwirq;
  6157. - if (!BANK_USED(bank))
  6158. - pm_runtime_get_sync(bank->dev);
  6159. -
  6160. if (type & ~IRQ_TYPE_SENSE_MASK)
  6161. return -EINVAL;
  6162. @@ -498,20 +497,28 @@ static int omap_gpio_irq_type(struct irq_data *d, unsigned type)
  6163. (type & (IRQ_TYPE_LEVEL_LOW|IRQ_TYPE_LEVEL_HIGH)))
  6164. return -EINVAL;
  6165. - spin_lock_irqsave(&bank->lock, flags);
  6166. + raw_spin_lock_irqsave(&bank->lock, flags);
  6167. retval = omap_set_gpio_triggering(bank, offset, type);
  6168. + if (retval) {
  6169. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6170. + goto error;
  6171. + }
  6172. omap_gpio_init_irq(bank, offset);
  6173. if (!omap_gpio_is_input(bank, offset)) {
  6174. - spin_unlock_irqrestore(&bank->lock, flags);
  6175. - return -EINVAL;
  6176. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6177. + retval = -EINVAL;
  6178. + goto error;
  6179. }
  6180. - spin_unlock_irqrestore(&bank->lock, flags);
  6181. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6182. if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
  6183. __irq_set_handler_locked(d->irq, handle_level_irq);
  6184. else if (type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
  6185. __irq_set_handler_locked(d->irq, handle_edge_irq);
  6186. + return 0;
  6187. +
  6188. +error:
  6189. return retval;
  6190. }
  6191. @@ -626,34 +633,30 @@ static int omap_set_gpio_wakeup(struct gpio_bank *bank, unsigned offset,
  6192. return -EINVAL;
  6193. }
  6194. - spin_lock_irqsave(&bank->lock, flags);
  6195. + raw_spin_lock_irqsave(&bank->lock, flags);
  6196. if (enable)
  6197. bank->context.wake_en |= gpio_bit;
  6198. else
  6199. bank->context.wake_en &= ~gpio_bit;
  6200. writel_relaxed(bank->context.wake_en, bank->base + bank->regs->wkup_en);
  6201. - spin_unlock_irqrestore(&bank->lock, flags);
  6202. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6203. return 0;
  6204. }
  6205. -static void omap_reset_gpio(struct gpio_bank *bank, unsigned offset)
  6206. -{
  6207. - omap_set_gpio_direction(bank, offset, 1);
  6208. - omap_set_gpio_irqenable(bank, offset, 0);
  6209. - omap_clear_gpio_irqstatus(bank, offset);
  6210. - omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE);
  6211. - omap_clear_gpio_debounce(bank, offset);
  6212. -}
  6213. -
  6214. /* Use disable_irq_wake() and enable_irq_wake() functions from drivers */
  6215. static int omap_gpio_wake_enable(struct irq_data *d, unsigned int enable)
  6216. {
  6217. struct gpio_bank *bank = omap_irq_data_get_bank(d);
  6218. unsigned offset = d->hwirq;
  6219. + int ret;
  6220. +
  6221. + ret = omap_set_gpio_wakeup(bank, offset, enable);
  6222. + if (!ret)
  6223. + ret = irq_set_irq_wake(bank->irq, enable);
  6224. - return omap_set_gpio_wakeup(bank, offset, enable);
  6225. + return ret;
  6226. }
  6227. static int omap_gpio_request(struct gpio_chip *chip, unsigned offset)
  6228. @@ -668,17 +671,10 @@ static int omap_gpio_request(struct gpio_chip *chip, unsigned offset)
  6229. if (!BANK_USED(bank))
  6230. pm_runtime_get_sync(bank->dev);
  6231. - spin_lock_irqsave(&bank->lock, flags);
  6232. - /* Set trigger to none. You need to enable the desired trigger with
  6233. - * request_irq() or set_irq_type(). Only do this if the IRQ line has
  6234. - * not already been requested.
  6235. - */
  6236. - if (!LINE_USED(bank->irq_usage, offset)) {
  6237. - omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE);
  6238. - omap_enable_gpio_module(bank, offset);
  6239. - }
  6240. + raw_spin_lock_irqsave(&bank->lock, flags);
  6241. + omap_enable_gpio_module(bank, offset);
  6242. bank->mod_usage |= BIT(offset);
  6243. - spin_unlock_irqrestore(&bank->lock, flags);
  6244. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6245. return 0;
  6246. }
  6247. @@ -688,11 +684,14 @@ static void omap_gpio_free(struct gpio_chip *chip, unsigned offset)
  6248. struct gpio_bank *bank = container_of(chip, struct gpio_bank, chip);
  6249. unsigned long flags;
  6250. - spin_lock_irqsave(&bank->lock, flags);
  6251. + raw_spin_lock_irqsave(&bank->lock, flags);
  6252. bank->mod_usage &= ~(BIT(offset));
  6253. + if (!LINE_USED(bank->irq_usage, offset)) {
  6254. + omap_set_gpio_direction(bank, offset, 1);
  6255. + omap_clear_gpio_debounce(bank, offset);
  6256. + }
  6257. omap_disable_gpio_module(bank, offset);
  6258. - omap_reset_gpio(bank, offset);
  6259. - spin_unlock_irqrestore(&bank->lock, flags);
  6260. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6261. /*
  6262. * If this is the last gpio to be freed in the bank,
  6263. @@ -711,29 +710,27 @@ static void omap_gpio_free(struct gpio_chip *chip, unsigned offset)
  6264. * line's interrupt handler has been run, we may miss some nested
  6265. * interrupts.
  6266. */
  6267. -static void omap_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
  6268. +static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank)
  6269. {
  6270. void __iomem *isr_reg = NULL;
  6271. u32 isr;
  6272. unsigned int bit;
  6273. - struct gpio_bank *bank;
  6274. - int unmasked = 0;
  6275. - struct irq_chip *irqchip = irq_desc_get_chip(desc);
  6276. - struct gpio_chip *chip = irq_get_handler_data(irq);
  6277. + struct gpio_bank *bank = gpiobank;
  6278. + unsigned long wa_lock_flags;
  6279. + unsigned long lock_flags;
  6280. - chained_irq_enter(irqchip, desc);
  6281. -
  6282. - bank = container_of(chip, struct gpio_bank, chip);
  6283. isr_reg = bank->base + bank->regs->irqstatus;
  6284. - pm_runtime_get_sync(bank->dev);
  6285. -
  6286. if (WARN_ON(!isr_reg))
  6287. goto exit;
  6288. + pm_runtime_get_sync(bank->dev);
  6289. +
  6290. while (1) {
  6291. u32 isr_saved, level_mask = 0;
  6292. u32 enabled;
  6293. + raw_spin_lock_irqsave(&bank->lock, lock_flags);
  6294. +
  6295. enabled = omap_get_gpio_irqbank_mask(bank);
  6296. isr_saved = isr = readl_relaxed(isr_reg) & enabled;
  6297. @@ -747,12 +744,7 @@ static void omap_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
  6298. omap_clear_gpio_irqbank(bank, isr_saved & ~level_mask);
  6299. omap_enable_gpio_irqbank(bank, isr_saved & ~level_mask);
  6300. - /* if there is only edge sensitive GPIO pin interrupts
  6301. - configured, we could unmask GPIO bank interrupt immediately */
  6302. - if (!level_mask && !unmasked) {
  6303. - unmasked = 1;
  6304. - chained_irq_exit(irqchip, desc);
  6305. - }
  6306. + raw_spin_unlock_irqrestore(&bank->lock, lock_flags);
  6307. if (!isr)
  6308. break;
  6309. @@ -761,6 +753,7 @@ static void omap_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
  6310. bit = __ffs(isr);
  6311. isr &= ~(BIT(bit));
  6312. + raw_spin_lock_irqsave(&bank->lock, lock_flags);
  6313. /*
  6314. * Some chips can't respond to both rising and falling
  6315. * at the same time. If this irq was requested with
  6316. @@ -771,18 +764,20 @@ static void omap_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
  6317. if (bank->toggle_mask & (BIT(bit)))
  6318. omap_toggle_gpio_edge_triggering(bank, bit);
  6319. + raw_spin_unlock_irqrestore(&bank->lock, lock_flags);
  6320. +
  6321. + raw_spin_lock_irqsave(&bank->wa_lock, wa_lock_flags);
  6322. +
  6323. generic_handle_irq(irq_find_mapping(bank->chip.irqdomain,
  6324. bit));
  6325. +
  6326. + raw_spin_unlock_irqrestore(&bank->wa_lock,
  6327. + wa_lock_flags);
  6328. }
  6329. }
  6330. - /* if bank has any level sensitive GPIO pin interrupt
  6331. - configured, we must unmask the bank interrupt only after
  6332. - handler(s) are executed in order to avoid spurious bank
  6333. - interrupt */
  6334. exit:
  6335. - if (!unmasked)
  6336. - chained_irq_exit(irqchip, desc);
  6337. pm_runtime_put(bank->dev);
  6338. + return IRQ_HANDLED;
  6339. }
  6340. static unsigned int omap_gpio_irq_startup(struct irq_data *d)
  6341. @@ -791,15 +786,22 @@ static unsigned int omap_gpio_irq_startup(struct irq_data *d)
  6342. unsigned long flags;
  6343. unsigned offset = d->hwirq;
  6344. - if (!BANK_USED(bank))
  6345. - pm_runtime_get_sync(bank->dev);
  6346. + raw_spin_lock_irqsave(&bank->lock, flags);
  6347. - spin_lock_irqsave(&bank->lock, flags);
  6348. - omap_gpio_init_irq(bank, offset);
  6349. - spin_unlock_irqrestore(&bank->lock, flags);
  6350. + if (!LINE_USED(bank->mod_usage, offset))
  6351. + omap_set_gpio_direction(bank, offset, 1);
  6352. + else if (!omap_gpio_is_input(bank, offset))
  6353. + goto err;
  6354. + omap_enable_gpio_module(bank, offset);
  6355. + bank->irq_usage |= BIT(offset);
  6356. +
  6357. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6358. omap_gpio_unmask_irq(d);
  6359. return 0;
  6360. +err:
  6361. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6362. + return -EINVAL;
  6363. }
  6364. static void omap_gpio_irq_shutdown(struct irq_data *d)
  6365. @@ -808,11 +810,28 @@ static void omap_gpio_irq_shutdown(struct irq_data *d)
  6366. unsigned long flags;
  6367. unsigned offset = d->hwirq;
  6368. - spin_lock_irqsave(&bank->lock, flags);
  6369. + raw_spin_lock_irqsave(&bank->lock, flags);
  6370. bank->irq_usage &= ~(BIT(offset));
  6371. + omap_set_gpio_irqenable(bank, offset, 0);
  6372. + omap_clear_gpio_irqstatus(bank, offset);
  6373. + omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE);
  6374. + if (!LINE_USED(bank->mod_usage, offset))
  6375. + omap_clear_gpio_debounce(bank, offset);
  6376. omap_disable_gpio_module(bank, offset);
  6377. - omap_reset_gpio(bank, offset);
  6378. - spin_unlock_irqrestore(&bank->lock, flags);
  6379. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6380. +}
  6381. +
  6382. +static void omap_gpio_irq_bus_lock(struct irq_data *data)
  6383. +{
  6384. + struct gpio_bank *bank = omap_irq_data_get_bank(data);
  6385. +
  6386. + if (!BANK_USED(bank))
  6387. + pm_runtime_get_sync(bank->dev);
  6388. +}
  6389. +
  6390. +static void gpio_irq_bus_sync_unlock(struct irq_data *data)
  6391. +{
  6392. + struct gpio_bank *bank = omap_irq_data_get_bank(data);
  6393. /*
  6394. * If this is the last IRQ to be freed in the bank,
  6395. @@ -836,10 +855,10 @@ static void omap_gpio_mask_irq(struct irq_data *d)
  6396. unsigned offset = d->hwirq;
  6397. unsigned long flags;
  6398. - spin_lock_irqsave(&bank->lock, flags);
  6399. + raw_spin_lock_irqsave(&bank->lock, flags);
  6400. omap_set_gpio_irqenable(bank, offset, 0);
  6401. omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE);
  6402. - spin_unlock_irqrestore(&bank->lock, flags);
  6403. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6404. }
  6405. static void omap_gpio_unmask_irq(struct irq_data *d)
  6406. @@ -849,7 +868,7 @@ static void omap_gpio_unmask_irq(struct irq_data *d)
  6407. u32 trigger = irqd_get_trigger_type(d);
  6408. unsigned long flags;
  6409. - spin_lock_irqsave(&bank->lock, flags);
  6410. + raw_spin_lock_irqsave(&bank->lock, flags);
  6411. if (trigger)
  6412. omap_set_gpio_triggering(bank, offset, trigger);
  6413. @@ -861,7 +880,7 @@ static void omap_gpio_unmask_irq(struct irq_data *d)
  6414. }
  6415. omap_set_gpio_irqenable(bank, offset, 1);
  6416. - spin_unlock_irqrestore(&bank->lock, flags);
  6417. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6418. }
  6419. /*---------------------------------------------------------------------*/
  6420. @@ -874,9 +893,9 @@ static int omap_mpuio_suspend_noirq(struct device *dev)
  6421. OMAP_MPUIO_GPIO_MASKIT / bank->stride;
  6422. unsigned long flags;
  6423. - spin_lock_irqsave(&bank->lock, flags);
  6424. + raw_spin_lock_irqsave(&bank->lock, flags);
  6425. writel_relaxed(0xffff & ~bank->context.wake_en, mask_reg);
  6426. - spin_unlock_irqrestore(&bank->lock, flags);
  6427. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6428. return 0;
  6429. }
  6430. @@ -889,9 +908,9 @@ static int omap_mpuio_resume_noirq(struct device *dev)
  6431. OMAP_MPUIO_GPIO_MASKIT / bank->stride;
  6432. unsigned long flags;
  6433. - spin_lock_irqsave(&bank->lock, flags);
  6434. + raw_spin_lock_irqsave(&bank->lock, flags);
  6435. writel_relaxed(bank->context.wake_en, mask_reg);
  6436. - spin_unlock_irqrestore(&bank->lock, flags);
  6437. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6438. return 0;
  6439. }
  6440. @@ -937,9 +956,9 @@ static int omap_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
  6441. bank = container_of(chip, struct gpio_bank, chip);
  6442. reg = bank->base + bank->regs->direction;
  6443. - spin_lock_irqsave(&bank->lock, flags);
  6444. + raw_spin_lock_irqsave(&bank->lock, flags);
  6445. dir = !!(readl_relaxed(reg) & BIT(offset));
  6446. - spin_unlock_irqrestore(&bank->lock, flags);
  6447. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6448. return dir;
  6449. }
  6450. @@ -949,9 +968,9 @@ static int omap_gpio_input(struct gpio_chip *chip, unsigned offset)
  6451. unsigned long flags;
  6452. bank = container_of(chip, struct gpio_bank, chip);
  6453. - spin_lock_irqsave(&bank->lock, flags);
  6454. + raw_spin_lock_irqsave(&bank->lock, flags);
  6455. omap_set_gpio_direction(bank, offset, 1);
  6456. - spin_unlock_irqrestore(&bank->lock, flags);
  6457. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6458. return 0;
  6459. }
  6460. @@ -973,10 +992,10 @@ static int omap_gpio_output(struct gpio_chip *chip, unsigned offset, int value)
  6461. unsigned long flags;
  6462. bank = container_of(chip, struct gpio_bank, chip);
  6463. - spin_lock_irqsave(&bank->lock, flags);
  6464. + raw_spin_lock_irqsave(&bank->lock, flags);
  6465. bank->set_dataout(bank, offset, value);
  6466. omap_set_gpio_direction(bank, offset, 0);
  6467. - spin_unlock_irqrestore(&bank->lock, flags);
  6468. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6469. return 0;
  6470. }
  6471. @@ -988,9 +1007,9 @@ static int omap_gpio_debounce(struct gpio_chip *chip, unsigned offset,
  6472. bank = container_of(chip, struct gpio_bank, chip);
  6473. - spin_lock_irqsave(&bank->lock, flags);
  6474. + raw_spin_lock_irqsave(&bank->lock, flags);
  6475. omap2_set_gpio_debounce(bank, offset, debounce);
  6476. - spin_unlock_irqrestore(&bank->lock, flags);
  6477. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6478. return 0;
  6479. }
  6480. @@ -1001,9 +1020,9 @@ static void omap_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
  6481. unsigned long flags;
  6482. bank = container_of(chip, struct gpio_bank, chip);
  6483. - spin_lock_irqsave(&bank->lock, flags);
  6484. + raw_spin_lock_irqsave(&bank->lock, flags);
  6485. bank->set_dataout(bank, offset, value);
  6486. - spin_unlock_irqrestore(&bank->lock, flags);
  6487. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6488. }
  6489. /*---------------------------------------------------------------------*/
  6490. @@ -1048,10 +1067,6 @@ static void omap_gpio_mod_init(struct gpio_bank *bank)
  6491. /* Initialize interface clk ungated, module enabled */
  6492. if (bank->regs->ctrl)
  6493. writel_relaxed(0, base + bank->regs->ctrl);
  6494. -
  6495. - bank->dbck = clk_get(bank->dev, "dbclk");
  6496. - if (IS_ERR(bank->dbck))
  6497. - dev_err(bank->dev, "Could not get gpio dbck\n");
  6498. }
  6499. static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
  6500. @@ -1080,7 +1095,6 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
  6501. } else {
  6502. bank->chip.label = "gpio";
  6503. bank->chip.base = gpio;
  6504. - gpio += bank->width;
  6505. }
  6506. bank->chip.ngpio = bank->width;
  6507. @@ -1090,6 +1104,9 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
  6508. return ret;
  6509. }
  6510. + if (!bank->is_mpuio)
  6511. + gpio += bank->width;
  6512. +
  6513. #ifdef CONFIG_ARCH_OMAP1
  6514. /*
  6515. * REVISIT: Once we have OMAP1 supporting SPARSE_IRQ, we can drop
  6516. @@ -1112,7 +1129,7 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
  6517. }
  6518. ret = gpiochip_irqchip_add(&bank->chip, irqc,
  6519. - irq_base, omap_gpio_irq_handler,
  6520. + irq_base, handle_bad_irq,
  6521. IRQ_TYPE_NONE);
  6522. if (ret) {
  6523. @@ -1121,10 +1138,14 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
  6524. return -ENODEV;
  6525. }
  6526. - gpiochip_set_chained_irqchip(&bank->chip, irqc,
  6527. - bank->irq, omap_gpio_irq_handler);
  6528. + gpiochip_set_chained_irqchip(&bank->chip, irqc, bank->irq, NULL);
  6529. - return 0;
  6530. + ret = devm_request_irq(bank->dev, bank->irq, omap_gpio_irq_handler,
  6531. + 0, dev_name(bank->dev), bank);
  6532. + if (ret)
  6533. + gpiochip_remove(&bank->chip);
  6534. +
  6535. + return ret;
  6536. }
  6537. static const struct of_device_id omap_gpio_match[];
  6538. @@ -1163,17 +1184,23 @@ static int omap_gpio_probe(struct platform_device *pdev)
  6539. irqc->irq_unmask = omap_gpio_unmask_irq,
  6540. irqc->irq_set_type = omap_gpio_irq_type,
  6541. irqc->irq_set_wake = omap_gpio_wake_enable,
  6542. + irqc->irq_bus_lock = omap_gpio_irq_bus_lock,
  6543. + irqc->irq_bus_sync_unlock = gpio_irq_bus_sync_unlock,
  6544. irqc->name = dev_name(&pdev->dev);
  6545. - res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
  6546. - if (unlikely(!res)) {
  6547. - dev_err(dev, "Invalid IRQ resource\n");
  6548. - return -ENODEV;
  6549. + bank->irq = platform_get_irq(pdev, 0);
  6550. + if (bank->irq <= 0) {
  6551. + if (!bank->irq)
  6552. + bank->irq = -ENXIO;
  6553. + if (bank->irq != -EPROBE_DEFER)
  6554. + dev_err(dev,
  6555. + "can't get irq resource ret=%d\n", bank->irq);
  6556. + return bank->irq;
  6557. }
  6558. - bank->irq = res->start;
  6559. bank->dev = dev;
  6560. bank->chip.dev = dev;
  6561. + bank->chip.owner = THIS_MODULE;
  6562. bank->dbck_flag = pdata->dbck_flag;
  6563. bank->stride = pdata->bank_stride;
  6564. bank->width = pdata->bank_width;
  6565. @@ -1183,15 +1210,9 @@ static int omap_gpio_probe(struct platform_device *pdev)
  6566. #ifdef CONFIG_OF_GPIO
  6567. bank->chip.of_node = of_node_get(node);
  6568. #endif
  6569. - if (node) {
  6570. - if (!of_property_read_bool(node, "ti,gpio-always-on"))
  6571. - bank->loses_context = true;
  6572. - } else {
  6573. - bank->loses_context = pdata->loses_context;
  6574. -
  6575. - if (bank->loses_context)
  6576. - bank->get_context_loss_count =
  6577. - pdata->get_context_loss_count;
  6578. + if (!node) {
  6579. + bank->get_context_loss_count =
  6580. + pdata->get_context_loss_count;
  6581. }
  6582. if (bank->regs->set_dataout && bank->regs->clr_dataout)
  6583. @@ -1199,16 +1220,27 @@ static int omap_gpio_probe(struct platform_device *pdev)
  6584. else
  6585. bank->set_dataout = omap_set_gpio_dataout_mask;
  6586. - spin_lock_init(&bank->lock);
  6587. + raw_spin_lock_init(&bank->lock);
  6588. + raw_spin_lock_init(&bank->wa_lock);
  6589. /* Static mapping, never released */
  6590. res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
  6591. bank->base = devm_ioremap_resource(dev, res);
  6592. if (IS_ERR(bank->base)) {
  6593. - irq_domain_remove(bank->chip.irqdomain);
  6594. return PTR_ERR(bank->base);
  6595. }
  6596. + if (bank->dbck_flag) {
  6597. + bank->dbck = devm_clk_get(bank->dev, "dbclk");
  6598. + if (IS_ERR(bank->dbck)) {
  6599. + dev_err(bank->dev,
  6600. + "Could not get gpio dbck. Disable debounce\n");
  6601. + bank->dbck_flag = false;
  6602. + } else {
  6603. + clk_prepare(bank->dbck);
  6604. + }
  6605. + }
  6606. +
  6607. platform_set_drvdata(pdev, bank);
  6608. pm_runtime_enable(bank->dev);
  6609. @@ -1221,8 +1253,11 @@ static int omap_gpio_probe(struct platform_device *pdev)
  6610. omap_gpio_mod_init(bank);
  6611. ret = omap_gpio_chip_init(bank, irqc);
  6612. - if (ret)
  6613. + if (ret) {
  6614. + pm_runtime_put_sync(bank->dev);
  6615. + pm_runtime_disable(bank->dev);
  6616. return ret;
  6617. + }
  6618. omap_gpio_show_rev(bank);
  6619. @@ -1233,6 +1268,19 @@ static int omap_gpio_probe(struct platform_device *pdev)
  6620. return 0;
  6621. }
  6622. +static int omap_gpio_remove(struct platform_device *pdev)
  6623. +{
  6624. + struct gpio_bank *bank = platform_get_drvdata(pdev);
  6625. +
  6626. + list_del(&bank->node);
  6627. + gpiochip_remove(&bank->chip);
  6628. + pm_runtime_disable(bank->dev);
  6629. + if (bank->dbck_flag)
  6630. + clk_unprepare(bank->dbck);
  6631. +
  6632. + return 0;
  6633. +}
  6634. +
  6635. #ifdef CONFIG_ARCH_OMAP2PLUS
  6636. #if defined(CONFIG_PM)
  6637. @@ -1246,7 +1294,7 @@ static int omap_gpio_runtime_suspend(struct device *dev)
  6638. unsigned long flags;
  6639. u32 wake_low, wake_hi;
  6640. - spin_lock_irqsave(&bank->lock, flags);
  6641. + raw_spin_lock_irqsave(&bank->lock, flags);
  6642. /*
  6643. * Only edges can generate a wakeup event to the PRCM.
  6644. @@ -1299,7 +1347,7 @@ update_gpio_context_count:
  6645. bank->get_context_loss_count(bank->dev);
  6646. omap_gpio_dbck_disable(bank);
  6647. - spin_unlock_irqrestore(&bank->lock, flags);
  6648. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6649. return 0;
  6650. }
  6651. @@ -1314,14 +1362,14 @@ static int omap_gpio_runtime_resume(struct device *dev)
  6652. unsigned long flags;
  6653. int c;
  6654. - spin_lock_irqsave(&bank->lock, flags);
  6655. + raw_spin_lock_irqsave(&bank->lock, flags);
  6656. /*
  6657. * On the first resume during the probe, the context has not
  6658. * been initialised and so initialise it now. Also initialise
  6659. * the context loss count.
  6660. */
  6661. - if (bank->loses_context && !bank->context_valid) {
  6662. + if (!bank->context_valid) {
  6663. omap_gpio_init_context(bank);
  6664. if (bank->get_context_loss_count)
  6665. @@ -1342,22 +1390,20 @@ static int omap_gpio_runtime_resume(struct device *dev)
  6666. writel_relaxed(bank->context.risingdetect,
  6667. bank->base + bank->regs->risingdetect);
  6668. - if (bank->loses_context) {
  6669. - if (!bank->get_context_loss_count) {
  6670. + if (!bank->get_context_loss_count) {
  6671. + omap_gpio_restore_context(bank);
  6672. + } else {
  6673. + c = bank->get_context_loss_count(bank->dev);
  6674. + if (c != bank->context_loss_count) {
  6675. omap_gpio_restore_context(bank);
  6676. } else {
  6677. - c = bank->get_context_loss_count(bank->dev);
  6678. - if (c != bank->context_loss_count) {
  6679. - omap_gpio_restore_context(bank);
  6680. - } else {
  6681. - spin_unlock_irqrestore(&bank->lock, flags);
  6682. - return 0;
  6683. - }
  6684. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6685. + return 0;
  6686. }
  6687. }
  6688. if (!bank->workaround_enabled) {
  6689. - spin_unlock_irqrestore(&bank->lock, flags);
  6690. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6691. return 0;
  6692. }
  6693. @@ -1412,18 +1458,19 @@ static int omap_gpio_runtime_resume(struct device *dev)
  6694. }
  6695. bank->workaround_enabled = false;
  6696. - spin_unlock_irqrestore(&bank->lock, flags);
  6697. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6698. return 0;
  6699. }
  6700. #endif /* CONFIG_PM */
  6701. +#if IS_BUILTIN(CONFIG_GPIO_OMAP)
  6702. void omap2_gpio_prepare_for_idle(int pwr_mode)
  6703. {
  6704. struct gpio_bank *bank;
  6705. list_for_each_entry(bank, &omap_gpio_list, node) {
  6706. - if (!BANK_USED(bank) || !bank->loses_context)
  6707. + if (!BANK_USED(bank))
  6708. continue;
  6709. bank->power_mode = pwr_mode;
  6710. @@ -1437,12 +1484,13 @@ void omap2_gpio_resume_after_idle(void)
  6711. struct gpio_bank *bank;
  6712. list_for_each_entry(bank, &omap_gpio_list, node) {
  6713. - if (!BANK_USED(bank) || !bank->loses_context)
  6714. + if (!BANK_USED(bank))
  6715. continue;
  6716. pm_runtime_get_sync(bank->dev);
  6717. }
  6718. }
  6719. +#endif
  6720. #if defined(CONFIG_PM)
  6721. static void omap_gpio_init_context(struct gpio_bank *p)
  6722. @@ -1598,6 +1646,7 @@ MODULE_DEVICE_TABLE(of, omap_gpio_match);
  6723. static struct platform_driver omap_gpio_driver = {
  6724. .probe = omap_gpio_probe,
  6725. + .remove = omap_gpio_remove,
  6726. .driver = {
  6727. .name = "omap_gpio",
  6728. .pm = &gpio_pm_ops,
  6729. @@ -1615,3 +1664,13 @@ static int __init omap_gpio_drv_reg(void)
  6730. return platform_driver_register(&omap_gpio_driver);
  6731. }
  6732. postcore_initcall(omap_gpio_drv_reg);
  6733. +
  6734. +static void __exit omap_gpio_exit(void)
  6735. +{
  6736. + platform_driver_unregister(&omap_gpio_driver);
  6737. +}
  6738. +module_exit(omap_gpio_exit);
  6739. +
  6740. +MODULE_DESCRIPTION("omap gpio driver");
  6741. +MODULE_ALIAS("platform:gpio-omap");
  6742. +MODULE_LICENSE("GPL v2");
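The debounce conversion introduced above is easy to sanity-check outside the kernel. The following standalone sketch (not part of the patch; the 0xff mask value is assumed from the OMAP4 register layout) reproduces the arithmetic omap2_set_gpio_debounce() now uses: the requested time in microseconds is rounded up to the next 31 us step, and the effective time is (register value + 1) x 31 us.

/* Standalone sketch of the DEBOUNCETIME encoding; the mask value is an
 * assumption, not copied from the driver headers. */
#include <stdio.h>

#define DIV_ROUND_UP(n, d)		(((n) + (d) - 1) / (d))
#define OMAP4_GPIO_DEBOUNCINGTIME_MASK	0xff	/* assumed 8-bit field */

int main(void)
{
	unsigned int requested[] = { 10, 100, 7936, 9000 };
	unsigned int i;

	for (i = 0; i < sizeof(requested) / sizeof(requested[0]); i++) {
		unsigned int us = requested[i];
		unsigned int reg = (DIV_ROUND_UP(us, 31) - 1) &
				   OMAP4_GPIO_DEBOUNCINGTIME_MASK;

		printf("request %u us -> reg 0x%02x -> effective %u us\n",
		       us, reg, (reg + 1) * 31);
	}
	return 0;
}

Requests below 31 us round up to one step (31 us); requests past the 7936 us maximum wrap because of the 8-bit mask, matching what the patched code would program.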
  6743. diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  6744. index 479024a4caad..a67a351e88ab 100644
  6745. --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  6746. +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  6747. @@ -32,6 +32,7 @@
  6748. #include "i915_trace.h"
  6749. #include "intel_drv.h"
  6750. #include <linux/dma_remapping.h>
  6751. +#include <linux/uaccess.h>
  6752. #define __EXEC_OBJECT_HAS_PIN (1<<31)
  6753. #define __EXEC_OBJECT_HAS_FENCE (1<<30)
  6754. @@ -465,7 +466,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
  6755. }
  6756. /* We can't wait for rendering with pagefaults disabled */
  6757. - if (obj->active && in_atomic())
  6758. + if (obj->active && pagefault_disabled())
  6759. return -EFAULT;
  6760. if (use_cpu_reloc(obj))
  6761. @@ -1338,7 +1339,9 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
  6762. return ret;
  6763. }
  6764. +#ifndef CONFIG_PREEMPT_RT_BASE
  6765. trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);
  6766. +#endif
  6767. i915_gem_execbuffer_move_to_active(vmas, ring);
  6768. i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
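The in_atomic() test replaced above was only ever a proxy for "page faults must not sleep here"; on PREEMPT_RT the relocation path stays preemptible, so the proxy breaks and the explicit pagefault_disabled() check (a mainline helper from <linux/uaccess.h>) is used instead. A minimal sketch of the kind of region this guards, assuming only the standard uaccess helpers and an illustrative function name:

/* Sketch, not code from this patch. */
#include <linux/uaccess.h>

static int example_copy_no_fault(void *dst, const void __user *src, size_t len)
{
	unsigned long left;

	pagefault_disable();			/* faulting must not sleep here */
	left = __copy_from_user_inatomic(dst, src, len);
	pagefault_enable();

	return left ? -EFAULT : 0;
}

/* Inside such a region a callee should test pagefault_disabled() rather
 * than in_atomic(): on PREEMPT_RT the region remains preemptible, so
 * in_atomic() can read false even though faulting is unsafe. */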
  6769. diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
  6770. index 7ab9cc456dd2..e06515f4eb7c 100644
  6771. --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
  6772. +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
  6773. @@ -39,7 +39,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
  6774. if (!mutex_is_locked(mutex))
  6775. return false;
  6776. -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
  6777. +#if (defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)) && !defined(CONFIG_PREEMPT_RT_BASE)
  6778. return mutex->owner == task;
  6779. #else
  6780. /* Since UP may be pre-empted, we cannot assume that we own the lock */
  6781. diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
  6782. index b0df8d10482a..8d34df020842 100644
  6783. --- a/drivers/gpu/drm/i915/i915_irq.c
  6784. +++ b/drivers/gpu/drm/i915/i915_irq.c
  6785. @@ -676,6 +676,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, int pipe,
  6786. spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
  6787. /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
  6788. + preempt_disable_rt();
  6789. /* Get optional system timestamp before query. */
  6790. if (stime)
  6791. @@ -727,6 +728,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, int pipe,
  6792. *etime = ktime_get();
  6793. /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
  6794. + preempt_enable_rt();
  6795. spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
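preempt_disable_rt()/preempt_enable_rt() are introduced earlier in this series (the radeon_display.c hunk further down uses the same pair). Roughly, as a sketch rather than a copy of the patch, they expand as follows, so the two timestamps around the scanout-position read stay back-to-back on RT, where spin_lock_irqsave() no longer disables preemption, while remaining no-ops on mainline preemption models:

/* Approximate definitions; the real ones live in the preempt.h changes
 * earlier in this series. */
#ifdef CONFIG_PREEMPT_RT_FULL
# define preempt_disable_rt()		preempt_disable()
# define preempt_enable_rt()		preempt_enable()
#else
# define preempt_disable_rt()		do { } while (0)
# define preempt_enable_rt()		do { } while (0)
#endif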
  6796. diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
  6797. index 56323732c748..d1f1781392bf 100644
  6798. --- a/drivers/gpu/drm/i915/intel_display.c
  6799. +++ b/drivers/gpu/drm/i915/intel_display.c
  6800. @@ -10084,7 +10084,7 @@ void intel_check_page_flip(struct drm_device *dev, int pipe)
  6801. struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
  6802. struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
  6803. - WARN_ON(!in_interrupt());
  6804. + WARN_ON_NONRT(!in_interrupt());
  6805. if (crtc == NULL)
  6806. return;
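WARN_ON_NONRT() here, and BUG_ON_NONRT() in the drivers/md/dm.c hunk below, silence assertions that only hold when the code is guaranteed to run in hard interrupt context or with interrupts disabled, which is no longer true on RT. A plausible sketch of their definitions, assuming the bug.h additions made earlier in this series:

/* Sketch; the real definitions are added to asm-generic/bug.h earlier
 * in this series. */
#ifdef CONFIG_PREEMPT_RT_BASE
# define WARN_ON_NONRT(condition)	do { } while (0)
# define BUG_ON_NONRT(condition)	do { } while (0)
#else
# define WARN_ON_NONRT(condition)	WARN_ON(condition)
# define BUG_ON_NONRT(condition)	BUG_ON(condition)
#endif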
  6807. diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
  6808. index a4c0a04b5044..6da459fe20b2 100644
  6809. --- a/drivers/gpu/drm/i915/intel_sprite.c
  6810. +++ b/drivers/gpu/drm/i915/intel_sprite.c
  6811. @@ -37,6 +37,7 @@
  6812. #include "intel_drv.h"
  6813. #include <drm/i915_drm.h>
  6814. #include "i915_drv.h"
  6815. +#include <linux/locallock.h>
  6816. static bool
  6817. format_is_yuv(uint32_t format)
  6818. @@ -61,6 +62,8 @@ static int usecs_to_scanlines(const struct drm_display_mode *mode, int usecs)
  6819. return DIV_ROUND_UP(usecs * mode->crtc_clock, 1000 * mode->crtc_htotal);
  6820. }
  6821. +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock);
  6822. +
  6823. /**
  6824. * intel_pipe_update_start() - start update of a set of display registers
  6825. * @crtc: the crtc of which the registers are going to be updated
  6826. @@ -101,7 +104,7 @@ bool intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl_count)
  6827. if (WARN_ON(drm_crtc_vblank_get(&crtc->base)))
  6828. return false;
  6829. - local_irq_disable();
  6830. + local_lock_irq(pipe_update_lock);
  6831. trace_i915_pipe_update_start(crtc, min, max);
  6832. @@ -123,11 +126,11 @@ bool intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl_count)
  6833. break;
  6834. }
  6835. - local_irq_enable();
  6836. + local_unlock_irq(pipe_update_lock);
  6837. timeout = schedule_timeout(timeout);
  6838. - local_irq_disable();
  6839. + local_lock_irq(pipe_update_lock);
  6840. }
  6841. finish_wait(wq, &wait);
  6842. @@ -158,7 +161,7 @@ void intel_pipe_update_end(struct intel_crtc *crtc, u32 start_vbl_count)
  6843. trace_i915_pipe_update_end(crtc, end_vbl_count);
  6844. - local_irq_enable();
  6845. + local_unlock_irq(pipe_update_lock);
  6846. if (start_vbl_count != end_vbl_count)
  6847. DRM_ERROR("Atomic update failure on pipe %c (start=%u end=%u)\n",
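DEFINE_LOCAL_IRQ_LOCK()/local_lock_irq() come from the <linux/locallock.h> header added earlier in this series. On a non-RT build they degrade to plain local_irq_disable()/local_irq_enable(); on RT they take a per-CPU sleeping lock, so the vblank evasion loop keeps its mutual exclusion without turning the whole wait into a non-preemptible section. A minimal usage sketch (the lock and function names below are illustrative, not from the patch):

#include <linux/locallock.h>

static DEFINE_LOCAL_IRQ_LOCK(example_lock);

static void example_critical_section(void)
{
	local_lock_irq(example_lock);	/* IRQs off on !RT, per-CPU lock on RT */
	/* ... work that must not race with this CPU's other users ... */
	local_unlock_irq(example_lock);
}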
  6848. diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
  6849. index 6743174acdbc..8ad198bbc24d 100644
  6850. --- a/drivers/gpu/drm/radeon/radeon_display.c
  6851. +++ b/drivers/gpu/drm/radeon/radeon_display.c
  6852. @@ -1798,6 +1798,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc, unsigned int fl
  6853. struct radeon_device *rdev = dev->dev_private;
  6854. /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
  6855. + preempt_disable_rt();
  6856. /* Get optional system timestamp before query. */
  6857. if (stime)
  6858. @@ -1890,6 +1891,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc, unsigned int fl
  6859. *etime = ktime_get();
  6860. /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
  6861. + preempt_enable_rt();
  6862. /* Decode into vertical and horizontal scanout position. */
  6863. *vpos = position & 0x1fff;
  6864. diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
  6865. index 0e894193accf..2f9de5ecb6ed 100644
  6866. --- a/drivers/i2c/busses/i2c-omap.c
  6867. +++ b/drivers/i2c/busses/i2c-omap.c
  6868. @@ -996,15 +996,12 @@ omap_i2c_isr(int irq, void *dev_id)
  6869. u16 mask;
  6870. u16 stat;
  6871. - spin_lock(&dev->lock);
  6872. - mask = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
  6873. stat = omap_i2c_read_reg(dev, OMAP_I2C_STAT_REG);
  6874. + mask = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
  6875. if (stat & mask)
  6876. ret = IRQ_WAKE_THREAD;
  6877. - spin_unlock(&dev->lock);
  6878. -
  6879. return ret;
  6880. }
  6881. diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
  6882. index 36f76e28a0bf..394f142f90c7 100644
  6883. --- a/drivers/ide/alim15x3.c
  6884. +++ b/drivers/ide/alim15x3.c
  6885. @@ -234,7 +234,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev)
  6886. isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
  6887. - local_irq_save(flags);
  6888. + local_irq_save_nort(flags);
  6889. if (m5229_revision < 0xC2) {
  6890. /*
  6891. @@ -325,7 +325,7 @@ out:
  6892. }
  6893. pci_dev_put(north);
  6894. pci_dev_put(isa_dev);
  6895. - local_irq_restore(flags);
  6896. + local_irq_restore_nort(flags);
  6897. return 0;
  6898. }
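The *_nort() variants used here and in the following IDE, gameport and ipoib hunks are defined earlier in this series: on non-RT kernels they behave exactly like the functions they replace, while on PREEMPT_RT_FULL they leave interrupts enabled, because these sections only need protection against the local CPU and RT provides that by other means (threaded interrupt handlers, sleeping spinlocks). Approximately, as a sketch rather than a copy of the patch:

#ifdef CONFIG_PREEMPT_RT_FULL
# define local_irq_disable_nort()	do { } while (0)
# define local_irq_enable_nort()	do { } while (0)
# define local_irq_save_nort(flags)	local_save_flags(flags)
# define local_irq_restore_nort(flags)	(void)(flags)
#else
# define local_irq_disable_nort()	local_irq_disable()
# define local_irq_enable_nort()	local_irq_enable()
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#endif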
  6899. diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
  6900. index 696b6c1ec940..0d0a96629b73 100644
  6901. --- a/drivers/ide/hpt366.c
  6902. +++ b/drivers/ide/hpt366.c
  6903. @@ -1241,7 +1241,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif,
  6904. dma_old = inb(base + 2);
  6905. - local_irq_save(flags);
  6906. + local_irq_save_nort(flags);
  6907. dma_new = dma_old;
  6908. pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma);
  6909. @@ -1252,7 +1252,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif,
  6910. if (dma_new != dma_old)
  6911. outb(dma_new, base + 2);
  6912. - local_irq_restore(flags);
  6913. + local_irq_restore_nort(flags);
  6914. printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n",
  6915. hwif->name, base, base + 7);
  6916. diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c
  6917. index 19763977568c..4169433faab5 100644
  6918. --- a/drivers/ide/ide-io-std.c
  6919. +++ b/drivers/ide/ide-io-std.c
  6920. @@ -175,7 +175,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
  6921. unsigned long uninitialized_var(flags);
  6922. if ((io_32bit & 2) && !mmio) {
  6923. - local_irq_save(flags);
  6924. + local_irq_save_nort(flags);
  6925. ata_vlb_sync(io_ports->nsect_addr);
  6926. }
  6927. @@ -186,7 +186,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
  6928. insl(data_addr, buf, words);
  6929. if ((io_32bit & 2) && !mmio)
  6930. - local_irq_restore(flags);
  6931. + local_irq_restore_nort(flags);
  6932. if (((len + 1) & 3) < 2)
  6933. return;
  6934. @@ -219,7 +219,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
  6935. unsigned long uninitialized_var(flags);
  6936. if ((io_32bit & 2) && !mmio) {
  6937. - local_irq_save(flags);
  6938. + local_irq_save_nort(flags);
  6939. ata_vlb_sync(io_ports->nsect_addr);
  6940. }
  6941. @@ -230,7 +230,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
  6942. outsl(data_addr, buf, words);
  6943. if ((io_32bit & 2) && !mmio)
  6944. - local_irq_restore(flags);
  6945. + local_irq_restore_nort(flags);
  6946. if (((len + 1) & 3) < 2)
  6947. return;
  6948. diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
  6949. index 177db6d5b2f5..079ae6bebf18 100644
  6950. --- a/drivers/ide/ide-io.c
  6951. +++ b/drivers/ide/ide-io.c
  6952. @@ -659,7 +659,7 @@ void ide_timer_expiry (unsigned long data)
  6953. /* disable_irq_nosync ?? */
  6954. disable_irq(hwif->irq);
  6955. /* local CPU only, as if we were handling an interrupt */
  6956. - local_irq_disable();
  6957. + local_irq_disable_nort();
  6958. if (hwif->polling) {
  6959. startstop = handler(drive);
  6960. } else if (drive_is_ready(drive)) {
  6961. diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
  6962. index 376f2dc410c5..f014dd1b73dc 100644
  6963. --- a/drivers/ide/ide-iops.c
  6964. +++ b/drivers/ide/ide-iops.c
  6965. @@ -129,12 +129,12 @@ int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad,
  6966. if ((stat & ATA_BUSY) == 0)
  6967. break;
  6968. - local_irq_restore(flags);
  6969. + local_irq_restore_nort(flags);
  6970. *rstat = stat;
  6971. return -EBUSY;
  6972. }
  6973. }
  6974. - local_irq_restore(flags);
  6975. + local_irq_restore_nort(flags);
  6976. }
  6977. /*
  6978. * Allow status to settle, then read it again.
  6979. diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
  6980. index 0b63facd1d87..4ceba37afc0c 100644
  6981. --- a/drivers/ide/ide-probe.c
  6982. +++ b/drivers/ide/ide-probe.c
  6983. @@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id)
  6984. int bswap = 1;
  6985. /* local CPU only; some systems need this */
  6986. - local_irq_save(flags);
  6987. + local_irq_save_nort(flags);
  6988. /* read 512 bytes of id info */
  6989. hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE);
  6990. - local_irq_restore(flags);
  6991. + local_irq_restore_nort(flags);
  6992. drive->dev_flags |= IDE_DFLAG_ID_READ;
  6993. #ifdef DEBUG
  6994. diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
  6995. index dabb88b1cbec..2cecea72520a 100644
  6996. --- a/drivers/ide/ide-taskfile.c
  6997. +++ b/drivers/ide/ide-taskfile.c
  6998. @@ -250,7 +250,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd,
  6999. page_is_high = PageHighMem(page);
  7000. if (page_is_high)
  7001. - local_irq_save(flags);
  7002. + local_irq_save_nort(flags);
  7003. buf = kmap_atomic(page) + offset;
  7004. @@ -271,7 +271,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd,
  7005. kunmap_atomic(buf);
  7006. if (page_is_high)
  7007. - local_irq_restore(flags);
  7008. + local_irq_restore_nort(flags);
  7009. len -= nr_bytes;
  7010. }
  7011. @@ -414,7 +414,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive,
  7012. }
  7013. if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
  7014. - local_irq_disable();
  7015. + local_irq_disable_nort();
  7016. ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
  7017. diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  7018. index 0d23e0568deb..140c94ce71c5 100644
  7019. --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  7020. +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  7021. @@ -821,7 +821,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
  7022. ipoib_dbg_mcast(priv, "restarting multicast task\n");
  7023. - local_irq_save(flags);
  7024. + local_irq_save_nort(flags);
  7025. netif_addr_lock(dev);
  7026. spin_lock(&priv->lock);
  7027. @@ -903,7 +903,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
  7028. spin_unlock(&priv->lock);
  7029. netif_addr_unlock(dev);
  7030. - local_irq_restore(flags);
  7031. + local_irq_restore_nort(flags);
  7032. /*
  7033. * make sure the in-flight joins have finished before we attempt
  7034. diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c
  7035. index e853a2134680..5b6aa39a1de7 100644
  7036. --- a/drivers/input/gameport/gameport.c
  7037. +++ b/drivers/input/gameport/gameport.c
  7038. @@ -124,12 +124,12 @@ static int old_gameport_measure_speed(struct gameport *gameport)
  7039. tx = 1 << 30;
  7040. for(i = 0; i < 50; i++) {
  7041. - local_irq_save(flags);
  7042. + local_irq_save_nort(flags);
  7043. GET_TIME(t1);
  7044. for (t = 0; t < 50; t++) gameport_read(gameport);
  7045. GET_TIME(t2);
  7046. GET_TIME(t3);
  7047. - local_irq_restore(flags);
  7048. + local_irq_restore_nort(flags);
  7049. udelay(i * 10);
  7050. if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t;
  7051. }
  7052. @@ -148,11 +148,11 @@ static int old_gameport_measure_speed(struct gameport *gameport)
  7053. tx = 1 << 30;
  7054. for(i = 0; i < 50; i++) {
  7055. - local_irq_save(flags);
  7056. + local_irq_save_nort(flags);
  7057. rdtscl(t1);
  7058. for (t = 0; t < 50; t++) gameport_read(gameport);
  7059. rdtscl(t2);
  7060. - local_irq_restore(flags);
  7061. + local_irq_restore_nort(flags);
  7062. udelay(i * 10);
  7063. if (t2 - t1 < tx) tx = t2 - t1;
  7064. }
  7065. diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig
  7066. index 49794b47b51c..3d7245d6b2f8 100644
  7067. --- a/drivers/leds/trigger/Kconfig
  7068. +++ b/drivers/leds/trigger/Kconfig
  7069. @@ -61,7 +61,7 @@ config LEDS_TRIGGER_BACKLIGHT
  7070. config LEDS_TRIGGER_CPU
  7071. bool "LED CPU Trigger"
  7072. - depends on LEDS_TRIGGERS
  7073. + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE
  7074. help
  7075. This allows LEDs to be controlled by active CPUs. This shows
  7076. the active CPUs across an array of LEDs so you can see which
  7077. diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
  7078. index 4d200883c505..98b64ed5cb81 100644
  7079. --- a/drivers/md/bcache/Kconfig
  7080. +++ b/drivers/md/bcache/Kconfig
  7081. @@ -1,6 +1,7 @@
  7082. config BCACHE
  7083. tristate "Block device as cache"
  7084. + depends on !PREEMPT_RT_FULL
  7085. ---help---
  7086. Allows a block device to be used as cache for other devices; uses
  7087. a btree for indexing and the layout is optimized for SSDs.
  7088. diff --git a/drivers/md/dm.c b/drivers/md/dm.c
  7089. index 87de9a0848b7..86f64c13ccf6 100644
  7090. --- a/drivers/md/dm.c
  7091. +++ b/drivers/md/dm.c
  7092. @@ -2141,7 +2141,7 @@ static void dm_request_fn(struct request_queue *q)
  7093. /* Establish tio->ti before queuing work (map_tio_request) */
  7094. tio->ti = ti;
  7095. queue_kthread_work(&md->kworker, &tio->work);
  7096. - BUG_ON(!irqs_disabled());
  7097. + BUG_ON_NONRT(!irqs_disabled());
  7098. }
  7099. goto out;
  7100. diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
  7101. index ef0a99a3a779..4e60997ef19a 100644
  7102. --- a/drivers/md/raid5.c
  7103. +++ b/drivers/md/raid5.c
  7104. @@ -1918,8 +1918,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
  7105. struct raid5_percpu *percpu;
  7106. unsigned long cpu;
  7107. - cpu = get_cpu();
  7108. + cpu = get_cpu_light();
  7109. percpu = per_cpu_ptr(conf->percpu, cpu);
  7110. + spin_lock(&percpu->lock);
  7111. if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
  7112. ops_run_biofill(sh);
  7113. overlap_clear++;
  7114. @@ -1975,7 +1976,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
  7115. if (test_and_clear_bit(R5_Overlap, &dev->flags))
  7116. wake_up(&sh->raid_conf->wait_for_overlap);
  7117. }
  7118. - put_cpu();
  7119. + spin_unlock(&percpu->lock);
  7120. + put_cpu_light();
  7121. }
  7122. static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
  7123. @@ -6375,6 +6377,7 @@ static int raid5_alloc_percpu(struct r5conf *conf)
  7124. __func__, cpu);
  7125. break;
  7126. }
  7127. + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
  7128. }
  7129. put_online_cpus();
  7130. diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
  7131. index d31ed93bb8a9..82fc623bf0b0 100644
  7132. --- a/drivers/md/raid5.h
  7133. +++ b/drivers/md/raid5.h
  7134. @@ -495,6 +495,7 @@ struct r5conf {
  7135. int recovery_disabled;
  7136. /* per cpu variables */
  7137. struct raid5_percpu {
  7138. + spinlock_t lock; /* Protection for -RT */
  7139. struct page *spare_page; /* Used when checking P/Q in raid6 */
  7140. struct flex_array *scribble; /* space for constructing buffer
  7141. * lists and performing address
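Together the raid5.c and raid5.h hunks replace "preemption off" as the protection for the per-CPU scratch buffers with an explicit per-CPU spinlock: get_cpu_light() only pins the task to its CPU on RT, so a lock is needed to serialize against other tasks running on that CPU. The pattern, as an illustrative sketch (the struct and function names are made up; get_cpu_light()/put_cpu_light() come from earlier patches in this series):

#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <linux/smp.h>

struct example_percpu {
	spinlock_t lock;	/* serializes users of this CPU's scratch data */
	void *scratch;
};

static void example_use_scratch(struct example_percpu __percpu *pcp)
{
	struct example_percpu *p;
	int cpu;

	cpu = get_cpu_light();		/* on RT: migrate_disable(), stays preemptible */
	p = per_cpu_ptr(pcp, cpu);
	spin_lock(&p->lock);		/* sleeping lock on RT, so preemption is fine */
	/* ... operate on p->scratch ... */
	spin_unlock(&p->lock);
	put_cpu_light();
}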
  7142. diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
  7143. index b3c10b7dae1f..b9d7f076f2f8 100644
  7144. --- a/drivers/misc/Kconfig
  7145. +++ b/drivers/misc/Kconfig
  7146. @@ -54,6 +54,7 @@ config AD525X_DPOT_SPI
  7147. config ATMEL_TCLIB
  7148. bool "Atmel AT32/AT91 Timer/Counter Library"
  7149. depends on (AVR32 || ARCH_AT91)
  7150. + default y if PREEMPT_RT_FULL
  7151. help
  7152. Select this if you want a library to allocate the Timer/Counter
  7153. blocks found on many Atmel processors. This facilitates using
  7154. @@ -69,8 +70,7 @@ config ATMEL_TCB_CLKSRC
  7155. are combined to make a single 32-bit timer.
  7156. When GENERIC_CLOCKEVENTS is defined, the third timer channel
  7157. - may be used as a clock event device supporting oneshot mode
  7158. - (delays of up to two seconds) based on the 32 KiHz clock.
  7159. + may be used as a clock event device supporting oneshot mode.
  7160. config ATMEL_TCB_CLKSRC_BLOCK
  7161. int
  7162. @@ -84,6 +84,15 @@ config ATMEL_TCB_CLKSRC_BLOCK
  7163. TC can be used for other purposes, such as PWM generation and
  7164. interval timing.
  7165. +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  7166. + bool "TC Block use 32 KiHz clock"
  7167. + depends on ATMEL_TCB_CLKSRC
  7168. + default y if !PREEMPT_RT_FULL
  7169. + help
  7170. + Select this to use 32 KiHz base clock rate as TC block clock
  7171. + source for clock events.
  7172. +
  7173. +
  7174. config DUMMY_IRQ
  7175. tristate "Dummy IRQ handler"
  7176. default n
  7177. @@ -113,6 +122,35 @@ config IBM_ASM
  7178. for information on the specific driver level and support statement
  7179. for your IBM server.
  7180. +config HWLAT_DETECTOR
  7181. + tristate "Testing module to detect hardware-induced latencies"
  7182. + depends on DEBUG_FS
  7183. + depends on RING_BUFFER
  7184. + default m
  7185. + ---help---
  7186. + A simple hardware latency detector. Use this module to detect
  7187. + large latencies introduced by the behavior of the underlying
  7188. + system firmware external to Linux. We do this using periodic
  7189. + use of stop_machine to grab all available CPUs and measure
  7190. + for unexplainable gaps in the CPU timestamp counter(s). By
  7191. + default, the module is not enabled until the "enable" file
  7192. + within the "hwlat_detector" debugfs directory is toggled.
  7193. +
  7194. + This module is often used to detect SMI (System Management
7195. + Interrupts) on x86 systems, though it is not x86 specific. To
  7196. + this end, we default to using a sample window of 1 second,
  7197. + during which we will sample for 0.5 seconds. If an SMI or
  7198. + similar event occurs during that time, it is recorded
  7199. + into an 8K samples global ring buffer until retreived.
  7200. +
  7201. + WARNING: This software should never be enabled (it can be built
  7202. + but should not be turned on after it is loaded) in a production
  7203. + environment where high latencies are a concern since the
  7204. + sampling mechanism actually introduces latencies for
  7205. + regular tasks while the CPU(s) are being held.
  7206. +
  7207. + If unsure, say N
  7208. +
  7209. config PHANTOM
  7210. tristate "Sensable PHANToM (PCI)"
  7211. depends on PCI
  7212. diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
  7213. index 7d5c4cd118c4..6a8e39388cf9 100644
  7214. --- a/drivers/misc/Makefile
  7215. +++ b/drivers/misc/Makefile
  7216. @@ -38,6 +38,7 @@ obj-$(CONFIG_C2PORT) += c2port/
  7217. obj-$(CONFIG_HMC6352) += hmc6352.o
  7218. obj-y += eeprom/
  7219. obj-y += cb710/
  7220. +obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o
  7221. obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o
  7222. obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o
  7223. obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o
  7224. diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c
  7225. new file mode 100644
  7226. index 000000000000..2429c4331e68
  7227. --- /dev/null
  7228. +++ b/drivers/misc/hwlat_detector.c
  7229. @@ -0,0 +1,1240 @@
  7230. +/*
  7231. + * hwlat_detector.c - A simple Hardware Latency detector.
  7232. + *
  7233. + * Use this module to detect large system latencies induced by the behavior of
  7234. + * certain underlying system hardware or firmware, independent of Linux itself.
  7235. + * The code was developed originally to detect the presence of SMIs on Intel
  7236. + * and AMD systems, although there is no dependency upon x86 herein.
  7237. + *
  7238. + * The classical example usage of this module is in detecting the presence of
  7239. + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
  7240. + * somewhat special form of hardware interrupt spawned from earlier CPU debug
  7241. + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
  7242. + * LPC (or other device) to generate a special interrupt under certain
  7243. + * circumstances, for example, upon expiration of a special SMI timer device,
  7244. + * due to certain external thermal readings, on certain I/O address accesses,
  7245. + * and other situations. An SMI hits a special CPU pin, triggers a special
  7246. + * SMI mode (complete with special memory map), and the OS is unaware.
  7247. + *
  7248. + * Although certain hardware-inducing latencies are necessary (for example,
  7249. + * a modern system often requires an SMI handler for correct thermal control
  7250. + * and remote management) they can wreak havoc upon any OS-level performance
  7251. + * guarantees toward low-latency, especially when the OS is not even made
  7252. + * aware of the presence of these interrupts. For this reason, we need a
  7253. + * somewhat brute force mechanism to detect these interrupts. In this case,
  7254. + * we do it by hogging all of the CPU(s) for configurable timer intervals,
  7255. + * sampling the built-in CPU timer, looking for discontiguous readings.
  7256. + *
  7257. + * WARNING: This implementation necessarily introduces latencies. Therefore,
  7258. + * you should NEVER use this module in a production environment
  7259. + * requiring any kind of low-latency performance guarantee(s).
  7260. + *
  7261. + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
  7262. + *
  7263. + * Includes useful feedback from Clark Williams <clark@redhat.com>
  7264. + *
  7265. + * This file is licensed under the terms of the GNU General Public
  7266. + * License version 2. This program is licensed "as is" without any
  7267. + * warranty of any kind, whether express or implied.
  7268. + */
  7269. +
  7270. +#include <linux/module.h>
  7271. +#include <linux/init.h>
  7272. +#include <linux/ring_buffer.h>
  7273. +#include <linux/time.h>
  7274. +#include <linux/hrtimer.h>
  7275. +#include <linux/kthread.h>
  7276. +#include <linux/debugfs.h>
  7277. +#include <linux/seq_file.h>
  7278. +#include <linux/uaccess.h>
  7279. +#include <linux/version.h>
  7280. +#include <linux/delay.h>
  7281. +#include <linux/slab.h>
  7282. +#include <linux/trace_clock.h>
  7283. +
  7284. +#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */
  7285. +#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */
  7286. +#define U64STR_SIZE 22 /* 20 digits max */
  7287. +
  7288. +#define VERSION "1.0.0"
  7289. +#define BANNER "hwlat_detector: "
  7290. +#define DRVNAME "hwlat_detector"
  7291. +#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */
  7292. +#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */
  7293. +#define DEFAULT_LAT_THRESHOLD 10 /* 10us */
  7294. +
  7295. +/* Module metadata */
  7296. +
  7297. +MODULE_LICENSE("GPL");
  7298. +MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
  7299. +MODULE_DESCRIPTION("A simple hardware latency detector");
  7300. +MODULE_VERSION(VERSION);
  7301. +
  7302. +/* Module parameters */
  7303. +
  7304. +static int debug;
  7305. +static int enabled;
  7306. +static int threshold;
  7307. +
  7308. +module_param(debug, int, 0); /* enable debug */
  7309. +module_param(enabled, int, 0); /* enable detector */
  7310. +module_param(threshold, int, 0); /* latency threshold */
  7311. +
  7312. +/* Buffering and sampling */
  7313. +
  7314. +static struct ring_buffer *ring_buffer; /* sample buffer */
  7315. +static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */
  7316. +static unsigned long buf_size = BUF_SIZE_DEFAULT;
  7317. +static struct task_struct *kthread; /* sampling thread */
  7318. +
  7319. +/* DebugFS filesystem entries */
  7320. +
  7321. +static struct dentry *debug_dir; /* debugfs directory */
  7322. +static struct dentry *debug_max; /* maximum TSC delta */
  7323. +static struct dentry *debug_count; /* total detect count */
  7324. +static struct dentry *debug_sample_width; /* sample width us */
  7325. +static struct dentry *debug_sample_window; /* sample window us */
  7326. +static struct dentry *debug_sample; /* raw samples us */
  7327. +static struct dentry *debug_threshold; /* threshold us */
  7328. +static struct dentry *debug_enable; /* enable/disable */
  7329. +
  7330. +/* Individual samples and global state */
  7331. +
  7332. +struct sample; /* latency sample */
  7333. +struct data; /* Global state */
  7334. +
  7335. +/* Sampling functions */
  7336. +static int __buffer_add_sample(struct sample *sample);
  7337. +static struct sample *buffer_get_sample(struct sample *sample);
  7338. +
  7339. +/* Threading and state */
  7340. +static int kthread_fn(void *unused);
  7341. +static int start_kthread(void);
  7342. +static int stop_kthread(void);
  7343. +static void __reset_stats(void);
  7344. +static int init_stats(void);
  7345. +
  7346. +/* Debugfs interface */
  7347. +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
  7348. + size_t cnt, loff_t *ppos, const u64 *entry);
  7349. +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
  7350. + size_t cnt, loff_t *ppos, u64 *entry);
  7351. +static int debug_sample_fopen(struct inode *inode, struct file *filp);
  7352. +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
  7353. + size_t cnt, loff_t *ppos);
  7354. +static int debug_sample_release(struct inode *inode, struct file *filp);
  7355. +static int debug_enable_fopen(struct inode *inode, struct file *filp);
  7356. +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
  7357. + size_t cnt, loff_t *ppos);
  7358. +static ssize_t debug_enable_fwrite(struct file *file,
  7359. + const char __user *user_buffer,
  7360. + size_t user_size, loff_t *offset);
  7361. +
  7362. +/* Initialization functions */
  7363. +static int init_debugfs(void);
  7364. +static void free_debugfs(void);
  7365. +static int detector_init(void);
  7366. +static void detector_exit(void);
  7367. +
  7368. +/* Individual latency samples are stored here when detected and packed into
  7369. + * the ring_buffer circular buffer, where they are overwritten when
  7370. + * more than buf_size/sizeof(sample) samples are received. */
  7371. +struct sample {
  7372. + u64 seqnum; /* unique sequence */
  7373. + u64 duration; /* ktime delta */
  7374. + u64 outer_duration; /* ktime delta (outer loop) */
  7375. + struct timespec timestamp; /* wall time */
  7376. + unsigned long lost;
  7377. +};
  7378. +
  7379. +/* keep the global state somewhere. */
  7380. +static struct data {
  7381. +
  7382. + struct mutex lock; /* protect changes */
  7383. +
  7384. + u64 count; /* total since reset */
  7385. + u64 max_sample; /* max hardware latency */
  7386. + u64 threshold; /* sample threshold level */
  7387. +
  7388. + u64 sample_window; /* total sampling window (on+off) */
  7389. + u64 sample_width; /* active sampling portion of window */
  7390. +
  7391. + atomic_t sample_open; /* whether the sample file is open */
  7392. +
7393. + wait_queue_head_t wq; /* waitqueue for new sample values */
  7394. +
  7395. +} data;
  7396. +
  7397. +/**
  7398. + * __buffer_add_sample - add a new latency sample recording to the ring buffer
  7399. + * @sample: The new latency sample value
  7400. + *
  7401. + * This receives a new latency sample and records it in a global ring buffer.
  7402. + * No additional locking is used in this case.
  7403. + */
  7404. +static int __buffer_add_sample(struct sample *sample)
  7405. +{
  7406. + return ring_buffer_write(ring_buffer,
  7407. + sizeof(struct sample), sample);
  7408. +}
  7409. +
  7410. +/**
  7411. + * buffer_get_sample - remove a hardware latency sample from the ring buffer
  7412. + * @sample: Pre-allocated storage for the sample
  7413. + *
  7414. + * This retrieves a hardware latency sample from the global circular buffer
  7415. + */
  7416. +static struct sample *buffer_get_sample(struct sample *sample)
  7417. +{
  7418. + struct ring_buffer_event *e = NULL;
  7419. + struct sample *s = NULL;
  7420. + unsigned int cpu = 0;
  7421. +
  7422. + if (!sample)
  7423. + return NULL;
  7424. +
  7425. + mutex_lock(&ring_buffer_mutex);
  7426. + for_each_online_cpu(cpu) {
  7427. + e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost);
  7428. + if (e)
  7429. + break;
  7430. + }
  7431. +
  7432. + if (e) {
  7433. + s = ring_buffer_event_data(e);
  7434. + memcpy(sample, s, sizeof(struct sample));
  7435. + } else
  7436. + sample = NULL;
  7437. + mutex_unlock(&ring_buffer_mutex);
  7438. +
  7439. + return sample;
  7440. +}
  7441. +
  7442. +#ifndef CONFIG_TRACING
  7443. +#define time_type ktime_t
  7444. +#define time_get() ktime_get()
  7445. +#define time_to_us(x) ktime_to_us(x)
  7446. +#define time_sub(a, b) ktime_sub(a, b)
  7447. +#define init_time(a, b) (a).tv64 = b
  7448. +#define time_u64(a) ((a).tv64)
  7449. +#else
  7450. +#define time_type u64
  7451. +#define time_get() trace_clock_local()
  7452. +#define time_to_us(x) div_u64(x, 1000)
  7453. +#define time_sub(a, b) ((a) - (b))
  7454. +#define init_time(a, b) (a = b)
  7455. +#define time_u64(a) a
  7456. +#endif
  7457. +/**
  7458. + * get_sample - sample the CPU TSC and look for likely hardware latencies
  7459. + *
  7460. + * Used to repeatedly capture the CPU TSC (or similar), looking for potential
  7461. + * hardware-induced latency. Called with interrupts disabled and with
  7462. + * data.lock held.
  7463. + */
  7464. +static int get_sample(void)
  7465. +{
  7466. + time_type start, t1, t2, last_t2;
  7467. + s64 diff, total = 0;
  7468. + u64 sample = 0;
  7469. + u64 outer_sample = 0;
  7470. + int ret = -1;
  7471. +
  7472. + init_time(last_t2, 0);
  7473. + start = time_get(); /* start timestamp */
  7474. +
  7475. + do {
  7476. +
  7477. + t1 = time_get(); /* we'll look for a discontinuity */
  7478. + t2 = time_get();
  7479. +
  7480. + if (time_u64(last_t2)) {
  7481. + /* Check the delta from outer loop (t2 to next t1) */
  7482. + diff = time_to_us(time_sub(t1, last_t2));
  7483. + /* This shouldn't happen */
  7484. + if (diff < 0) {
  7485. + pr_err(BANNER "time running backwards\n");
  7486. + goto out;
  7487. + }
  7488. + if (diff > outer_sample)
  7489. + outer_sample = diff;
  7490. + }
  7491. + last_t2 = t2;
  7492. +
  7493. + total = time_to_us(time_sub(t2, start)); /* sample width */
  7494. +
  7495. + /* This checks the inner loop (t1 to t2) */
  7496. + diff = time_to_us(time_sub(t2, t1)); /* current diff */
  7497. +
  7498. + /* This shouldn't happen */
  7499. + if (diff < 0) {
  7500. + pr_err(BANNER "time running backwards\n");
  7501. + goto out;
  7502. + }
  7503. +
  7504. + if (diff > sample)
  7505. + sample = diff; /* only want highest value */
  7506. +
  7507. + } while (total <= data.sample_width);
  7508. +
  7509. + ret = 0;
  7510. +
  7511. + /* If we exceed the threshold value, we have found a hardware latency */
  7512. + if (sample > data.threshold || outer_sample > data.threshold) {
  7513. + struct sample s;
  7514. +
  7515. + ret = 1;
  7516. +
  7517. + data.count++;
  7518. + s.seqnum = data.count;
  7519. + s.duration = sample;
  7520. + s.outer_duration = outer_sample;
  7521. + s.timestamp = CURRENT_TIME;
  7522. + __buffer_add_sample(&s);
  7523. +
  7524. + /* Keep a running maximum ever recorded hardware latency */
  7525. + if (sample > data.max_sample)
  7526. + data.max_sample = sample;
  7527. + }
  7528. +
  7529. +out:
  7530. + return ret;
  7531. +}
  7532. +
  7533. +/*
  7534. + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
  7535. + * @unused: A required part of the kthread API.
  7536. + *
  7537. + * Used to periodically sample the CPU TSC via a call to get_sample. We
  7538. + * disable interrupts, which does (intentionally) introduce latency since we
  7539. + * need to ensure nothing else might be running (and thus pre-empting).
  7540. + * Obviously this should never be used in production environments.
  7541. + *
  7542. + * Currently this runs on whichever CPU it was scheduled on, but most
  7543. + * real-world hardware latency situations occur across several CPUs;
  7544. + * we might later generalize this if we find there are any actual
  7545. + * systems with alternate SMI delivery or other hardware latencies.
  7546. + */
  7547. +static int kthread_fn(void *unused)
  7548. +{
  7549. + int ret;
  7550. + u64 interval;
  7551. +
  7552. + while (!kthread_should_stop()) {
  7553. +
  7554. + mutex_lock(&data.lock);
  7555. +
  7556. + local_irq_disable();
  7557. + ret = get_sample();
  7558. + local_irq_enable();
  7559. +
  7560. + if (ret > 0)
  7561. + wake_up(&data.wq); /* wake up reader(s) */
  7562. +
  7563. + interval = data.sample_window - data.sample_width;
  7564. + do_div(interval, USEC_PER_MSEC); /* modifies interval value */
  7565. +
  7566. + mutex_unlock(&data.lock);
  7567. +
  7568. + if (msleep_interruptible(interval))
  7569. + break;
  7570. + }
  7571. +
  7572. + return 0;
  7573. +}
  7574. +
  7575. +/**
  7576. + * start_kthread - Kick off the hardware latency sampling/detector kthread
  7577. + *
  7578. + * This starts a kernel thread that will sit and sample the CPU timestamp
  7579. + * counter (TSC or similar) and look for potential hardware latencies.
  7580. + */
  7581. +static int start_kthread(void)
  7582. +{
  7583. + kthread = kthread_run(kthread_fn, NULL,
  7584. + DRVNAME);
  7585. + if (IS_ERR(kthread)) {
  7586. + pr_err(BANNER "could not start sampling thread\n");
  7587. + enabled = 0;
  7588. + return -ENOMEM;
  7589. + }
  7590. +
  7591. + return 0;
  7592. +}
  7593. +
  7594. +/**
  7595. + * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
  7596. + *
  7597. + * This kicks the running hardware latency sampling/detector kernel thread and
  7598. + * tells it to stop sampling now. Use this on unload and at system shutdown.
  7599. + */
  7600. +static int stop_kthread(void)
  7601. +{
  7602. + int ret;
  7603. +
  7604. + ret = kthread_stop(kthread);
  7605. +
  7606. + return ret;
  7607. +}
  7608. +
  7609. +/**
  7610. + * __reset_stats - Reset statistics for the hardware latency detector
  7611. + *
  7612. + * We use data to store various statistics and global state. We call this
  7613. + * function in order to reset those when "enable" is toggled on or off, and
  7614. + * also at initialization. Should be called with data.lock held.
  7615. + */
  7616. +static void __reset_stats(void)
  7617. +{
  7618. + data.count = 0;
  7619. + data.max_sample = 0;
  7620. + ring_buffer_reset(ring_buffer); /* flush out old sample entries */
  7621. +}
  7622. +
  7623. +/**
  7624. + * init_stats - Setup global state statistics for the hardware latency detector
  7625. + *
  7626. + * We use data to store various statistics and global state. We also use
  7627. + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware
  7628. + * induced system latencies. This function initializes these structures and
  7629. + * allocates the global ring buffer also.
  7630. + */
  7631. +static int init_stats(void)
  7632. +{
  7633. + int ret = -ENOMEM;
  7634. +
  7635. + mutex_init(&data.lock);
  7636. + init_waitqueue_head(&data.wq);
  7637. + atomic_set(&data.sample_open, 0);
  7638. +
  7639. + ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS);
  7640. +
  7641. + if (WARN(!ring_buffer, KERN_ERR BANNER
  7642. + "failed to allocate ring buffer!\n"))
  7643. + goto out;
  7644. +
  7645. + __reset_stats();
  7646. + data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */
  7647. + data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
  7648. + data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */
  7649. +
  7650. + ret = 0;
  7651. +
  7652. +out:
  7653. + return ret;
  7654. +
  7655. +}
  7656. +
  7657. +/*
  7658. + * simple_data_read - Wrapper read function for global state debugfs entries
  7659. + * @filp: The active open file structure for the debugfs "file"
  7660. + * @ubuf: The userspace provided buffer to read value into
  7661. + * @cnt: The maximum number of bytes to read
  7662. + * @ppos: The current "file" position
  7663. + * @entry: The entry to read from
  7664. + *
  7665. + * This function provides a generic read implementation for the global state
  7666. + * "data" structure debugfs filesystem entries. It would be nice to use
  7667. + * simple_attr_read directly, but we need to make sure that the data.lock
  7668. + * is held during the actual read.
  7669. + */
  7670. +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
  7671. + size_t cnt, loff_t *ppos, const u64 *entry)
  7672. +{
  7673. + char buf[U64STR_SIZE];
  7674. + u64 val = 0;
  7675. + int len = 0;
  7676. +
  7677. + memset(buf, 0, sizeof(buf));
  7678. +
  7679. + if (!entry)
  7680. + return -EFAULT;
  7681. +
  7682. + mutex_lock(&data.lock);
  7683. + val = *entry;
  7684. + mutex_unlock(&data.lock);
  7685. +
  7686. + len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val);
  7687. +
  7688. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
  7689. +
  7690. +}
  7691. +
  7692. +/*
  7693. + * simple_data_write - Wrapper write function for global state debugfs entries
  7694. + * @filp: The active open file structure for the debugfs "file"
  7695. + * @ubuf: The userspace provided buffer to write value from
  7696. + * @cnt: The maximum number of bytes to write
  7697. + * @ppos: The current "file" position
  7698. + * @entry: The entry to write to
  7699. + *
  7700. + * This function provides a generic write implementation for the global state
  7701. + * "data" structure debugfs filesystem entries. It would be nice to use
  7702. + * simple_attr_write directly, but we need to make sure that the data.lock
  7703. + * is held during the actual write.
  7704. + */
  7705. +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
  7706. + size_t cnt, loff_t *ppos, u64 *entry)
  7707. +{
  7708. + char buf[U64STR_SIZE];
  7709. + int csize = min(cnt, sizeof(buf));
  7710. + u64 val = 0;
  7711. + int err = 0;
  7712. +
  7713. + memset(buf, '\0', sizeof(buf));
  7714. + if (copy_from_user(buf, ubuf, csize))
  7715. + return -EFAULT;
  7716. +
  7717. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  7718. + err = kstrtoull(buf, 10, &val);
  7719. + if (err)
  7720. + return -EINVAL;
  7721. +
  7722. + mutex_lock(&data.lock);
  7723. + *entry = val;
  7724. + mutex_unlock(&data.lock);
  7725. +
  7726. + return csize;
  7727. +}
  7728. +
  7729. +/**
  7730. + * debug_count_fopen - Open function for "count" debugfs entry
  7731. + * @inode: The in-kernel inode representation of the debugfs "file"
  7732. + * @filp: The active open file structure for the debugfs "file"
  7733. + *
  7734. + * This function provides an open implementation for the "count" debugfs
  7735. + * interface to the hardware latency detector.
  7736. + */
  7737. +static int debug_count_fopen(struct inode *inode, struct file *filp)
  7738. +{
  7739. + return 0;
  7740. +}
  7741. +
  7742. +/**
  7743. + * debug_count_fread - Read function for "count" debugfs entry
  7744. + * @filp: The active open file structure for the debugfs "file"
  7745. + * @ubuf: The userspace provided buffer to read value into
  7746. + * @cnt: The maximum number of bytes to read
  7747. + * @ppos: The current "file" position
  7748. + *
  7749. + * This function provides a read implementation for the "count" debugfs
  7750. + * interface to the hardware latency detector. Can be used to read the
  7751. + * number of latency readings exceeding the configured threshold since
  7752. + * the detector was last reset (e.g. by writing a zero into "count").
  7753. + */
  7754. +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf,
  7755. + size_t cnt, loff_t *ppos)
  7756. +{
  7757. + return simple_data_read(filp, ubuf, cnt, ppos, &data.count);
  7758. +}
  7759. +
  7760. +/**
  7761. + * debug_count_fwrite - Write function for "count" debugfs entry
  7762. + * @filp: The active open file structure for the debugfs "file"
  7763. + * @ubuf: The user buffer that contains the value to write
  7764. + * @cnt: The maximum number of bytes to write to "file"
  7765. + * @ppos: The current position in the debugfs "file"
  7766. + *
  7767. + * This function provides a write implementation for the "count" debugfs
  7768. + * interface to the hardware latency detector. Can be used to write a
  7769. + * desired value, especially to zero the total count.
  7770. + */
  7771. +static ssize_t debug_count_fwrite(struct file *filp,
  7772. + const char __user *ubuf,
  7773. + size_t cnt,
  7774. + loff_t *ppos)
  7775. +{
  7776. + return simple_data_write(filp, ubuf, cnt, ppos, &data.count);
  7777. +}
  7778. +
  7779. +/**
  7780. + * debug_enable_fopen - Dummy open function for "enable" debugfs interface
  7781. + * @inode: The in-kernel inode representation of the debugfs "file"
  7782. + * @filp: The active open file structure for the debugfs "file"
  7783. + *
  7784. + * This function provides an open implementation for the "enable" debugfs
  7785. + * interface to the hardware latency detector.
  7786. + */
  7787. +static int debug_enable_fopen(struct inode *inode, struct file *filp)
  7788. +{
  7789. + return 0;
  7790. +}
  7791. +
  7792. +/**
  7793. + * debug_enable_fread - Read function for "enable" debugfs interface
  7794. + * @filp: The active open file structure for the debugfs "file"
  7795. + * @ubuf: The userspace provided buffer to read value into
  7796. + * @cnt: The maximum number of bytes to read
  7797. + * @ppos: The current "file" position
  7798. + *
  7799. + * This function provides a read implementation for the "enable" debugfs
  7800. + * interface to the hardware latency detector. Can be used to determine
  7801. + * whether the detector is currently enabled ("0\n" or "1\n" returned).
  7802. + */
  7803. +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
  7804. + size_t cnt, loff_t *ppos)
  7805. +{
  7806. + char buf[4];
  7807. +
  7808. + if ((cnt < sizeof(buf)) || (*ppos))
  7809. + return 0;
  7810. +
  7811. + buf[0] = enabled ? '1' : '0';
  7812. + buf[1] = '\n';
  7813. + buf[2] = '\0';
  7814. + if (copy_to_user(ubuf, buf, strlen(buf)))
  7815. + return -EFAULT;
  7816. + return *ppos = strlen(buf);
  7817. +}
  7818. +
  7819. +/**
  7820. + * debug_enable_fwrite - Write function for "enable" debugfs interface
  7821. + * @filp: The active open file structure for the debugfs "file"
  7822. + * @ubuf: The user buffer that contains the value to write
  7823. + * @cnt: The maximum number of bytes to write to "file"
  7824. + * @ppos: The current position in the debugfs "file"
  7825. + *
  7826. + * This function provides a write implementation for the "enable" debugfs
  7827. + * interface to the hardware latency detector. Can be used to enable or
  7828. + * disable the detector, which will have the side-effect of possibly
  7829. + * also resetting the global stats and kicking off the measuring
  7830. + * kthread (on an enable) or the converse (upon a disable).
  7831. + */
  7832. +static ssize_t debug_enable_fwrite(struct file *filp,
  7833. + const char __user *ubuf,
  7834. + size_t cnt,
  7835. + loff_t *ppos)
  7836. +{
  7837. + char buf[4];
  7838. + int csize = min(cnt, sizeof(buf));
  7839. + long val = 0;
  7840. + int err = 0;
  7841. +
  7842. + memset(buf, '\0', sizeof(buf));
  7843. + if (copy_from_user(buf, ubuf, csize))
  7844. + return -EFAULT;
  7845. +
  7846. + buf[sizeof(buf)-1] = '\0'; /* just in case */
  7847. + err = kstrtoul(buf, 10, &val);
  7848. + if (0 != err)
  7849. + return -EINVAL;
  7850. +
  7851. + if (val) {
  7852. + if (enabled)
  7853. + goto unlock;
  7854. + enabled = 1;
  7855. + __reset_stats();
  7856. + if (start_kthread())
  7857. + return -EFAULT;
  7858. + } else {
  7859. + if (!enabled)
  7860. + goto unlock;
  7861. + enabled = 0;
  7862. + err = stop_kthread();
  7863. + if (err) {
  7864. + pr_err(BANNER "cannot stop kthread\n");
  7865. + return -EFAULT;
  7866. + }
  7867. + wake_up(&data.wq); /* reader(s) should return */
  7868. + }
  7869. +unlock:
  7870. + return csize;
  7871. +}
  7872. +
  7873. +/**
  7874. + * debug_max_fopen - Open function for "max" debugfs entry
  7875. + * @inode: The in-kernel inode representation of the debugfs "file"
  7876. + * @filp: The active open file structure for the debugfs "file"
  7877. + *
  7878. + * This function provides an open implementation for the "max" debugfs
  7879. + * interface to the hardware latency detector.
  7880. + */
  7881. +static int debug_max_fopen(struct inode *inode, struct file *filp)
  7882. +{
  7883. + return 0;
  7884. +}
  7885. +
  7886. +/**
  7887. + * debug_max_fread - Read function for "max" debugfs entry
  7888. + * @filp: The active open file structure for the debugfs "file"
  7889. + * @ubuf: The userspace provided buffer to read value into
  7890. + * @cnt: The maximum number of bytes to read
  7891. + * @ppos: The current "file" position
  7892. + *
  7893. + * This function provides a read implementation for the "max" debugfs
  7894. + * interface to the hardware latency detector. Can be used to determine
  7895. + * the maximum latency value observed since it was last reset.
  7896. + */
  7897. +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
  7898. + size_t cnt, loff_t *ppos)
  7899. +{
  7900. + return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample);
  7901. +}
  7902. +
  7903. +/**
  7904. + * debug_max_fwrite - Write function for "max" debugfs entry
  7905. + * @filp: The active open file structure for the debugfs "file"
  7906. + * @ubuf: The user buffer that contains the value to write
  7907. + * @cnt: The maximum number of bytes to write to "file"
  7908. + * @ppos: The current position in the debugfs "file"
  7909. + *
  7910. + * This function provides a write implementation for the "max" debugfs
  7911. + * interface to the hardware latency detector. Can be used to reset the
  7912. + * maximum or set it to some other desired value; if subsequent
  7913. + * measurements then exceed this value, the maximum will be updated.
  7914. + */
  7915. +static ssize_t debug_max_fwrite(struct file *filp,
  7916. + const char __user *ubuf,
  7917. + size_t cnt,
  7918. + loff_t *ppos)
  7919. +{
  7920. + return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample);
  7921. +}
  7922. +
  7923. +
  7924. +/**
  7925. + * debug_sample_fopen - An open function for "sample" debugfs interface
  7926. + * @inode: The in-kernel inode representation of this debugfs "file"
  7927. + * @filp: The active open file structure for the debugfs "file"
  7928. + *
  7929. + * This function handles opening the "sample" file within the hardware
  7930. + * latency detector debugfs directory interface. This file is used to read
  7931. + * raw samples from the global ring_buffer and allows the user to see a
  7932. + * running latency history. Can be opened blocking or non-blocking,
  7933. + * which determines whether reads block waiting for new samples or not.
  7934. + * Implements simple locking to prevent multiple simultaneous use.
  7935. + */
  7936. +static int debug_sample_fopen(struct inode *inode, struct file *filp)
  7937. +{
  7938. + if (!atomic_add_unless(&data.sample_open, 1, 1))
  7939. + return -EBUSY;
  7940. + else
  7941. + return 0;
  7942. +}
  7943. +
  7944. +/**
  7945. + * debug_sample_fread - A read function for "sample" debugfs interface
  7946. + * @filp: The active open file structure for the debugfs "file"
  7947. + * @ubuf: The user buffer that will contain the samples read
  7948. + * @cnt: The maximum bytes to read from the debugfs "file"
  7949. + * @ppos: The current position in the debugfs "file"
  7950. + *
  7951. + * This function handles reading from the "sample" file within the hardware
  7952. + * latency detector debugfs directory interface. This file is used to read
  7953. + * raw samples from the global ring_buffer and allows the user to see a
  7954. + * running latency history. By default this will block pending a new
  7955. + * value written into the sample buffer, unless there are already a
  7956. + * number of value(s) waiting in the buffer, or the sample file was
  7957. + * previously opened in a non-blocking mode of operation.
  7958. + */
  7959. +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
  7960. + size_t cnt, loff_t *ppos)
  7961. +{
  7962. + int len = 0;
  7963. + char buf[64];
  7964. + struct sample *sample = NULL;
  7965. +
  7966. + if (!enabled)
  7967. + return 0;
  7968. +
  7969. + sample = kzalloc(sizeof(struct sample), GFP_KERNEL);
  7970. + if (!sample)
  7971. + return -ENOMEM;
  7972. +
  7973. + while (!buffer_get_sample(sample)) {
  7974. +
  7975. + DEFINE_WAIT(wait);
  7976. +
  7977. + if (filp->f_flags & O_NONBLOCK) {
  7978. + len = -EAGAIN;
  7979. + goto out;
  7980. + }
  7981. +
  7982. + prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE);
  7983. + schedule();
  7984. + finish_wait(&data.wq, &wait);
  7985. +
  7986. + if (signal_pending(current)) {
  7987. + len = -EINTR;
  7988. + goto out;
  7989. + }
  7990. +
  7991. + if (!enabled) { /* enable was toggled */
  7992. + len = 0;
  7993. + goto out;
  7994. + }
  7995. + }
  7996. +
  7997. + len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n",
  7998. + sample->timestamp.tv_sec,
  7999. + sample->timestamp.tv_nsec,
  8000. + sample->duration,
  8001. + sample->outer_duration);
  8002. +
  8003. +
  8004. + /* handling partial reads is more trouble than it's worth */
  8005. + if (len > cnt)
  8006. + goto out;
  8007. +
  8008. + if (copy_to_user(ubuf, buf, len))
  8009. + len = -EFAULT;
  8010. +
  8011. +out:
  8012. + kfree(sample);
  8013. + return len;
  8014. +}
  8015. +
  8016. +/**
  8017. + * debug_sample_release - Release function for "sample" debugfs interface
  8018. + * @inode: The in-kernel inode representation of the debugfs "file"
  8019. + * @filp: The active open file structure for the debugfs "file"
  8020. + *
  8021. + * This function completes the close of the debugfs interface "sample" file.
  8022. + * Frees the sample_open "lock" so that other users may open the interface.
  8023. + */
  8024. +static int debug_sample_release(struct inode *inode, struct file *filp)
  8025. +{
  8026. + atomic_dec(&data.sample_open);
  8027. +
  8028. + return 0;
  8029. +}
  8030. +
  8031. +/**
  8032. + * debug_threshold_fopen - Open function for "threshold" debugfs entry
  8033. + * @inode: The in-kernel inode representation of the debugfs "file"
  8034. + * @filp: The active open file structure for the debugfs "file"
  8035. + *
  8036. + * This function provides an open implementation for the "threshold" debugfs
  8037. + * interface to the hardware latency detector.
  8038. + */
  8039. +static int debug_threshold_fopen(struct inode *inode, struct file *filp)
  8040. +{
  8041. + return 0;
  8042. +}
  8043. +
  8044. +/**
  8045. + * debug_threshold_fread - Read function for "threshold" debugfs entry
  8046. + * @filp: The active open file structure for the debugfs "file"
  8047. + * @ubuf: The userspace provided buffer to read value into
  8048. + * @cnt: The maximum number of bytes to read
  8049. + * @ppos: The current "file" position
  8050. + *
  8051. + * This function provides a read implementation for the "threshold" debugfs
  8052. + * interface to the hardware latency detector. It can be used to determine
  8053. + * the current threshold level at which a latency will be recorded in the
  8054. + * global ring buffer, typically on the order of 10us.
  8055. + */
  8056. +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf,
  8057. + size_t cnt, loff_t *ppos)
  8058. +{
  8059. + return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold);
  8060. +}
  8061. +
  8062. +/**
  8063. + * debug_threshold_fwrite - Write function for "threshold" debugfs entry
  8064. + * @filp: The active open file structure for the debugfs "file"
  8065. + * @ubuf: The user buffer that contains the value to write
  8066. + * @cnt: The maximum number of bytes to write to "file"
  8067. + * @ppos: The current position in the debugfs "file"
  8068. + *
  8069. + * This function provides a write implementation for the "threshold" debugfs
  8070. + * interface to the hardware latency detector. It can be used to configure
  8071. + * the threshold level at which any subsequently detected latencies will
  8072. + * be recorded into the global ring buffer.
  8073. + */
  8074. +static ssize_t debug_threshold_fwrite(struct file *filp,
  8075. + const char __user *ubuf,
  8076. + size_t cnt,
  8077. + loff_t *ppos)
  8078. +{
  8079. + int ret;
  8080. +
  8081. + ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold);
  8082. +
  8083. + if (enabled)
  8084. + wake_up_process(kthread);
  8085. +
  8086. + return ret;
  8087. +}
  8088. +
  8089. +/**
  8090. + * debug_width_fopen - Open function for "width" debugfs entry
  8091. + * @inode: The in-kernel inode representation of the debugfs "file"
  8092. + * @filp: The active open file structure for the debugfs "file"
  8093. + *
  8094. + * This function provides an open implementation for the "width" debugfs
  8095. + * interface to the hardware latency detector.
  8096. + */
  8097. +static int debug_width_fopen(struct inode *inode, struct file *filp)
  8098. +{
  8099. + return 0;
  8100. +}
  8101. +
  8102. +/**
  8103. + * debug_width_fread - Read function for "width" debugfs entry
  8104. + * @filp: The active open file structure for the debugfs "file"
  8105. + * @ubuf: The userspace provided buffer to read value into
  8106. + * @cnt: The maximum number of bytes to read
  8107. + * @ppos: The current "file" position
  8108. + *
  8109. + * This function provides a read implementation for the "width" debugfs
  8110. + * interface to the hardware latency detector. It can be used to determine
  8111. + * for how many us of the total window we will actively sample for any
  8112. + * hardware-induced latency periods. Obviously, it is not possible to
  8113. + * sample constantly without starving any sample reader or, worse,
  8114. + * making the whole system appear to have gone out to lunch.
  8115. + */
  8116. +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf,
  8117. + size_t cnt, loff_t *ppos)
  8118. +{
  8119. + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width);
  8120. +}
  8121. +
  8122. +/**
  8123. + * debug_width_fwrite - Write function for "width" debugfs entry
  8124. + * @filp: The active open file structure for the debugfs "file"
  8125. + * @ubuf: The user buffer that contains the value to write
  8126. + * @cnt: The maximum number of bytes to write to "file"
  8127. + * @ppos: The current position in the debugfs "file"
  8128. + *
  8129. + * This function provides a write implementation for the "width" debugfs
  8130. + * interface to the hardware latency detector. It can be used to configure
  8131. + * for how many us of the total window we will actively sample for any
  8132. + * hardware-induced latency periods. Obviously, it is not possible to
  8133. + * sample constantly without starving any sample reader or, worse,
  8134. + * making the whole system appear to have gone out to lunch. It
  8135. + * is enforced that the width is less than the total window size.
  8136. + */
  8137. +static ssize_t debug_width_fwrite(struct file *filp,
  8138. + const char __user *ubuf,
  8139. + size_t cnt,
  8140. + loff_t *ppos)
  8141. +{
  8142. + char buf[U64STR_SIZE];
  8143. + int csize = min(cnt, sizeof(buf));
  8144. + u64 val = 0;
  8145. + int err = 0;
  8146. +
  8147. + memset(buf, '\0', sizeof(buf));
  8148. + if (copy_from_user(buf, ubuf, csize))
  8149. + return -EFAULT;
  8150. +
  8151. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  8152. + err = kstrtoull(buf, 10, &val);
  8153. + if (0 != err)
  8154. + return -EINVAL;
  8155. +
  8156. + mutex_lock(&data.lock);
  8157. + if (val < data.sample_window)
  8158. + data.sample_width = val;
  8159. + else {
  8160. + mutex_unlock(&data.lock);
  8161. + return -EINVAL;
  8162. + }
  8163. + mutex_unlock(&data.lock);
  8164. +
  8165. + if (enabled)
  8166. + wake_up_process(kthread);
  8167. +
  8168. + return csize;
  8169. +}
  8170. +
  8171. +/**
  8172. + * debug_window_fopen - Open function for "window" debugfs entry
  8173. + * @inode: The in-kernel inode representation of the debugfs "file"
  8174. + * @filp: The active open file structure for the debugfs "file"
  8175. + *
  8176. + * This function provides an open implementation for the "window" debugfs
  8177. + * interface to the hardware latency detector. The window is the total time
  8178. + * in us that will be considered one sample period. Conceptually, windows
  8179. + * occur back-to-back and contain a sample width period during which
  8180. + * actual sampling occurs.
  8181. + */
  8182. +static int debug_window_fopen(struct inode *inode, struct file *filp)
  8183. +{
  8184. + return 0;
  8185. +}
  8186. +
  8187. +/**
  8188. + * debug_window_fread - Read function for "window" debugfs entry
  8189. + * @filp: The active open file structure for the debugfs "file"
  8190. + * @ubuf: The userspace provided buffer to read value into
  8191. + * @cnt: The maximum number of bytes to read
  8192. + * @ppos: The current "file" position
  8193. + *
  8194. + * This function provides a read implementation for the "window" debugfs
  8195. + * interface to the hardware latency detector. The window is the total time
  8196. + * in us that will be considered one sample period. Conceptually, windows
  8197. + * occur back-to-back and contain a sample width period during which
  8198. + * actual sampling occurs. Can be used to read the total window size.
  8199. + */
  8200. +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf,
  8201. + size_t cnt, loff_t *ppos)
  8202. +{
  8203. + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window);
  8204. +}
  8205. +
  8206. +/**
  8207. + * debug_window_fwrite - Write function for "window" debugfs entry
  8208. + * @filp: The active open file structure for the debugfs "file"
  8209. + * @ubuf: The user buffer that contains the value to write
  8210. + * @cnt: The maximum number of bytes to write to "file"
  8211. + * @ppos: The current position in the debugfs "file"
  8212. + *
  8213. + * This function provides a write implementation for the "window" debugfs
  8214. + * interface to the hardware latency detector. The window is the total time
  8215. + * in us that will be considered one sample period. Conceptually, windows
  8216. + * occur back-to-back and contain a sample width period during which
  8217. + * actual sampling occurs. Can be used to write a new total window size. It
  8218. + * is enforced that any value written must be greater than the sample width
  8219. + * size, or an error results.
  8220. + */
  8221. +static ssize_t debug_window_fwrite(struct file *filp,
  8222. + const char __user *ubuf,
  8223. + size_t cnt,
  8224. + loff_t *ppos)
  8225. +{
  8226. + char buf[U64STR_SIZE];
  8227. + int csize = min(cnt, sizeof(buf));
  8228. + u64 val = 0;
  8229. + int err = 0;
  8230. +
  8231. + memset(buf, '\0', sizeof(buf));
  8232. + if (copy_from_user(buf, ubuf, csize))
  8233. + return -EFAULT;
  8234. +
  8235. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  8236. + err = kstrtoull(buf, 10, &val);
  8237. + if (0 != err)
  8238. + return -EINVAL;
  8239. +
  8240. + mutex_lock(&data.lock);
  8241. + if (data.sample_width < val)
  8242. + data.sample_window = val;
  8243. + else {
  8244. + mutex_unlock(&data.lock);
  8245. + return -EINVAL;
  8246. + }
  8247. + mutex_unlock(&data.lock);
  8248. +
  8249. + return csize;
  8250. +}
  8251. +
  8252. +/*
  8253. + * Function pointers for the "count" debugfs file operations
  8254. + */
  8255. +static const struct file_operations count_fops = {
  8256. + .open = debug_count_fopen,
  8257. + .read = debug_count_fread,
  8258. + .write = debug_count_fwrite,
  8259. + .owner = THIS_MODULE,
  8260. +};
  8261. +
  8262. +/*
  8263. + * Function pointers for the "enable" debugfs file operations
  8264. + */
  8265. +static const struct file_operations enable_fops = {
  8266. + .open = debug_enable_fopen,
  8267. + .read = debug_enable_fread,
  8268. + .write = debug_enable_fwrite,
  8269. + .owner = THIS_MODULE,
  8270. +};
  8271. +
  8272. +/*
  8273. + * Function pointers for the "max" debugfs file operations
  8274. + */
  8275. +static const struct file_operations max_fops = {
  8276. + .open = debug_max_fopen,
  8277. + .read = debug_max_fread,
  8278. + .write = debug_max_fwrite,
  8279. + .owner = THIS_MODULE,
  8280. +};
  8281. +
  8282. +/*
  8283. + * Function pointers for the "sample" debugfs file operations
  8284. + */
  8285. +static const struct file_operations sample_fops = {
  8286. + .open = debug_sample_fopen,
  8287. + .read = debug_sample_fread,
  8288. + .release = debug_sample_release,
  8289. + .owner = THIS_MODULE,
  8290. +};
  8291. +
  8292. +/*
  8293. + * Function pointers for the "threshold" debugfs file operations
  8294. + */
  8295. +static const struct file_operations threshold_fops = {
  8296. + .open = debug_threshold_fopen,
  8297. + .read = debug_threshold_fread,
  8298. + .write = debug_threshold_fwrite,
  8299. + .owner = THIS_MODULE,
  8300. +};
  8301. +
  8302. +/*
  8303. + * Function pointers for the "width" debugfs file operations
  8304. + */
  8305. +static const struct file_operations width_fops = {
  8306. + .open = debug_width_fopen,
  8307. + .read = debug_width_fread,
  8308. + .write = debug_width_fwrite,
  8309. + .owner = THIS_MODULE,
  8310. +};
  8311. +
  8312. +/*
  8313. + * Function pointers for the "window" debugfs file operations
  8314. + */
  8315. +static const struct file_operations window_fops = {
  8316. + .open = debug_window_fopen,
  8317. + .read = debug_window_fread,
  8318. + .write = debug_window_fwrite,
  8319. + .owner = THIS_MODULE,
  8320. +};
  8321. +
  8322. +/**
  8323. + * init_debugfs - A function to initialize the debugfs interface files
  8324. + *
  8325. + * This function creates entries in debugfs for "hwlat_detector", including
  8326. + * files to read values from the detector, current samples, and the
  8327. + * maximum sample that has been captured since the hardware latency
  8328. + * detector was started.
  8329. + */
  8330. +static int init_debugfs(void)
  8331. +{
  8332. + int ret = -ENOMEM;
  8333. +
  8334. + debug_dir = debugfs_create_dir(DRVNAME, NULL);
  8335. + if (!debug_dir)
  8336. + goto err_debug_dir;
  8337. +
  8338. + debug_sample = debugfs_create_file("sample", 0444,
  8339. + debug_dir, NULL,
  8340. + &sample_fops);
  8341. + if (!debug_sample)
  8342. + goto err_sample;
  8343. +
  8344. + debug_count = debugfs_create_file("count", 0444,
  8345. + debug_dir, NULL,
  8346. + &count_fops);
  8347. + if (!debug_count)
  8348. + goto err_count;
  8349. +
  8350. + debug_max = debugfs_create_file("max", 0444,
  8351. + debug_dir, NULL,
  8352. + &max_fops);
  8353. + if (!debug_max)
  8354. + goto err_max;
  8355. +
  8356. + debug_sample_window = debugfs_create_file("window", 0644,
  8357. + debug_dir, NULL,
  8358. + &window_fops);
  8359. + if (!debug_sample_window)
  8360. + goto err_window;
  8361. +
  8362. + debug_sample_width = debugfs_create_file("width", 0644,
  8363. + debug_dir, NULL,
  8364. + &width_fops);
  8365. + if (!debug_sample_width)
  8366. + goto err_width;
  8367. +
  8368. + debug_threshold = debugfs_create_file("threshold", 0644,
  8369. + debug_dir, NULL,
  8370. + &threshold_fops);
  8371. + if (!debug_threshold)
  8372. + goto err_threshold;
  8373. +
  8374. + debug_enable = debugfs_create_file("enable", 0644,
  8375. + debug_dir, &enabled,
  8376. + &enable_fops);
  8377. + if (!debug_enable)
  8378. + goto err_enable;
  8379. +
  8380. + else {
  8381. + ret = 0;
  8382. + goto out;
  8383. + }
  8384. +
  8385. +err_enable:
  8386. + debugfs_remove(debug_threshold);
  8387. +err_threshold:
  8388. + debugfs_remove(debug_sample_width);
  8389. +err_width:
  8390. + debugfs_remove(debug_sample_window);
  8391. +err_window:
  8392. + debugfs_remove(debug_max);
  8393. +err_max:
  8394. + debugfs_remove(debug_count);
  8395. +err_count:
  8396. + debugfs_remove(debug_sample);
  8397. +err_sample:
  8398. + debugfs_remove(debug_dir);
  8399. +err_debug_dir:
  8400. +out:
  8401. + return ret;
  8402. +}
  8403. +
  8404. +/**
  8405. + * free_debugfs - A function to cleanup the debugfs file interface
  8406. + */
  8407. +static void free_debugfs(void)
  8408. +{
  8409. + /* could also use a debugfs_remove_recursive */
  8410. + debugfs_remove(debug_enable);
  8411. + debugfs_remove(debug_threshold);
  8412. + debugfs_remove(debug_sample_width);
  8413. + debugfs_remove(debug_sample_window);
  8414. + debugfs_remove(debug_max);
  8415. + debugfs_remove(debug_count);
  8416. + debugfs_remove(debug_sample);
  8417. + debugfs_remove(debug_dir);
  8418. +}
  8419. +
  8420. +/**
  8421. + * detector_init - Standard module initialization code
  8422. + */
  8423. +static int detector_init(void)
  8424. +{
  8425. + int ret = -ENOMEM;
  8426. +
  8427. + pr_info(BANNER "version %s\n", VERSION);
  8428. +
  8429. + ret = init_stats();
  8430. + if (0 != ret)
  8431. + goto out;
  8432. +
  8433. + ret = init_debugfs();
  8434. + if (0 != ret)
  8435. + goto err_stats;
  8436. +
  8437. + if (enabled)
  8438. + ret = start_kthread();
  8439. +
  8440. + goto out;
  8441. +
  8442. +err_stats:
  8443. + ring_buffer_free(ring_buffer);
  8444. +out:
  8445. + return ret;
  8446. +
  8447. +}
  8448. +
  8449. +/**
  8450. + * detector_exit - Standard module cleanup code
  8451. + */
  8452. +static void detector_exit(void)
  8453. +{
  8454. + int err;
  8455. +
  8456. + if (enabled) {
  8457. + enabled = 0;
  8458. + err = stop_kthread();
  8459. + if (err)
  8460. + pr_err(BANNER "cannot stop kthread\n");
  8461. + }
  8462. +
  8463. + free_debugfs();
  8464. + ring_buffer_free(ring_buffer); /* free up the ring buffer */
  8465. +
  8466. +}
  8467. +
  8468. +module_init(detector_init);
  8469. +module_exit(detector_exit);
  8470. diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
  8471. index acece3299756..58ea04a03fa9 100644
  8472. --- a/drivers/mmc/host/mmci.c
  8473. +++ b/drivers/mmc/host/mmci.c
  8474. @@ -1155,15 +1155,12 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id)
  8475. struct sg_mapping_iter *sg_miter = &host->sg_miter;
  8476. struct variant_data *variant = host->variant;
  8477. void __iomem *base = host->base;
  8478. - unsigned long flags;
  8479. u32 status;
  8480. status = readl(base + MMCISTATUS);
  8481. dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status);
  8482. - local_irq_save(flags);
  8483. -
  8484. do {
  8485. unsigned int remain, len;
  8486. char *buffer;
  8487. @@ -1203,8 +1200,6 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id)
  8488. sg_miter_stop(sg_miter);
  8489. - local_irq_restore(flags);
  8490. -
  8491. /*
  8492. * If we have less than the fifo 'half-full' threshold to transfer,
  8493. * trigger a PIO interrupt as soon as any data is available.
  8494. diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
  8495. index 41095ebad97f..b0a0cb22aec4 100644
  8496. --- a/drivers/net/ethernet/3com/3c59x.c
  8497. +++ b/drivers/net/ethernet/3com/3c59x.c
  8498. @@ -842,9 +842,9 @@ static void poll_vortex(struct net_device *dev)
  8499. {
  8500. struct vortex_private *vp = netdev_priv(dev);
  8501. unsigned long flags;
  8502. - local_irq_save(flags);
  8503. + local_irq_save_nort(flags);
  8504. (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
  8505. - local_irq_restore(flags);
  8506. + local_irq_restore_nort(flags);
  8507. }
  8508. #endif
  8509. @@ -1916,12 +1916,12 @@ static void vortex_tx_timeout(struct net_device *dev)
  8510. * Block interrupts because vortex_interrupt does a bare spin_lock()
  8511. */
  8512. unsigned long flags;
  8513. - local_irq_save(flags);
  8514. + local_irq_save_nort(flags);
  8515. if (vp->full_bus_master_tx)
  8516. boomerang_interrupt(dev->irq, dev);
  8517. else
  8518. vortex_interrupt(dev->irq, dev);
  8519. - local_irq_restore(flags);
  8520. + local_irq_restore_nort(flags);
  8521. }
  8522. }
  8523. diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
  8524. index 6e9036a06515..cc956b06ad18 100644
  8525. --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
  8526. +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
  8527. @@ -2212,11 +2212,7 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
  8528. }
  8529. tpd_req = atl1c_cal_tpd_req(skb);
  8530. - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) {
  8531. - if (netif_msg_pktdata(adapter))
  8532. - dev_info(&adapter->pdev->dev, "tx locked\n");
  8533. - return NETDEV_TX_LOCKED;
  8534. - }
  8535. + spin_lock_irqsave(&adapter->tx_lock, flags);
  8536. if (atl1c_tpd_avail(adapter, type) < tpd_req) {
  8537. /* no enough descriptor, just stop queue */
  8538. diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
  8539. index 59a03a193e83..734f7a7ad2c3 100644
  8540. --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
  8541. +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
  8542. @@ -1880,8 +1880,7 @@ static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb,
  8543. return NETDEV_TX_OK;
  8544. }
  8545. tpd_req = atl1e_cal_tdp_req(skb);
  8546. - if (!spin_trylock_irqsave(&adapter->tx_lock, flags))
  8547. - return NETDEV_TX_LOCKED;
  8548. + spin_lock_irqsave(&adapter->tx_lock, flags);
  8549. if (atl1e_tpd_avail(adapter) < tpd_req) {
  8550. /* no enough descriptor, just stop queue */
  8551. diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
  8552. index 526ea74e82d9..86f467a2c485 100644
  8553. --- a/drivers/net/ethernet/chelsio/cxgb/sge.c
  8554. +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
  8555. @@ -1664,8 +1664,7 @@ static int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter,
  8556. struct cmdQ *q = &sge->cmdQ[qid];
  8557. unsigned int credits, pidx, genbit, count, use_sched_skb = 0;
  8558. - if (!spin_trylock(&q->lock))
  8559. - return NETDEV_TX_LOCKED;
  8560. + spin_lock(&q->lock);
  8561. reclaim_completed_tx(sge, q);
  8562. diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
  8563. index 4ee080d49bc0..e616b71d5014 100644
  8564. --- a/drivers/net/ethernet/freescale/gianfar.c
  8565. +++ b/drivers/net/ethernet/freescale/gianfar.c
  8566. @@ -1540,7 +1540,7 @@ static int gfar_suspend(struct device *dev)
  8567. if (netif_running(ndev)) {
  8568. - local_irq_save(flags);
  8569. + local_irq_save_nort(flags);
  8570. lock_tx_qs(priv);
  8571. gfar_halt_nodisable(priv);
  8572. @@ -1556,7 +1556,7 @@ static int gfar_suspend(struct device *dev)
  8573. gfar_write(&regs->maccfg1, tempval);
  8574. unlock_tx_qs(priv);
  8575. - local_irq_restore(flags);
  8576. + local_irq_restore_nort(flags);
  8577. disable_napi(priv);
  8578. @@ -1598,7 +1598,7 @@ static int gfar_resume(struct device *dev)
  8579. /* Disable Magic Packet mode, in case something
  8580. * else woke us up.
  8581. */
  8582. - local_irq_save(flags);
  8583. + local_irq_save_nort(flags);
  8584. lock_tx_qs(priv);
  8585. tempval = gfar_read(&regs->maccfg2);
  8586. @@ -1608,7 +1608,7 @@ static int gfar_resume(struct device *dev)
  8587. gfar_start(priv);
  8588. unlock_tx_qs(priv);
  8589. - local_irq_restore(flags);
  8590. + local_irq_restore_nort(flags);
  8591. netif_device_attach(ndev);
  8592. @@ -3418,14 +3418,14 @@ static irqreturn_t gfar_error(int irq, void *grp_id)
  8593. dev->stats.tx_dropped++;
  8594. atomic64_inc(&priv->extra_stats.tx_underrun);
  8595. - local_irq_save(flags);
  8596. + local_irq_save_nort(flags);
  8597. lock_tx_qs(priv);
  8598. /* Reactivate the Tx Queues */
  8599. gfar_write(&regs->tstat, gfargrp->tstat);
  8600. unlock_tx_qs(priv);
  8601. - local_irq_restore(flags);
  8602. + local_irq_restore_nort(flags);
  8603. }
  8604. netif_dbg(priv, tx_err, dev, "Transmit Error\n");
  8605. }
  8606. diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
  8607. index 1e0f72b65459..bb5ced2b5194 100644
  8608. --- a/drivers/net/ethernet/neterion/s2io.c
  8609. +++ b/drivers/net/ethernet/neterion/s2io.c
  8610. @@ -4084,12 +4084,7 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev)
  8611. [skb->priority & (MAX_TX_FIFOS - 1)];
  8612. fifo = &mac_control->fifos[queue];
  8613. - if (do_spin_lock)
  8614. - spin_lock_irqsave(&fifo->tx_lock, flags);
  8615. - else {
  8616. - if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags)))
  8617. - return NETDEV_TX_LOCKED;
  8618. - }
  8619. + spin_lock_irqsave(&fifo->tx_lock, flags);
  8620. if (sp->config.multiq) {
  8621. if (__netif_subqueue_stopped(dev, fifo->fifo_no)) {
  8622. diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
  8623. index 3b98b263bad0..ca4add749410 100644
  8624. --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
  8625. +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
  8626. @@ -2137,10 +2137,8 @@ static int pch_gbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
  8627. struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring;
  8628. unsigned long flags;
  8629. - if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) {
  8630. - /* Collision - tell upper layer to requeue */
  8631. - return NETDEV_TX_LOCKED;
  8632. - }
  8633. + spin_lock_irqsave(&tx_ring->tx_lock, flags);
  8634. +
  8635. if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) {
  8636. netif_stop_queue(netdev);
  8637. spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
  8638. diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
  8639. index 78bb4ceb1cdd..b5156963ca07 100644
  8640. --- a/drivers/net/ethernet/realtek/8139too.c
  8641. +++ b/drivers/net/ethernet/realtek/8139too.c
  8642. @@ -2229,7 +2229,7 @@ static void rtl8139_poll_controller(struct net_device *dev)
  8643. struct rtl8139_private *tp = netdev_priv(dev);
  8644. const int irq = tp->pci_dev->irq;
  8645. - disable_irq(irq);
  8646. + disable_irq_nosync(irq);
  8647. rtl8139_interrupt(irq, dev);
  8648. enable_irq(irq);
  8649. }
  8650. diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
  8651. index a9cac8413e49..bd70b848174d 100644
  8652. --- a/drivers/net/ethernet/tehuti/tehuti.c
  8653. +++ b/drivers/net/ethernet/tehuti/tehuti.c
  8654. @@ -1629,13 +1629,8 @@ static netdev_tx_t bdx_tx_transmit(struct sk_buff *skb,
  8655. unsigned long flags;
  8656. ENTER;
  8657. - local_irq_save(flags);
  8658. - if (!spin_trylock(&priv->tx_lock)) {
  8659. - local_irq_restore(flags);
  8660. - DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n",
  8661. - BDX_DRV_NAME, ndev->name);
  8662. - return NETDEV_TX_LOCKED;
  8663. - }
  8664. +
  8665. + spin_lock_irqsave(&priv->tx_lock, flags);
  8666. /* build tx descriptor */
  8667. BDX_ASSERT(f->m.wptr >= f->m.memsz); /* started with valid wptr */
  8668. diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
  8669. index 18cc2c8d5447..a5e0ef3c02d1 100644
  8670. --- a/drivers/net/rionet.c
  8671. +++ b/drivers/net/rionet.c
  8672. @@ -174,11 +174,7 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
  8673. unsigned long flags;
  8674. int add_num = 1;
  8675. - local_irq_save(flags);
  8676. - if (!spin_trylock(&rnet->tx_lock)) {
  8677. - local_irq_restore(flags);
  8678. - return NETDEV_TX_LOCKED;
  8679. - }
  8680. + spin_lock_irqsave(&rnet->tx_lock, flags);
  8681. if (is_multicast_ether_addr(eth->h_dest))
  8682. add_num = nets[rnet->mport->id].nact;
  8683. diff --git a/drivers/net/wireless/orinoco/orinoco_usb.c b/drivers/net/wireless/orinoco/orinoco_usb.c
  8684. index 91f05442de28..8fb1c92724df 100644
  8685. --- a/drivers/net/wireless/orinoco/orinoco_usb.c
  8686. +++ b/drivers/net/wireless/orinoco/orinoco_usb.c
  8687. @@ -697,7 +697,7 @@ static void ezusb_req_ctx_wait(struct ezusb_priv *upriv,
  8688. while (!ctx->done.done && msecs--)
  8689. udelay(1000);
  8690. } else {
  8691. - wait_event_interruptible(ctx->done.wait,
  8692. + swait_event_interruptible(ctx->done.wait,
  8693. ctx->done.done);
  8694. }
  8695. break;
  8696. diff --git a/drivers/pci/access.c b/drivers/pci/access.c
  8697. index 502a82ca1db0..6bb46e0a3349 100644
  8698. --- a/drivers/pci/access.c
  8699. +++ b/drivers/pci/access.c
  8700. @@ -561,7 +561,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev)
  8701. WARN_ON(!dev->block_cfg_access);
  8702. dev->block_cfg_access = 0;
  8703. - wake_up_all(&pci_cfg_wait);
  8704. + wake_up_all_locked(&pci_cfg_wait);
  8705. raw_spin_unlock_irqrestore(&pci_lock, flags);
  8706. }
  8707. EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
  8708. diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
  8709. index ec193a8357d7..455bf9c67b16 100644
  8710. --- a/drivers/scsi/fcoe/fcoe.c
  8711. +++ b/drivers/scsi/fcoe/fcoe.c
  8712. @@ -1287,7 +1287,7 @@ static void fcoe_percpu_thread_destroy(unsigned int cpu)
  8713. struct sk_buff *skb;
  8714. #ifdef CONFIG_SMP
  8715. struct fcoe_percpu_s *p0;
  8716. - unsigned targ_cpu = get_cpu();
  8717. + unsigned targ_cpu = get_cpu_light();
  8718. #endif /* CONFIG_SMP */
  8719. FCOE_DBG("Destroying receive thread for CPU %d\n", cpu);
  8720. @@ -1343,7 +1343,7 @@ static void fcoe_percpu_thread_destroy(unsigned int cpu)
  8721. kfree_skb(skb);
  8722. spin_unlock_bh(&p->fcoe_rx_list.lock);
  8723. }
  8724. - put_cpu();
  8725. + put_cpu_light();
  8726. #else
  8727. /*
  8728. * This a non-SMP scenario where the singular Rx thread is
  8729. @@ -1567,11 +1567,11 @@ err2:
  8730. static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
  8731. {
  8732. struct fcoe_percpu_s *fps;
  8733. - int rc;
  8734. + int rc, cpu = get_cpu_light();
  8735. - fps = &get_cpu_var(fcoe_percpu);
  8736. + fps = &per_cpu(fcoe_percpu, cpu);
  8737. rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
  8738. - put_cpu_var(fcoe_percpu);
  8739. + put_cpu_light();
  8740. return rc;
  8741. }
  8742. @@ -1767,11 +1767,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport,
  8743. return 0;
  8744. }
  8745. - stats = per_cpu_ptr(lport->stats, get_cpu());
  8746. + stats = per_cpu_ptr(lport->stats, get_cpu_light());
  8747. stats->InvalidCRCCount++;
  8748. if (stats->InvalidCRCCount < 5)
  8749. printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
  8750. - put_cpu();
  8751. + put_cpu_light();
  8752. return -EINVAL;
  8753. }
  8754. @@ -1815,7 +1815,7 @@ static void fcoe_recv_frame(struct sk_buff *skb)
  8755. */
  8756. hp = (struct fcoe_hdr *) skb_network_header(skb);
  8757. - stats = per_cpu_ptr(lport->stats, get_cpu());
  8758. + stats = per_cpu_ptr(lport->stats, get_cpu_light());
  8759. if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) {
  8760. if (stats->ErrorFrames < 5)
  8761. printk(KERN_WARNING "fcoe: FCoE version "
  8762. @@ -1847,13 +1847,13 @@ static void fcoe_recv_frame(struct sk_buff *skb)
  8763. goto drop;
  8764. if (!fcoe_filter_frames(lport, fp)) {
  8765. - put_cpu();
  8766. + put_cpu_light();
  8767. fc_exch_recv(lport, fp);
  8768. return;
  8769. }
  8770. drop:
  8771. stats->ErrorFrames++;
  8772. - put_cpu();
  8773. + put_cpu_light();
  8774. kfree_skb(skb);
  8775. }
  8776. diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
  8777. index 34a1b1f333b4..d91131210695 100644
  8778. --- a/drivers/scsi/fcoe/fcoe_ctlr.c
  8779. +++ b/drivers/scsi/fcoe/fcoe_ctlr.c
  8780. @@ -831,7 +831,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
  8781. INIT_LIST_HEAD(&del_list);
  8782. - stats = per_cpu_ptr(fip->lp->stats, get_cpu());
  8783. + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
  8784. list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
  8785. deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
  8786. @@ -867,7 +867,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
  8787. sel_time = fcf->time;
  8788. }
  8789. }
  8790. - put_cpu();
  8791. + put_cpu_light();
  8792. list_for_each_entry_safe(fcf, next, &del_list, list) {
  8793. /* Removes fcf from current list */
  8794. diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
  8795. index 30f9ef0c0d4f..6c686bc01a82 100644
  8796. --- a/drivers/scsi/libfc/fc_exch.c
  8797. +++ b/drivers/scsi/libfc/fc_exch.c
  8798. @@ -814,10 +814,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport,
  8799. }
  8800. memset(ep, 0, sizeof(*ep));
  8801. - cpu = get_cpu();
  8802. + cpu = get_cpu_light();
  8803. pool = per_cpu_ptr(mp->pool, cpu);
  8804. spin_lock_bh(&pool->lock);
  8805. - put_cpu();
  8806. + put_cpu_light();
  8807. /* peek cache of free slot */
  8808. if (pool->left != FC_XID_UNKNOWN) {
  8809. diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
  8810. index 9c706d8c1441..d968ffc79c08 100644
  8811. --- a/drivers/scsi/libsas/sas_ata.c
  8812. +++ b/drivers/scsi/libsas/sas_ata.c
  8813. @@ -190,7 +190,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
  8814. /* TODO: audit callers to ensure they are ready for qc_issue to
  8815. * unconditionally re-enable interrupts
  8816. */
  8817. - local_irq_save(flags);
  8818. + local_irq_save_nort(flags);
  8819. spin_unlock(ap->lock);
  8820. /* If the device fell off, no sense in issuing commands */
  8821. @@ -255,7 +255,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
  8822. out:
  8823. spin_lock(ap->lock);
  8824. - local_irq_restore(flags);
  8825. + local_irq_restore_nort(flags);
  8826. return ret;
  8827. }
  8828. diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
  8829. index fee9eb7c8a60..b42d4adc42dc 100644
  8830. --- a/drivers/scsi/qla2xxx/qla_inline.h
  8831. +++ b/drivers/scsi/qla2xxx/qla_inline.h
  8832. @@ -59,12 +59,12 @@ qla2x00_poll(struct rsp_que *rsp)
  8833. {
  8834. unsigned long flags;
  8835. struct qla_hw_data *ha = rsp->hw;
  8836. - local_irq_save(flags);
  8837. + local_irq_save_nort(flags);
  8838. if (IS_P3P_TYPE(ha))
  8839. qla82xx_poll(0, rsp);
  8840. else
  8841. ha->isp_ops->intr_handler(0, rsp);
  8842. - local_irq_restore(flags);
  8843. + local_irq_restore_nort(flags);
  8844. }
  8845. static inline uint8_t *
  8846. diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c
  8847. index 9ea3d9d49ffc..9e68706ae5e2 100644
  8848. --- a/drivers/thermal/x86_pkg_temp_thermal.c
  8849. +++ b/drivers/thermal/x86_pkg_temp_thermal.c
  8850. @@ -29,6 +29,7 @@
  8851. #include <linux/pm.h>
  8852. #include <linux/thermal.h>
  8853. #include <linux/debugfs.h>
  8854. +#include <linux/work-simple.h>
  8855. #include <asm/cpu_device_id.h>
  8856. #include <asm/mce.h>
  8857. @@ -352,7 +353,7 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
  8858. }
  8859. }
  8860. -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  8861. +static void platform_thermal_notify_work(struct swork_event *event)
  8862. {
  8863. unsigned long flags;
  8864. int cpu = smp_processor_id();
  8865. @@ -369,7 +370,7 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  8866. pkg_work_scheduled[phy_id]) {
  8867. disable_pkg_thres_interrupt();
  8868. spin_unlock_irqrestore(&pkg_work_lock, flags);
  8869. - return -EINVAL;
  8870. + return;
  8871. }
  8872. pkg_work_scheduled[phy_id] = 1;
  8873. spin_unlock_irqrestore(&pkg_work_lock, flags);
  8874. @@ -378,9 +379,48 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  8875. schedule_delayed_work_on(cpu,
  8876. &per_cpu(pkg_temp_thermal_threshold_work, cpu),
  8877. msecs_to_jiffies(notify_delay_ms));
  8878. +}
  8879. +
  8880. +#ifdef CONFIG_PREEMPT_RT_FULL
  8881. +static struct swork_event notify_work;
  8882. +
  8883. +static int thermal_notify_work_init(void)
  8884. +{
  8885. + int err;
  8886. +
  8887. + err = swork_get();
  8888. + if (err)
  8889. + return err;
  8890. +
  8891. + INIT_SWORK(&notify_work, platform_thermal_notify_work);
  8892. return 0;
  8893. }
  8894. +static void thermal_notify_work_cleanup(void)
  8895. +{
  8896. + swork_put();
  8897. +}
  8898. +
  8899. +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  8900. +{
  8901. + swork_queue(&notify_work);
  8902. + return 0;
  8903. +}
  8904. +
  8905. +#else /* !CONFIG_PREEMPT_RT_FULL */
  8906. +
  8907. +static int thermal_notify_work_init(void) { return 0; }
  8908. +
  8909. +static void thermal_notify_work_cleanup(void) { }
  8910. +
  8911. +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  8912. +{
  8913. + platform_thermal_notify_work(NULL);
  8914. +
  8915. + return 0;
  8916. +}
  8917. +#endif /* CONFIG_PREEMPT_RT_FULL */
  8918. +
  8919. static int find_siblings_cpu(int cpu)
  8920. {
  8921. int i;
  8922. @@ -584,6 +624,9 @@ static int __init pkg_temp_thermal_init(void)
  8923. if (!x86_match_cpu(pkg_temp_thermal_ids))
  8924. return -ENODEV;
8925. + if (thermal_notify_work_init())
  8926. + return -ENODEV;
  8927. +
  8928. spin_lock_init(&pkg_work_lock);
  8929. platform_thermal_package_notify =
  8930. pkg_temp_thermal_platform_thermal_notify;
  8931. @@ -608,7 +651,7 @@ err_ret:
  8932. kfree(pkg_work_scheduled);
  8933. platform_thermal_package_notify = NULL;
  8934. platform_thermal_package_rate_control = NULL;
  8935. -
  8936. + thermal_notify_work_cleanup();
  8937. return -ENODEV;
  8938. }
  8939. @@ -633,6 +676,7 @@ static void __exit pkg_temp_thermal_exit(void)
  8940. mutex_unlock(&phy_dev_list_mutex);
  8941. platform_thermal_package_notify = NULL;
  8942. platform_thermal_package_rate_control = NULL;
  8943. + thermal_notify_work_cleanup();
  8944. for_each_online_cpu(i)
  8945. cancel_delayed_work_sync(
  8946. &per_cpu(pkg_temp_thermal_threshold_work, i));
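The x86_pkg_temp_thermal conversion above follows the standard PREEMPT_RT pattern for work triggered from a context that must not take sleeping locks (here the thermal interrupt notification): the real handler becomes a swork_event callback that runs in the simple-work kthread, and the notification hook merely queues it; on !RT the #else branch keeps calling the handler directly, so behaviour there is unchanged. A condensed sketch of the pattern with hypothetical names (my_event, my_handler, my_init), using only the work-simple API visible in the hunk (swork_get, swork_put, INIT_SWORK, swork_queue):

#include <linux/work-simple.h>

static struct swork_event my_event;

static void my_handler(struct swork_event *ev)
{
	/* runs in kthread context, sleeping locks are fine here */
}

static int my_init(void)
{
	int err = swork_get();	/* reference the swork worker */

	if (err)
		return err;
	INIT_SWORK(&my_event, my_handler);
	return 0;
}

static void my_notify(void)	/* called from hard/softirq context */
{
	swork_queue(&my_event);
}

static void my_exit(void)
{
	swork_put();
}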
  8947. diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
  8948. index a64d53f7b1d1..fd96ce65bc31 100644
  8949. --- a/drivers/tty/serial/8250/8250_core.c
  8950. +++ b/drivers/tty/serial/8250/8250_core.c
  8951. @@ -36,6 +36,7 @@
  8952. #include <linux/nmi.h>
  8953. #include <linux/mutex.h>
  8954. #include <linux/slab.h>
  8955. +#include <linux/kdb.h>
  8956. #include <linux/uaccess.h>
  8957. #include <linux/pm_runtime.h>
  8958. #ifdef CONFIG_SPARC
  8959. @@ -80,7 +81,16 @@ static unsigned int skip_txen_test; /* force skip of txen test at init time */
  8960. #define DEBUG_INTR(fmt...) do { } while (0)
  8961. #endif
  8962. -#define PASS_LIMIT 512
  8963. +/*
8964. + * On -rt we can have more delays, and legitimately
  8965. + * so - so don't drop work spuriously and spam the
  8966. + * syslog:
  8967. + */
  8968. +#ifdef CONFIG_PREEMPT_RT_FULL
  8969. +# define PASS_LIMIT 1000000
  8970. +#else
  8971. +# define PASS_LIMIT 512
  8972. +#endif
  8973. #define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
  8974. @@ -3366,7 +3376,7 @@ static void serial8250_console_write(struct uart_8250_port *up, const char *s,
  8975. if (port->sysrq)
  8976. locked = 0;
  8977. - else if (oops_in_progress)
  8978. + else if (oops_in_progress || in_kdb_printk())
  8979. locked = spin_trylock_irqsave(&port->lock, flags);
  8980. else
  8981. spin_lock_irqsave(&port->lock, flags);
  8982. diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
  8983. index 0cc622afb67d..52f45f3029b7 100644
  8984. --- a/drivers/tty/serial/amba-pl011.c
  8985. +++ b/drivers/tty/serial/amba-pl011.c
  8986. @@ -2000,13 +2000,19 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
  8987. clk_enable(uap->clk);
  8988. - local_irq_save(flags);
  8989. + /*
  8990. + * local_irq_save(flags);
  8991. + *
  8992. + * This local_irq_save() is nonsense. If we come in via sysrq
8993. + * handling, then interrupts are already disabled. Aside from
  8994. + * that the port.sysrq check is racy on SMP regardless.
  8995. + */
  8996. if (uap->port.sysrq)
  8997. locked = 0;
  8998. else if (oops_in_progress)
  8999. - locked = spin_trylock(&uap->port.lock);
  9000. + locked = spin_trylock_irqsave(&uap->port.lock, flags);
  9001. else
  9002. - spin_lock(&uap->port.lock);
  9003. + spin_lock_irqsave(&uap->port.lock, flags);
  9004. /*
  9005. * First save the CR then disable the interrupts
  9006. @@ -2028,8 +2034,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
  9007. writew(old_cr, uap->port.membase + UART011_CR);
  9008. if (locked)
  9009. - spin_unlock(&uap->port.lock);
  9010. - local_irq_restore(flags);
  9011. + spin_unlock_irqrestore(&uap->port.lock, flags);
  9012. clk_disable(uap->clk);
  9013. }
  9014. diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
  9015. index 0a88693cd8ca..b89b06ed3b74 100644
  9016. --- a/drivers/tty/serial/omap-serial.c
  9017. +++ b/drivers/tty/serial/omap-serial.c
  9018. @@ -1282,13 +1282,10 @@ serial_omap_console_write(struct console *co, const char *s,
  9019. pm_runtime_get_sync(up->dev);
  9020. - local_irq_save(flags);
  9021. - if (up->port.sysrq)
  9022. - locked = 0;
  9023. - else if (oops_in_progress)
  9024. - locked = spin_trylock(&up->port.lock);
  9025. + if (up->port.sysrq || oops_in_progress)
  9026. + locked = spin_trylock_irqsave(&up->port.lock, flags);
  9027. else
  9028. - spin_lock(&up->port.lock);
  9029. + spin_lock_irqsave(&up->port.lock, flags);
  9030. /*
  9031. * First save the IER then disable the interrupts
  9032. @@ -1317,8 +1314,7 @@ serial_omap_console_write(struct console *co, const char *s,
  9033. pm_runtime_mark_last_busy(up->dev);
  9034. pm_runtime_put_autosuspend(up->dev);
  9035. if (locked)
  9036. - spin_unlock(&up->port.lock);
  9037. - local_irq_restore(flags);
  9038. + spin_unlock_irqrestore(&up->port.lock, flags);
  9039. }
  9040. static int __init
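The three console write paths patched above (8250, amba-pl011, omap-serial) are reshaped the same way: instead of an unconditional local_irq_save() around a plain spin_lock(), the port lock is taken with spin_lock_irqsave(), and only the cases where blocking is not acceptable (sysrq, oops, kdb) fall back to a trylock. On PREEMPT_RT the port lock is a sleeping lock, so the irqsave variant does not actually disable hard interrupts, which is the point of the change. A reduced sketch of the omap-serial form, with a hypothetical my_console_write():

static void my_console_write(struct uart_port *port, const char *s, unsigned int n)
{
	unsigned long flags;
	int locked = 1;

	if (port->sysrq || oops_in_progress)
		locked = spin_trylock_irqsave(&port->lock, flags);
	else
		spin_lock_irqsave(&port->lock, flags);

	/* ... emit the characters ... */

	if (locked)
		spin_unlock_irqrestore(&port->lock, flags);
}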
  9041. diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
  9042. index 3a49ba2910df..c55c42603849 100644
  9043. --- a/drivers/usb/core/hcd.c
  9044. +++ b/drivers/usb/core/hcd.c
  9045. @@ -1684,9 +1684,9 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
  9046. * and no one may trigger the above deadlock situation when
  9047. * running complete() in tasklet.
  9048. */
  9049. - local_irq_save(flags);
  9050. + local_irq_save_nort(flags);
  9051. urb->complete(urb);
  9052. - local_irq_restore(flags);
  9053. + local_irq_restore_nort(flags);
  9054. usb_anchor_resume_wakeups(anchor);
  9055. atomic_dec(&urb->use_count);
  9056. diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
  9057. index db9433eed2cc..6536d557abc1 100644
  9058. --- a/drivers/usb/gadget/function/f_fs.c
  9059. +++ b/drivers/usb/gadget/function/f_fs.c
  9060. @@ -1404,7 +1404,7 @@ static void ffs_data_put(struct ffs_data *ffs)
  9061. pr_info("%s(): freeing\n", __func__);
  9062. ffs_data_clear(ffs);
  9063. BUG_ON(waitqueue_active(&ffs->ev.waitq) ||
  9064. - waitqueue_active(&ffs->ep0req_completion.wait));
  9065. + swaitqueue_active(&ffs->ep0req_completion.wait));
  9066. kfree(ffs->dev_name);
  9067. kfree(ffs);
  9068. }
  9069. diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
  9070. index bccc5788bb98..8c23636963bc 100644
  9071. --- a/drivers/usb/gadget/legacy/inode.c
  9072. +++ b/drivers/usb/gadget/legacy/inode.c
  9073. @@ -345,7 +345,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
  9074. spin_unlock_irq (&epdata->dev->lock);
  9075. if (likely (value == 0)) {
  9076. - value = wait_event_interruptible (done.wait, done.done);
  9077. + value = swait_event_interruptible (done.wait, done.done);
  9078. if (value != 0) {
  9079. spin_lock_irq (&epdata->dev->lock);
  9080. if (likely (epdata->ep != NULL)) {
  9081. @@ -354,7 +354,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
  9082. usb_ep_dequeue (epdata->ep, epdata->req);
  9083. spin_unlock_irq (&epdata->dev->lock);
  9084. - wait_event (done.wait, done.done);
  9085. + swait_event (done.wait, done.done);
  9086. if (epdata->status == -ECONNRESET)
  9087. epdata->status = -EINTR;
  9088. } else {
  9089. diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
  9090. index d6ca3697d3c8..ee6bc64f9656 100644
  9091. --- a/drivers/usb/gadget/udc/atmel_usba_udc.c
  9092. +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
  9093. @@ -17,7 +17,9 @@
  9094. #include <linux/device.h>
  9095. #include <linux/dma-mapping.h>
  9096. #include <linux/list.h>
  9097. +#include <linux/mfd/syscon.h>
  9098. #include <linux/platform_device.h>
  9099. +#include <linux/regmap.h>
  9100. #include <linux/usb/ch9.h>
  9101. #include <linux/usb/gadget.h>
  9102. #include <linux/usb/atmel_usba_udc.h>
  9103. @@ -1889,20 +1891,15 @@ static int atmel_usba_stop(struct usb_gadget *gadget)
  9104. #ifdef CONFIG_OF
  9105. static void at91sam9rl_toggle_bias(struct usba_udc *udc, int is_on)
  9106. {
  9107. - unsigned int uckr = at91_pmc_read(AT91_CKGR_UCKR);
  9108. -
  9109. - if (is_on)
  9110. - at91_pmc_write(AT91_CKGR_UCKR, uckr | AT91_PMC_BIASEN);
  9111. - else
  9112. - at91_pmc_write(AT91_CKGR_UCKR, uckr & ~(AT91_PMC_BIASEN));
  9113. + regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN,
  9114. + is_on ? AT91_PMC_BIASEN : 0);
  9115. }
  9116. static void at91sam9g45_pulse_bias(struct usba_udc *udc)
  9117. {
  9118. - unsigned int uckr = at91_pmc_read(AT91_CKGR_UCKR);
  9119. -
  9120. - at91_pmc_write(AT91_CKGR_UCKR, uckr & ~(AT91_PMC_BIASEN));
  9121. - at91_pmc_write(AT91_CKGR_UCKR, uckr | AT91_PMC_BIASEN);
  9122. + regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN, 0);
  9123. + regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN,
  9124. + AT91_PMC_BIASEN);
  9125. }
  9126. static const struct usba_udc_errata at91sam9rl_errata = {
  9127. @@ -1939,6 +1936,9 @@ static struct usba_ep * atmel_udc_of_init(struct platform_device *pdev,
  9128. return ERR_PTR(-EINVAL);
  9129. udc->errata = match->data;
  9130. + udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9g45-pmc");
  9131. + if (udc->errata && IS_ERR(udc->pmc))
  9132. + return ERR_CAST(udc->pmc);
  9133. udc->num_ep = 0;
  9134. diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.h b/drivers/usb/gadget/udc/atmel_usba_udc.h
  9135. index ea448a344767..3e1c9d589dfa 100644
  9136. --- a/drivers/usb/gadget/udc/atmel_usba_udc.h
  9137. +++ b/drivers/usb/gadget/udc/atmel_usba_udc.h
  9138. @@ -354,6 +354,8 @@ struct usba_udc {
  9139. struct dentry *debugfs_root;
  9140. struct dentry *debugfs_regs;
  9141. #endif
  9142. +
  9143. + struct regmap *pmc;
  9144. };
  9145. static inline struct usba_ep *to_usba_ep(struct usb_ep *ep)
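The atmel_usba_udc change above drops the direct at91_pmc_read()/at91_pmc_write() accessors in favour of a syscon regmap looked up by compatible string, so the bias bit is flipped with a single regmap_update_bits() read-modify-write under the regmap's own locking. A small usage sketch; everything except the regmap/syscon API names is illustrative:

#include <linux/mfd/syscon.h>
#include <linux/regmap.h>

static int my_toggle_bias(struct regmap *pmc, bool on)
{
	/* only the bits in AT91_PMC_BIASEN are modified, the rest is preserved */
	return regmap_update_bits(pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN,
				  on ? AT91_PMC_BIASEN : 0);
}

/* looked up once, typically at probe time:
 *   pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9g45-pmc");
 */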
  9146. diff --git a/fs/aio.c b/fs/aio.c
  9147. index 480440f4701f..5a2380de4a9b 100644
  9148. --- a/fs/aio.c
  9149. +++ b/fs/aio.c
  9150. @@ -40,6 +40,7 @@
  9151. #include <linux/ramfs.h>
  9152. #include <linux/percpu-refcount.h>
  9153. #include <linux/mount.h>
  9154. +#include <linux/work-simple.h>
  9155. #include <asm/kmap_types.h>
  9156. #include <asm/uaccess.h>
  9157. @@ -115,7 +116,7 @@ struct kioctx {
  9158. struct page **ring_pages;
  9159. long nr_pages;
  9160. - struct work_struct free_work;
  9161. + struct swork_event free_work;
  9162. /*
  9163. * signals when all in-flight requests are done
  9164. @@ -253,6 +254,7 @@ static int __init aio_setup(void)
  9165. .mount = aio_mount,
  9166. .kill_sb = kill_anon_super,
  9167. };
  9168. + BUG_ON(swork_get());
  9169. aio_mnt = kern_mount(&aio_fs);
  9170. if (IS_ERR(aio_mnt))
  9171. panic("Failed to create aio fs mount.");
  9172. @@ -559,9 +561,9 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
  9173. return cancel(&kiocb->common);
  9174. }
  9175. -static void free_ioctx(struct work_struct *work)
  9176. +static void free_ioctx(struct swork_event *sev)
  9177. {
  9178. - struct kioctx *ctx = container_of(work, struct kioctx, free_work);
  9179. + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
  9180. pr_debug("freeing %p\n", ctx);
  9181. @@ -580,8 +582,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
  9182. if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
  9183. complete(&ctx->rq_wait->comp);
  9184. - INIT_WORK(&ctx->free_work, free_ioctx);
  9185. - schedule_work(&ctx->free_work);
  9186. + INIT_SWORK(&ctx->free_work, free_ioctx);
  9187. + swork_queue(&ctx->free_work);
  9188. }
  9189. /*
  9190. @@ -589,9 +591,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
  9191. * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
  9192. * now it's safe to cancel any that need to be.
  9193. */
  9194. -static void free_ioctx_users(struct percpu_ref *ref)
  9195. +static void free_ioctx_users_work(struct swork_event *sev)
  9196. {
  9197. - struct kioctx *ctx = container_of(ref, struct kioctx, users);
  9198. + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
  9199. struct aio_kiocb *req;
  9200. spin_lock_irq(&ctx->ctx_lock);
  9201. @@ -610,6 +612,14 @@ static void free_ioctx_users(struct percpu_ref *ref)
  9202. percpu_ref_put(&ctx->reqs);
  9203. }
  9204. +static void free_ioctx_users(struct percpu_ref *ref)
  9205. +{
  9206. + struct kioctx *ctx = container_of(ref, struct kioctx, users);
  9207. +
  9208. + INIT_SWORK(&ctx->free_work, free_ioctx_users_work);
  9209. + swork_queue(&ctx->free_work);
  9210. +}
  9211. +
  9212. static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
  9213. {
  9214. unsigned i, new_nr;
  9215. diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
  9216. index 6196b5eaf9a5..ab37f57136f3 100644
  9217. --- a/fs/autofs4/autofs_i.h
  9218. +++ b/fs/autofs4/autofs_i.h
  9219. @@ -34,6 +34,7 @@
  9220. #include <linux/sched.h>
  9221. #include <linux/mount.h>
  9222. #include <linux/namei.h>
  9223. +#include <linux/delay.h>
  9224. #include <asm/current.h>
  9225. #include <asm/uaccess.h>
  9226. diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
  9227. index 7a5a598a2d94..d08bcdc30566 100644
  9228. --- a/fs/autofs4/expire.c
  9229. +++ b/fs/autofs4/expire.c
  9230. @@ -150,7 +150,7 @@ again:
  9231. parent = p->d_parent;
  9232. if (!spin_trylock(&parent->d_lock)) {
  9233. spin_unlock(&p->d_lock);
  9234. - cpu_relax();
  9235. + cpu_chill();
  9236. goto relock;
  9237. }
  9238. spin_unlock(&p->d_lock);
  9239. diff --git a/fs/buffer.c b/fs/buffer.c
  9240. index c7a5602d01ee..2907544c3a1d 100644
  9241. --- a/fs/buffer.c
  9242. +++ b/fs/buffer.c
  9243. @@ -301,8 +301,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
  9244. * decide that the page is now completely done.
  9245. */
  9246. first = page_buffers(page);
  9247. - local_irq_save(flags);
  9248. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  9249. + flags = bh_uptodate_lock_irqsave(first);
  9250. clear_buffer_async_read(bh);
  9251. unlock_buffer(bh);
  9252. tmp = bh;
  9253. @@ -315,8 +314,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
  9254. }
  9255. tmp = tmp->b_this_page;
  9256. } while (tmp != bh);
  9257. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  9258. - local_irq_restore(flags);
  9259. + bh_uptodate_unlock_irqrestore(first, flags);
  9260. /*
  9261. * If none of the buffers had errors and they are all
  9262. @@ -328,9 +326,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
  9263. return;
  9264. still_busy:
  9265. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  9266. - local_irq_restore(flags);
  9267. - return;
  9268. + bh_uptodate_unlock_irqrestore(first, flags);
  9269. }
  9270. /*
  9271. @@ -358,8 +354,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
  9272. }
  9273. first = page_buffers(page);
  9274. - local_irq_save(flags);
  9275. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  9276. + flags = bh_uptodate_lock_irqsave(first);
  9277. clear_buffer_async_write(bh);
  9278. unlock_buffer(bh);
  9279. @@ -371,15 +366,12 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
  9280. }
  9281. tmp = tmp->b_this_page;
  9282. }
  9283. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  9284. - local_irq_restore(flags);
  9285. + bh_uptodate_unlock_irqrestore(first, flags);
  9286. end_page_writeback(page);
  9287. return;
  9288. still_busy:
  9289. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  9290. - local_irq_restore(flags);
  9291. - return;
  9292. + bh_uptodate_unlock_irqrestore(first, flags);
  9293. }
  9294. EXPORT_SYMBOL(end_buffer_async_write);
  9295. @@ -3325,6 +3317,7 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
  9296. struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
  9297. if (ret) {
  9298. INIT_LIST_HEAD(&ret->b_assoc_buffers);
  9299. + buffer_head_init_locks(ret);
  9300. preempt_disable();
  9301. __this_cpu_inc(bh_accounting.nr);
  9302. recalc_bh_state();
  9303. diff --git a/fs/dcache.c b/fs/dcache.c
  9304. index 660857431b1c..c790b2b070ab 100644
  9305. --- a/fs/dcache.c
  9306. +++ b/fs/dcache.c
  9307. @@ -19,6 +19,7 @@
  9308. #include <linux/mm.h>
  9309. #include <linux/fs.h>
  9310. #include <linux/fsnotify.h>
  9311. +#include <linux/delay.h>
  9312. #include <linux/slab.h>
  9313. #include <linux/init.h>
  9314. #include <linux/hash.h>
  9315. @@ -747,6 +748,8 @@ static inline bool fast_dput(struct dentry *dentry)
  9316. */
  9317. void dput(struct dentry *dentry)
  9318. {
  9319. + struct dentry *parent;
  9320. +
  9321. if (unlikely(!dentry))
  9322. return;
  9323. @@ -783,9 +786,18 @@ repeat:
  9324. return;
  9325. kill_it:
  9326. - dentry = dentry_kill(dentry);
  9327. - if (dentry) {
  9328. - cond_resched();
  9329. + parent = dentry_kill(dentry);
  9330. + if (parent) {
  9331. + int r;
  9332. +
  9333. + if (parent == dentry) {
  9334. + /* the task with the highest priority won't schedule */
  9335. + r = cond_resched();
  9336. + if (!r)
  9337. + cpu_chill();
  9338. + } else {
  9339. + dentry = parent;
  9340. + }
  9341. goto repeat;
  9342. }
  9343. }
  9344. @@ -2391,7 +2403,7 @@ again:
  9345. if (dentry->d_lockref.count == 1) {
  9346. if (!spin_trylock(&inode->i_lock)) {
  9347. spin_unlock(&dentry->d_lock);
  9348. - cpu_relax();
  9349. + cpu_chill();
  9350. goto again;
  9351. }
  9352. dentry->d_flags &= ~DCACHE_CANT_MOUNT;
  9353. diff --git a/fs/eventpoll.c b/fs/eventpoll.c
  9354. index 1e009cad8d5c..d0c12504d3b4 100644
  9355. --- a/fs/eventpoll.c
  9356. +++ b/fs/eventpoll.c
  9357. @@ -505,12 +505,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
  9358. */
  9359. static void ep_poll_safewake(wait_queue_head_t *wq)
  9360. {
  9361. - int this_cpu = get_cpu();
  9362. + int this_cpu = get_cpu_light();
  9363. ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
  9364. ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
  9365. - put_cpu();
  9366. + put_cpu_light();
  9367. }
  9368. static void ep_remove_wait_queue(struct eppoll_entry *pwq)
  9369. diff --git a/fs/exec.c b/fs/exec.c
  9370. index 1977c2a553ac..0e7125be0283 100644
  9371. --- a/fs/exec.c
  9372. +++ b/fs/exec.c
  9373. @@ -859,12 +859,14 @@ static int exec_mmap(struct mm_struct *mm)
  9374. }
  9375. }
  9376. task_lock(tsk);
  9377. + preempt_disable_rt();
  9378. active_mm = tsk->active_mm;
  9379. tsk->mm = mm;
  9380. tsk->active_mm = mm;
  9381. activate_mm(active_mm, mm);
  9382. tsk->mm->vmacache_seqnum = 0;
  9383. vmacache_flush(tsk);
  9384. + preempt_enable_rt();
  9385. task_unlock(tsk);
  9386. if (old_mm) {
  9387. up_read(&old_mm->mmap_sem);
  9388. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
  9389. index 8de34ab6d5b1..4e80270703a4 100644
  9390. --- a/fs/f2fs/f2fs.h
  9391. +++ b/fs/f2fs/f2fs.h
  9392. @@ -22,7 +22,6 @@
  9393. #ifdef CONFIG_F2FS_CHECK_FS
  9394. #define f2fs_bug_on(sbi, condition) BUG_ON(condition)
  9395. -#define f2fs_down_write(x, y) down_write_nest_lock(x, y)
  9396. #else
  9397. #define f2fs_bug_on(sbi, condition) \
  9398. do { \
  9399. @@ -31,7 +30,6 @@
  9400. set_sbi_flag(sbi, SBI_NEED_FSCK); \
  9401. } \
  9402. } while (0)
  9403. -#define f2fs_down_write(x, y) down_write(x)
  9404. #endif
  9405. /*
  9406. @@ -838,7 +836,7 @@ static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
  9407. static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
  9408. {
  9409. - f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex);
  9410. + down_write(&sbi->cp_rwsem);
  9411. }
  9412. static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
  9413. diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
  9414. index 08c03044abdd..95debd71e5fa 100644
  9415. --- a/fs/jbd/checkpoint.c
  9416. +++ b/fs/jbd/checkpoint.c
  9417. @@ -129,6 +129,8 @@ void __log_wait_for_space(journal_t *journal)
  9418. if (journal->j_flags & JFS_ABORT)
  9419. return;
  9420. spin_unlock(&journal->j_state_lock);
  9421. + if (current->plug)
  9422. + io_schedule();
  9423. mutex_lock(&journal->j_checkpoint_mutex);
  9424. /*
  9425. diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
  9426. index 8c44654ce274..78c1545a3fab 100644
  9427. --- a/fs/jbd2/checkpoint.c
  9428. +++ b/fs/jbd2/checkpoint.c
  9429. @@ -116,6 +116,8 @@ void __jbd2_log_wait_for_space(journal_t *journal)
  9430. nblocks = jbd2_space_needed(journal);
  9431. while (jbd2_log_space_left(journal) < nblocks) {
  9432. write_unlock(&journal->j_state_lock);
  9433. + if (current->plug)
  9434. + io_schedule();
  9435. mutex_lock(&journal->j_checkpoint_mutex);
  9436. /*
  9437. diff --git a/fs/namespace.c b/fs/namespace.c
  9438. index 556721fb0cf6..d27cd4633f59 100644
  9439. --- a/fs/namespace.c
  9440. +++ b/fs/namespace.c
  9441. @@ -14,6 +14,7 @@
  9442. #include <linux/mnt_namespace.h>
  9443. #include <linux/user_namespace.h>
  9444. #include <linux/namei.h>
  9445. +#include <linux/delay.h>
  9446. #include <linux/security.h>
  9447. #include <linux/idr.h>
  9448. #include <linux/init.h> /* init_rootfs */
  9449. @@ -353,8 +354,11 @@ int __mnt_want_write(struct vfsmount *m)
  9450. * incremented count after it has set MNT_WRITE_HOLD.
  9451. */
  9452. smp_mb();
  9453. - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
  9454. - cpu_relax();
  9455. + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
  9456. + preempt_enable();
  9457. + cpu_chill();
  9458. + preempt_disable();
  9459. + }
  9460. /*
  9461. * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
  9462. * be set to match its requirements. So we must not load that until
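The __mnt_want_write() loop above sits inside a preempt-disabled section (the per-CPU writer count update), while cpu_chill() sleeps on PREEMPT_RT, so the chill has to be bracketed by preempt_enable()/preempt_disable(). The same shape, pulled out as a hypothetical helper purely for readability:

/* Sketch: wait for MNT_WRITE_HOLD to clear from a preempt-disabled caller. */
static void wait_for_mnt_write_hold(struct mount *mnt)
{
	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
		preempt_enable();
		cpu_chill();		/* sleeps on RT, spins on !RT */
		preempt_disable();
	}
}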
  9463. diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
  9464. index 7521e11db728..f0de4b6b8bf3 100644
  9465. --- a/fs/ntfs/aops.c
  9466. +++ b/fs/ntfs/aops.c
  9467. @@ -107,8 +107,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  9468. "0x%llx.", (unsigned long long)bh->b_blocknr);
  9469. }
  9470. first = page_buffers(page);
  9471. - local_irq_save(flags);
  9472. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  9473. + flags = bh_uptodate_lock_irqsave(first);
  9474. clear_buffer_async_read(bh);
  9475. unlock_buffer(bh);
  9476. tmp = bh;
  9477. @@ -123,8 +122,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  9478. }
  9479. tmp = tmp->b_this_page;
  9480. } while (tmp != bh);
  9481. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  9482. - local_irq_restore(flags);
  9483. + bh_uptodate_unlock_irqrestore(first, flags);
  9484. /*
  9485. * If none of the buffers had errors then we can set the page uptodate,
  9486. * but we first have to perform the post read mst fixups, if the
  9487. @@ -145,13 +143,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  9488. recs = PAGE_CACHE_SIZE / rec_size;
  9489. /* Should have been verified before we got here... */
  9490. BUG_ON(!recs);
  9491. - local_irq_save(flags);
  9492. + local_irq_save_nort(flags);
  9493. kaddr = kmap_atomic(page);
  9494. for (i = 0; i < recs; i++)
  9495. post_read_mst_fixup((NTFS_RECORD*)(kaddr +
  9496. i * rec_size), rec_size);
  9497. kunmap_atomic(kaddr);
  9498. - local_irq_restore(flags);
  9499. + local_irq_restore_nort(flags);
  9500. flush_dcache_page(page);
  9501. if (likely(page_uptodate && !PageError(page)))
  9502. SetPageUptodate(page);
  9503. @@ -159,9 +157,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  9504. unlock_page(page);
  9505. return;
  9506. still_busy:
  9507. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  9508. - local_irq_restore(flags);
  9509. - return;
  9510. + bh_uptodate_unlock_irqrestore(first, flags);
  9511. }
  9512. /**
  9513. diff --git a/fs/timerfd.c b/fs/timerfd.c
  9514. index b94fa6c3c6eb..64fb86066237 100644
  9515. --- a/fs/timerfd.c
  9516. +++ b/fs/timerfd.c
  9517. @@ -450,7 +450,10 @@ static int do_timerfd_settime(int ufd, int flags,
  9518. break;
  9519. }
  9520. spin_unlock_irq(&ctx->wqh.lock);
  9521. - cpu_relax();
  9522. + if (isalarm(ctx))
  9523. + hrtimer_wait_for_timer(&ctx->t.alarm.timer);
  9524. + else
  9525. + hrtimer_wait_for_timer(&ctx->t.tmr);
  9526. }
  9527. /*
  9528. diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
  9529. index c29f34253e2b..f30d5b8cd5a1 100644
  9530. --- a/fs/xfs/xfs_inode.c
  9531. +++ b/fs/xfs/xfs_inode.c
  9532. @@ -164,7 +164,7 @@ xfs_ilock(
  9533. (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
  9534. ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
  9535. (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
  9536. - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
  9537. + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
  9538. if (lock_flags & XFS_IOLOCK_EXCL)
  9539. mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
  9540. @@ -212,7 +212,7 @@ xfs_ilock_nowait(
  9541. (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
  9542. ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
  9543. (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
  9544. - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
  9545. + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
  9546. if (lock_flags & XFS_IOLOCK_EXCL) {
  9547. if (!mrtryupdate(&ip->i_iolock))
  9548. @@ -281,7 +281,7 @@ xfs_iunlock(
  9549. (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
  9550. ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
  9551. (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
  9552. - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
  9553. + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
  9554. ASSERT(lock_flags != 0);
  9555. if (lock_flags & XFS_IOLOCK_EXCL)
  9556. @@ -364,30 +364,38 @@ int xfs_lock_delays;
  9557. /*
  9558. * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
  9559. - * value. This shouldn't be called for page fault locking, but we also need to
  9560. - * ensure we don't overrun the number of lockdep subclasses for the iolock or
  9561. - * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
  9562. + * value. This can be called for any type of inode lock combination, including
  9563. + * parent locking. Care must be taken to ensure we don't overrun the subclass
  9564. + * storage fields in the class mask we build.
  9565. */
  9566. static inline int
  9567. xfs_lock_inumorder(int lock_mode, int subclass)
  9568. {
  9569. + int class = 0;
  9570. +
  9571. + ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP |
  9572. + XFS_ILOCK_RTSUM)));
  9573. +
  9574. if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
  9575. - ASSERT(subclass + XFS_LOCK_INUMORDER <
  9576. - (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
  9577. - lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
  9578. + ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
  9579. + ASSERT(subclass + XFS_IOLOCK_PARENT_VAL <
  9580. + MAX_LOCKDEP_SUBCLASSES);
  9581. + class += subclass << XFS_IOLOCK_SHIFT;
  9582. + if (lock_mode & XFS_IOLOCK_PARENT)
  9583. + class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT;
  9584. }
  9585. if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
  9586. - ASSERT(subclass + XFS_LOCK_INUMORDER <
  9587. - (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
  9588. - lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
  9589. - XFS_MMAPLOCK_SHIFT;
  9590. + ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
  9591. + class += subclass << XFS_MMAPLOCK_SHIFT;
  9592. }
  9593. - if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
  9594. - lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
  9595. + if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) {
  9596. + ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
  9597. + class += subclass << XFS_ILOCK_SHIFT;
  9598. + }
  9599. - return lock_mode;
  9600. + return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class;
  9601. }
  9602. /*
  9603. @@ -399,6 +407,11 @@ xfs_lock_inumorder(int lock_mode, int subclass)
  9604. * transaction (such as truncate). This can result in deadlock since the long
  9605. * running trans might need to wait for the inode we just locked in order to
  9606. * push the tail and free space in the log.
  9607. + *
  9608. + * xfs_lock_inodes() can only be used to lock one type of lock at a time -
9609. + * the iolock, the mmaplock or the ilock. If we
  9610. + * lock more than one at a time, lockdep will report false positives saying we
  9611. + * have violated locking orders.
  9612. */
  9613. void
  9614. xfs_lock_inodes(
  9615. @@ -409,8 +422,29 @@ xfs_lock_inodes(
  9616. int attempts = 0, i, j, try_lock;
  9617. xfs_log_item_t *lp;
  9618. - /* currently supports between 2 and 5 inodes */
  9619. + /*
  9620. + * Currently supports between 2 and 5 inodes with exclusive locking. We
  9621. + * support an arbitrary depth of locking here, but absolute limits on
9622. + * inodes depend on the type of locking and the limits placed by
  9623. + * lockdep annotations in xfs_lock_inumorder. These are all checked by
  9624. + * the asserts.
  9625. + */
  9626. ASSERT(ips && inodes >= 2 && inodes <= 5);
  9627. + ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL |
  9628. + XFS_ILOCK_EXCL));
  9629. + ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
  9630. + XFS_ILOCK_SHARED)));
  9631. + ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) ||
  9632. + inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1);
  9633. + ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
  9634. + inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
  9635. + ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
  9636. + inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
  9637. +
  9638. + if (lock_mode & XFS_IOLOCK_EXCL) {
  9639. + ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
  9640. + } else if (lock_mode & XFS_MMAPLOCK_EXCL)
  9641. + ASSERT(!(lock_mode & XFS_ILOCK_EXCL));
  9642. try_lock = 0;
  9643. i = 0;
  9644. diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
  9645. index 8f22d20368d8..ee26a603c131 100644
  9646. --- a/fs/xfs/xfs_inode.h
  9647. +++ b/fs/xfs/xfs_inode.h
  9648. @@ -284,9 +284,9 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
  9649. * Flags for lockdep annotations.
  9650. *
  9651. * XFS_LOCK_PARENT - for directory operations that require locking a
  9652. - * parent directory inode and a child entry inode. The parent gets locked
  9653. - * with this flag so it gets a lockdep subclass of 1 and the child entry
  9654. - * lock will have a lockdep subclass of 0.
  9655. + * parent directory inode and a child entry inode. IOLOCK requires nesting,
  9656. + * MMAPLOCK does not support this class, ILOCK requires a single subclass
  9657. + * to differentiate parent from child.
  9658. *
  9659. * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary
  9660. * inodes do not participate in the normal lock order, and thus have their
  9661. @@ -295,30 +295,63 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
  9662. * XFS_LOCK_INUMORDER - for locking several inodes at the some time
  9663. * with xfs_lock_inodes(). This flag is used as the starting subclass
  9664. * and each subsequent lock acquired will increment the subclass by one.
  9665. - * So the first lock acquired will have a lockdep subclass of 4, the
  9666. - * second lock will have a lockdep subclass of 5, and so on. It is
  9667. - * the responsibility of the class builder to shift this to the correct
  9668. - * portion of the lock_mode lockdep mask.
  9669. + * However, MAX_LOCKDEP_SUBCLASSES == 8, which means we are greatly
  9670. + * limited to the subclasses we can represent via nesting. We need at least
  9671. + * 5 inodes nest depth for the ILOCK through rename, and we also have to support
  9672. + * XFS_ILOCK_PARENT, which gives 6 subclasses. Then we have XFS_ILOCK_RTBITMAP
  9673. + * and XFS_ILOCK_RTSUM, which are another 2 unique subclasses, so that's all
  9674. + * 8 subclasses supported by lockdep.
  9675. + *
  9676. + * This also means we have to number the sub-classes in the lowest bits of
  9677. + * the mask we keep, and we have to ensure we never exceed 3 bits of lockdep
  9678. + * mask and we can't use bit-masking to build the subclasses. What a mess.
  9679. + *
  9680. + * Bit layout:
  9681. + *
  9682. + * Bit Lock Region
  9683. + * 16-19 XFS_IOLOCK_SHIFT dependencies
  9684. + * 20-23 XFS_MMAPLOCK_SHIFT dependencies
  9685. + * 24-31 XFS_ILOCK_SHIFT dependencies
  9686. + *
  9687. + * IOLOCK values
  9688. + *
  9689. + * 0-3 subclass value
  9690. + * 4-7 PARENT subclass values
  9691. + *
  9692. + * MMAPLOCK values
  9693. + *
  9694. + * 0-3 subclass value
  9695. + * 4-7 unused
  9696. + *
  9697. + * ILOCK values
  9698. + * 0-4 subclass values
  9699. + * 5 PARENT subclass (not nestable)
  9700. + * 6 RTBITMAP subclass (not nestable)
  9701. + * 7 RTSUM subclass (not nestable)
  9702. + *
  9703. */
  9704. -#define XFS_LOCK_PARENT 1
  9705. -#define XFS_LOCK_RTBITMAP 2
  9706. -#define XFS_LOCK_RTSUM 3
  9707. -#define XFS_LOCK_INUMORDER 4
  9708. -
  9709. -#define XFS_IOLOCK_SHIFT 16
  9710. -#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
  9711. -
  9712. -#define XFS_MMAPLOCK_SHIFT 20
  9713. -
  9714. -#define XFS_ILOCK_SHIFT 24
  9715. -#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
  9716. -#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
  9717. -#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
  9718. -
  9719. -#define XFS_IOLOCK_DEP_MASK 0x000f0000
  9720. -#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
  9721. -#define XFS_ILOCK_DEP_MASK 0xff000000
  9722. -#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \
  9723. +#define XFS_IOLOCK_SHIFT 16
  9724. +#define XFS_IOLOCK_PARENT_VAL 4
  9725. +#define XFS_IOLOCK_MAX_SUBCLASS (XFS_IOLOCK_PARENT_VAL - 1)
  9726. +#define XFS_IOLOCK_DEP_MASK 0x000f0000
  9727. +#define XFS_IOLOCK_PARENT (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT)
  9728. +
  9729. +#define XFS_MMAPLOCK_SHIFT 20
  9730. +#define XFS_MMAPLOCK_NUMORDER 0
  9731. +#define XFS_MMAPLOCK_MAX_SUBCLASS 3
  9732. +#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
  9733. +
  9734. +#define XFS_ILOCK_SHIFT 24
  9735. +#define XFS_ILOCK_PARENT_VAL 5
  9736. +#define XFS_ILOCK_MAX_SUBCLASS (XFS_ILOCK_PARENT_VAL - 1)
  9737. +#define XFS_ILOCK_RTBITMAP_VAL 6
  9738. +#define XFS_ILOCK_RTSUM_VAL 7
  9739. +#define XFS_ILOCK_DEP_MASK 0xff000000
  9740. +#define XFS_ILOCK_PARENT (XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT)
  9741. +#define XFS_ILOCK_RTBITMAP (XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT)
  9742. +#define XFS_ILOCK_RTSUM (XFS_ILOCK_RTSUM_VAL << XFS_ILOCK_SHIFT)
  9743. +
  9744. +#define XFS_LOCK_SUBCLASS_MASK (XFS_IOLOCK_DEP_MASK | \
  9745. XFS_MMAPLOCK_DEP_MASK | \
  9746. XFS_ILOCK_DEP_MASK)
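With the layout documented above, a lock class is built by shifting the subclass value into its region of the lock_mode word rather than OR-ing precomputed flag values, and the numbers are easy to check: locking five inodes with XFS_ILOCK_EXCL in xfs_lock_inodes() uses subclasses 0..4, i.e. class bits 0x00000000 through 0x04000000, while the dedicated values land at 5, 6 and 7. A small illustration using only the defines above:

/* Illustration only, values taken from the defines above. */
static inline unsigned int example_ilock_class(unsigned int subclass)
{
	/* subclass must not exceed XFS_ILOCK_MAX_SUBCLASS (4) */
	return subclass << XFS_ILOCK_SHIFT;	/* e.g. 3 -> 0x03000000 */
}

/*
 * XFS_ILOCK_PARENT   = 5 << 24 = 0x05000000
 * XFS_ILOCK_RTBITMAP = 6 << 24 = 0x06000000
 * XFS_ILOCK_RTSUM    = 7 << 24 = 0x07000000
 * all of which stay within XFS_ILOCK_DEP_MASK (0xff000000).
 */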
  9747. diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
  9748. index 74ba46c8157a..ccde2a9ca7b7 100644
  9749. --- a/include/acpi/platform/aclinux.h
  9750. +++ b/include/acpi/platform/aclinux.h
  9751. @@ -123,6 +123,7 @@
  9752. #define acpi_cache_t struct kmem_cache
  9753. #define acpi_spinlock spinlock_t *
  9754. +#define acpi_raw_spinlock raw_spinlock_t *
  9755. #define acpi_cpu_flags unsigned long
  9756. /* Use native linux version of acpi_os_allocate_zeroed */
  9757. @@ -141,6 +142,20 @@
  9758. #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id
  9759. #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock
  9760. +#define acpi_os_create_raw_lock(__handle) \
  9761. +({ \
  9762. + raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \
  9763. + \
  9764. + if (lock) { \
  9765. + *(__handle) = lock; \
  9766. + raw_spin_lock_init(*(__handle)); \
  9767. + } \
  9768. + lock ? AE_OK : AE_NO_MEMORY; \
  9769. + })
  9770. +
  9771. +#define acpi_os_delete_raw_lock(__handle) kfree(__handle)
  9772. +
  9773. +
  9774. /*
  9775. * OSL interfaces used by debugger/disassembler
  9776. */
  9777. diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
  9778. index 630dd2372238..850e4d993a88 100644
  9779. --- a/include/asm-generic/bug.h
  9780. +++ b/include/asm-generic/bug.h
  9781. @@ -206,6 +206,20 @@ extern void warn_slowpath_null(const char *file, const int line);
  9782. # define WARN_ON_SMP(x) ({0;})
  9783. #endif
  9784. +#ifdef CONFIG_PREEMPT_RT_BASE
  9785. +# define BUG_ON_RT(c) BUG_ON(c)
  9786. +# define BUG_ON_NONRT(c) do { } while (0)
  9787. +# define WARN_ON_RT(condition) WARN_ON(condition)
  9788. +# define WARN_ON_NONRT(condition) do { } while (0)
  9789. +# define WARN_ON_ONCE_NONRT(condition) do { } while (0)
  9790. +#else
  9791. +# define BUG_ON_RT(c) do { } while (0)
  9792. +# define BUG_ON_NONRT(c) BUG_ON(c)
  9793. +# define WARN_ON_RT(condition) do { } while (0)
  9794. +# define WARN_ON_NONRT(condition) WARN_ON(condition)
  9795. +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition)
  9796. +#endif
  9797. +
  9798. #endif /* __ASSEMBLY__ */
  9799. #endif
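The *_RT/*_NONRT assertion variants above let a check stay in place for the configuration where it is meaningful and compile away for the other one. A typical, purely illustrative use is an interrupts-off invariant that only holds on !RT because the lock protecting the data is a sleeping lock on PREEMPT_RT:

static void my_update_queue(void)
{
	/*
	 * !RT: callers must have interrupts disabled.
	 * RT: the sleeping lock makes that requirement meaningless,
	 * so the check vanishes.
	 */
	WARN_ON_NONRT(!irqs_disabled());

	/* ... manipulate the protected data ... */
}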
  9800. diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
  9801. index b59b5a52637e..e56272c919b5 100644
  9802. --- a/include/asm-generic/futex.h
  9803. +++ b/include/asm-generic/futex.h
  9804. @@ -8,8 +8,7 @@
  9805. #ifndef CONFIG_SMP
  9806. /*
  9807. * The following implementation only for uniprocessor machines.
  9808. - * For UP, it's relies on the fact that pagefault_disable() also disables
  9809. - * preemption to ensure mutual exclusion.
  9810. + * It relies on preempt_disable() ensuring mutual exclusion.
  9811. *
  9812. */
  9813. @@ -38,6 +37,7 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
  9814. if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
  9815. oparg = 1 << oparg;
  9816. + preempt_disable();
  9817. pagefault_disable();
  9818. ret = -EFAULT;
  9819. @@ -72,6 +72,7 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
  9820. out_pagefault_enable:
  9821. pagefault_enable();
  9822. + preempt_enable();
  9823. if (ret == 0) {
  9824. switch (cmp) {
  9825. @@ -106,6 +107,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
  9826. {
  9827. u32 val;
  9828. + preempt_disable();
  9829. if (unlikely(get_user(val, uaddr) != 0))
  9830. return -EFAULT;
  9831. @@ -113,6 +115,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
  9832. return -EFAULT;
  9833. *uval = val;
  9834. + preempt_enable();
  9835. return 0;
  9836. }
  9837. diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
  9838. index b6a53e8e526a..c91d3d764c36 100644
  9839. --- a/include/asm-generic/preempt.h
  9840. +++ b/include/asm-generic/preempt.h
  9841. @@ -7,10 +7,10 @@
  9842. static __always_inline int preempt_count(void)
  9843. {
  9844. - return current_thread_info()->preempt_count;
  9845. + return READ_ONCE(current_thread_info()->preempt_count);
  9846. }
  9847. -static __always_inline int *preempt_count_ptr(void)
  9848. +static __always_inline volatile int *preempt_count_ptr(void)
  9849. {
  9850. return &current_thread_info()->preempt_count;
  9851. }
  9852. diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
  9853. index 2056a99b92f8..e6ff990c9f11 100644
  9854. --- a/include/linux/blk-mq.h
  9855. +++ b/include/linux/blk-mq.h
  9856. @@ -202,6 +202,7 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
  9857. struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
  9858. struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
  9859. +void __blk_mq_complete_request_remote_work(struct work_struct *work);
  9860. int blk_mq_request_started(struct request *rq);
  9861. void blk_mq_start_request(struct request *rq);
  9862. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
  9863. index 5d93a6645e88..37faf63af7f7 100644
  9864. --- a/include/linux/blkdev.h
  9865. +++ b/include/linux/blkdev.h
  9866. @@ -101,6 +101,7 @@ struct request {
  9867. struct list_head queuelist;
  9868. union {
  9869. struct call_single_data csd;
  9870. + struct work_struct work;
  9871. unsigned long fifo_time;
  9872. };
  9873. @@ -482,7 +483,7 @@ struct request_queue {
  9874. struct throtl_data *td;
  9875. #endif
  9876. struct rcu_head rcu_head;
  9877. - wait_queue_head_t mq_freeze_wq;
  9878. + struct swait_head mq_freeze_wq;
  9879. struct percpu_ref mq_usage_counter;
  9880. struct list_head all_q_node;
  9881. diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h
  9882. index 86c12c93e3cf..d3c0c02acc97 100644
  9883. --- a/include/linux/bottom_half.h
  9884. +++ b/include/linux/bottom_half.h
  9885. @@ -4,6 +4,39 @@
  9886. #include <linux/preempt.h>
  9887. #include <linux/preempt_mask.h>
  9888. +#ifdef CONFIG_PREEMPT_RT_FULL
  9889. +
  9890. +extern void __local_bh_disable(void);
  9891. +extern void _local_bh_enable(void);
  9892. +extern void __local_bh_enable(void);
  9893. +
  9894. +static inline void local_bh_disable(void)
  9895. +{
  9896. + __local_bh_disable();
  9897. +}
  9898. +
  9899. +static inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
  9900. +{
  9901. + __local_bh_disable();
  9902. +}
  9903. +
  9904. +static inline void local_bh_enable(void)
  9905. +{
  9906. + __local_bh_enable();
  9907. +}
  9908. +
  9909. +static inline void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
  9910. +{
  9911. + __local_bh_enable();
  9912. +}
  9913. +
  9914. +static inline void local_bh_enable_ip(unsigned long ip)
  9915. +{
  9916. + __local_bh_enable();
  9917. +}
  9918. +
  9919. +#else
  9920. +
  9921. #ifdef CONFIG_TRACE_IRQFLAGS
  9922. extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
  9923. #else
  9924. @@ -31,5 +64,6 @@ static inline void local_bh_enable(void)
  9925. {
  9926. __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
  9927. }
  9928. +#endif
  9929. #endif /* _LINUX_BH_H */
  9930. diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
  9931. index e6797ded700e..6d25afd8b847 100644
  9932. --- a/include/linux/buffer_head.h
  9933. +++ b/include/linux/buffer_head.h
  9934. @@ -75,8 +75,52 @@ struct buffer_head {
  9935. struct address_space *b_assoc_map; /* mapping this buffer is
  9936. associated with */
  9937. atomic_t b_count; /* users using this buffer_head */
  9938. +#ifdef CONFIG_PREEMPT_RT_BASE
  9939. + spinlock_t b_uptodate_lock;
  9940. +#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
  9941. + defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
  9942. + spinlock_t b_state_lock;
  9943. + spinlock_t b_journal_head_lock;
  9944. +#endif
  9945. +#endif
  9946. };
  9947. +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
  9948. +{
  9949. + unsigned long flags;
  9950. +
  9951. +#ifndef CONFIG_PREEMPT_RT_BASE
  9952. + local_irq_save(flags);
  9953. + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
  9954. +#else
  9955. + spin_lock_irqsave(&bh->b_uptodate_lock, flags);
  9956. +#endif
  9957. + return flags;
  9958. +}
  9959. +
  9960. +static inline void
  9961. +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
  9962. +{
  9963. +#ifndef CONFIG_PREEMPT_RT_BASE
  9964. + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
  9965. + local_irq_restore(flags);
  9966. +#else
  9967. + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
  9968. +#endif
  9969. +}
  9970. +
  9971. +static inline void buffer_head_init_locks(struct buffer_head *bh)
  9972. +{
  9973. +#ifdef CONFIG_PREEMPT_RT_BASE
  9974. + spin_lock_init(&bh->b_uptodate_lock);
  9975. +#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
  9976. + defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
  9977. + spin_lock_init(&bh->b_state_lock);
  9978. + spin_lock_init(&bh->b_journal_head_lock);
  9979. +#endif
  9980. +#endif
  9981. +}
  9982. +
  9983. /*
  9984. * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
  9985. * and buffer_foo() functions.
  9986. diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
  9987. index 8d9c7e7a6432..3fd10743e452 100644
  9988. --- a/include/linux/cgroup-defs.h
  9989. +++ b/include/linux/cgroup-defs.h
  9990. @@ -124,6 +124,7 @@ struct cgroup_subsys_state {
  9991. /* percpu_ref killing and RCU release */
  9992. struct rcu_head rcu_head;
  9993. struct work_struct destroy_work;
  9994. + struct swork_event destroy_swork;
  9995. };
  9996. /*
  9997. diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
  9998. index 96a2ecd5aa69..7f08633d839a 100644
  9999. --- a/include/linux/cgroup.h
  10000. +++ b/include/linux/cgroup.h
  10001. @@ -17,6 +17,8 @@
  10002. #include <linux/fs.h>
  10003. #include <linux/seq_file.h>
  10004. #include <linux/kernfs.h>
  10005. +#include <linux/wait.h>
  10006. +#include <linux/work-simple.h>
  10007. #include <linux/cgroup-defs.h>
  10008. diff --git a/include/linux/completion.h b/include/linux/completion.h
  10009. index 5d5aaae3af43..3fe8d14c98c0 100644
  10010. --- a/include/linux/completion.h
  10011. +++ b/include/linux/completion.h
  10012. @@ -7,8 +7,7 @@
  10013. * Atomic wait-for-completion handler data structures.
  10014. * See kernel/sched/completion.c for details.
  10015. */
  10016. -
  10017. -#include <linux/wait.h>
  10018. +#include <linux/wait-simple.h>
  10019. /*
  10020. * struct completion - structure used to maintain state for a "completion"
  10021. @@ -24,11 +23,11 @@
  10022. */
  10023. struct completion {
  10024. unsigned int done;
  10025. - wait_queue_head_t wait;
  10026. + struct swait_head wait;
  10027. };
  10028. #define COMPLETION_INITIALIZER(work) \
  10029. - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
  10030. + { 0, SWAIT_HEAD_INITIALIZER((work).wait) }
  10031. #define COMPLETION_INITIALIZER_ONSTACK(work) \
  10032. ({ init_completion(&work); work; })
  10033. @@ -73,7 +72,7 @@ struct completion {
  10034. static inline void init_completion(struct completion *x)
  10035. {
  10036. x->done = 0;
  10037. - init_waitqueue_head(&x->wait);
  10038. + init_swait_head(&x->wait);
  10039. }
  10040. /**
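Since struct completion now embeds a struct swait_head, the regular wait_for_completion()/complete() interface is untouched, but code that reaches into completion->wait directly has to use the simple-waitqueue primitives; that is exactly what the f_fs.c and legacy/inode.c gadget hunks earlier in this patch switch to (swaitqueue_active(), swait_event(), swait_event_interruptible()). A minimal sketch of both styles around a hypothetical my_done completion:

static DECLARE_COMPLETION(my_done);

static int my_waiter(void *unused)
{
	/* ordinary users are unaffected by the swait conversion */
	wait_for_completion(&my_done);

	/* anyone poking at the waitqueue itself must use swait helpers,
	 * e.g. when asserting that no waiter is left behind */
	WARN_ON(swaitqueue_active(&my_done.wait));
	return 0;
}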
  10041. diff --git a/include/linux/cpu.h b/include/linux/cpu.h
  10042. index c0fb6b1b4712..2a22c7c729bc 100644
  10043. --- a/include/linux/cpu.h
  10044. +++ b/include/linux/cpu.h
  10045. @@ -231,6 +231,8 @@ extern bool try_get_online_cpus(void);
  10046. extern void put_online_cpus(void);
  10047. extern void cpu_hotplug_disable(void);
  10048. extern void cpu_hotplug_enable(void);
  10049. +extern void pin_current_cpu(void);
  10050. +extern void unpin_current_cpu(void);
  10051. #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
  10052. #define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri)
  10053. #define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
  10054. @@ -249,6 +251,8 @@ static inline void cpu_hotplug_done(void) {}
  10055. #define put_online_cpus() do { } while (0)
  10056. #define cpu_hotplug_disable() do { } while (0)
  10057. #define cpu_hotplug_enable() do { } while (0)
  10058. +static inline void pin_current_cpu(void) { }
  10059. +static inline void unpin_current_cpu(void) { }
  10060. #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
  10061. #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
  10062. /* These aren't inline functions due to a GCC bug. */
  10063. diff --git a/include/linux/delay.h b/include/linux/delay.h
  10064. index a6ecb34cf547..37caab306336 100644
  10065. --- a/include/linux/delay.h
  10066. +++ b/include/linux/delay.h
  10067. @@ -52,4 +52,10 @@ static inline void ssleep(unsigned int seconds)
  10068. msleep(seconds * 1000);
  10069. }
  10070. +#ifdef CONFIG_PREEMPT_RT_FULL
  10071. +extern void cpu_chill(void);
  10072. +#else
  10073. +# define cpu_chill() cpu_relax()
  10074. +#endif
  10075. +
  10076. #endif /* defined(_LINUX_DELAY_H) */
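cpu_chill() is the RT replacement for cpu_relax() in the trylock retry loops converted earlier (autofs4, dcache, namespace, timerfd and others): on !RT it is literally cpu_relax(), on PREEMPT_RT it sleeps briefly so that the lock holder, possibly running at a lower priority, can actually make progress instead of being starved by a spinning high-priority task. The RT implementation lives elsewhere in this patch (kernel/time/hrtimer.c); treating the details as an approximation, it behaves roughly like:

/* Rough approximation of the RT-side cpu_chill(). */
void cpu_chill(void)
{
	struct timespec tu = {
		.tv_nsec = NSEC_PER_MSEC,	/* back off for about 1 ms */
	};

	hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
}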
  10077. diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
  10078. index 6cd8c0ee4b6f..1ec37fef6355 100644
  10079. --- a/include/linux/ftrace.h
  10080. +++ b/include/linux/ftrace.h
  10081. @@ -682,6 +682,18 @@ static inline void __ftrace_enabled_restore(int enabled)
  10082. #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
  10083. #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
  10084. +static inline unsigned long get_lock_parent_ip(void)
  10085. +{
  10086. + unsigned long addr = CALLER_ADDR0;
  10087. +
  10088. + if (!in_lock_functions(addr))
  10089. + return addr;
  10090. + addr = CALLER_ADDR1;
  10091. + if (!in_lock_functions(addr))
  10092. + return addr;
  10093. + return CALLER_ADDR2;
  10094. +}
  10095. +
  10096. #ifdef CONFIG_IRQSOFF_TRACER
  10097. extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
  10098. extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
  10099. diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
  10100. index f9ecf63d47f1..2ef42aa7e484 100644
  10101. --- a/include/linux/ftrace_event.h
  10102. +++ b/include/linux/ftrace_event.h
  10103. @@ -66,6 +66,9 @@ struct trace_entry {
  10104. unsigned char flags;
  10105. unsigned char preempt_count;
  10106. int pid;
  10107. + unsigned short migrate_disable;
  10108. + unsigned short padding;
  10109. + unsigned char preempt_lazy_count;
  10110. };
  10111. #define FTRACE_MAX_EVENT \
  10112. diff --git a/include/linux/highmem.h b/include/linux/highmem.h
  10113. index 9286a46b7d69..06bae5a6761d 100644
  10114. --- a/include/linux/highmem.h
  10115. +++ b/include/linux/highmem.h
  10116. @@ -7,6 +7,7 @@
  10117. #include <linux/mm.h>
  10118. #include <linux/uaccess.h>
  10119. #include <linux/hardirq.h>
  10120. +#include <linux/sched.h>
  10121. #include <asm/cacheflush.h>
  10122. @@ -65,6 +66,7 @@ static inline void kunmap(struct page *page)
  10123. static inline void *kmap_atomic(struct page *page)
  10124. {
  10125. + preempt_disable_nort();
  10126. pagefault_disable();
  10127. return page_address(page);
  10128. }
  10129. @@ -73,6 +75,7 @@ static inline void *kmap_atomic(struct page *page)
  10130. static inline void __kunmap_atomic(void *addr)
  10131. {
  10132. pagefault_enable();
  10133. + preempt_enable_nort();
  10134. }
  10135. #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn))
  10136. @@ -85,32 +88,51 @@ static inline void __kunmap_atomic(void *addr)
  10137. #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
  10138. +#ifndef CONFIG_PREEMPT_RT_FULL
  10139. DECLARE_PER_CPU(int, __kmap_atomic_idx);
  10140. +#endif
  10141. static inline int kmap_atomic_idx_push(void)
  10142. {
  10143. +#ifndef CONFIG_PREEMPT_RT_FULL
  10144. int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1;
  10145. -#ifdef CONFIG_DEBUG_HIGHMEM
  10146. +# ifdef CONFIG_DEBUG_HIGHMEM
  10147. WARN_ON_ONCE(in_irq() && !irqs_disabled());
  10148. BUG_ON(idx >= KM_TYPE_NR);
  10149. -#endif
  10150. +# endif
  10151. return idx;
  10152. +#else
  10153. + current->kmap_idx++;
  10154. + BUG_ON(current->kmap_idx > KM_TYPE_NR);
  10155. + return current->kmap_idx - 1;
  10156. +#endif
  10157. }
  10158. static inline int kmap_atomic_idx(void)
  10159. {
  10160. +#ifndef CONFIG_PREEMPT_RT_FULL
  10161. return __this_cpu_read(__kmap_atomic_idx) - 1;
  10162. +#else
  10163. + return current->kmap_idx - 1;
  10164. +#endif
  10165. }
  10166. static inline void kmap_atomic_idx_pop(void)
  10167. {
  10168. -#ifdef CONFIG_DEBUG_HIGHMEM
  10169. +#ifndef CONFIG_PREEMPT_RT_FULL
  10170. +# ifdef CONFIG_DEBUG_HIGHMEM
  10171. int idx = __this_cpu_dec_return(__kmap_atomic_idx);
  10172. BUG_ON(idx < 0);
  10173. -#else
  10174. +# else
  10175. __this_cpu_dec(__kmap_atomic_idx);
  10176. +# endif
  10177. +#else
  10178. + current->kmap_idx--;
  10179. +# ifdef CONFIG_DEBUG_HIGHMEM
  10180. + BUG_ON(current->kmap_idx < 0);
  10181. +# endif
  10182. #endif
  10183. }
  10184. diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
  10185. index 05f6df1fdf5b..64e1abb3715b 100644
  10186. --- a/include/linux/hrtimer.h
  10187. +++ b/include/linux/hrtimer.h
  10188. @@ -111,6 +111,11 @@ struct hrtimer {
  10189. enum hrtimer_restart (*function)(struct hrtimer *);
  10190. struct hrtimer_clock_base *base;
  10191. unsigned long state;
  10192. + struct list_head cb_entry;
  10193. + int irqsafe;
  10194. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  10195. + ktime_t praecox;
  10196. +#endif
  10197. #ifdef CONFIG_TIMER_STATS
  10198. int start_pid;
  10199. void *start_site;
  10200. @@ -147,6 +152,7 @@ struct hrtimer_clock_base {
  10201. int index;
  10202. clockid_t clockid;
  10203. struct timerqueue_head active;
  10204. + struct list_head expired;
  10205. ktime_t resolution;
  10206. ktime_t (*get_time)(void);
  10207. ktime_t softirq_time;
  10208. @@ -194,6 +200,9 @@ struct hrtimer_cpu_base {
  10209. unsigned long nr_hangs;
  10210. ktime_t max_hang_time;
  10211. #endif
  10212. +#ifdef CONFIG_PREEMPT_RT_BASE
  10213. + wait_queue_head_t wait;
  10214. +#endif
  10215. struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
  10216. };
  10217. @@ -381,6 +390,13 @@ static inline int hrtimer_restart(struct hrtimer *timer)
  10218. return hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
  10219. }
  10220. +/* Softirq preemption could deadlock timer removal */
  10221. +#ifdef CONFIG_PREEMPT_RT_BASE
  10222. + extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
  10223. +#else
  10224. +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
  10225. +#endif
  10226. +
  10227. /* Query timers: */
  10228. extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
  10229. extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
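hrtimer_wait_for_timer() exists because on PREEMPT_RT most hrtimer callbacks run from the softirq thread, so busy-waiting for a running callback (the cpu_relax() loop removed from timerfd above is one example) could spin forever against a lower-priority thread; callers sleep on cpu_base->wait instead, and on !RT the macro degrades to cpu_relax(). A hedged sketch of the cancel-and-wait shape modelled on the timerfd hunk (the helper name is from this patch, the surrounding function is illustrative):

static void my_cancel_sync(struct hrtimer *t)
{
	for (;;) {
		if (hrtimer_try_to_cancel(t) >= 0)
			break;	/* timer was inactive or has been removed */
		/* callback in flight: sleep on RT, cpu_relax() on !RT */
		hrtimer_wait_for_timer(t);
	}
}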
  10230. diff --git a/include/linux/idr.h b/include/linux/idr.h
  10231. index 013fd9bc4cb6..f62be0aec911 100644
  10232. --- a/include/linux/idr.h
  10233. +++ b/include/linux/idr.h
  10234. @@ -95,10 +95,14 @@ bool idr_is_empty(struct idr *idp);
  10235. * Each idr_preload() should be matched with an invocation of this
  10236. * function. See idr_preload() for details.
  10237. */
  10238. +#ifdef CONFIG_PREEMPT_RT_FULL
  10239. +void idr_preload_end(void);
  10240. +#else
  10241. static inline void idr_preload_end(void)
  10242. {
  10243. preempt_enable();
  10244. }
  10245. +#endif
  10246. /**
  10247. * idr_find - return pointer for given id
  10248. diff --git a/include/linux/init_task.h b/include/linux/init_task.h
  10249. index 696d22312b31..4a77d39ff7dd 100644
  10250. --- a/include/linux/init_task.h
  10251. +++ b/include/linux/init_task.h
  10252. @@ -147,9 +147,16 @@ extern struct task_group root_task_group;
  10253. # define INIT_PERF_EVENTS(tsk)
  10254. #endif
  10255. +#ifdef CONFIG_PREEMPT_RT_BASE
  10256. +# define INIT_TIMER_LIST .posix_timer_list = NULL,
  10257. +#else
  10258. +# define INIT_TIMER_LIST
  10259. +#endif
  10260. +
  10261. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  10262. # define INIT_VTIME(tsk) \
  10263. - .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \
  10264. + .vtime_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.vtime_lock), \
  10265. + .vtime_seq = SEQCNT_ZERO(tsk.vtime_seq), \
  10266. .vtime_snap = 0, \
  10267. .vtime_snap_whence = VTIME_SYS,
  10268. #else
  10269. @@ -238,6 +245,7 @@ extern struct task_group root_task_group;
  10270. .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
  10271. .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
  10272. .timer_slack_ns = 50000, /* 50 usec default slack */ \
  10273. + INIT_TIMER_LIST \
  10274. .pids = { \
  10275. [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
  10276. [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
  10277. diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
  10278. index 950ae4501826..d11fd0a440ff 100644
  10279. --- a/include/linux/interrupt.h
  10280. +++ b/include/linux/interrupt.h
  10281. @@ -61,6 +61,7 @@
  10282. * interrupt handler after suspending interrupts. For system
  10283. * wakeup devices users need to implement wakeup detection in
  10284. * their interrupt handlers.
  10285. + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT)
  10286. */
  10287. #define IRQF_SHARED 0x00000080
  10288. #define IRQF_PROBE_SHARED 0x00000100
  10289. @@ -74,6 +75,7 @@
  10290. #define IRQF_NO_THREAD 0x00010000
  10291. #define IRQF_EARLY_RESUME 0x00020000
  10292. #define IRQF_COND_SUSPEND 0x00040000
  10293. +#define IRQF_NO_SOFTIRQ_CALL 0x00080000
  10294. #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
  10295. @@ -102,6 +104,7 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
  10296. * @flags: flags (see IRQF_* above)
  10297. * @thread_fn: interrupt handler function for threaded interrupts
  10298. * @thread: thread pointer for threaded interrupts
  10299. + * @secondary: pointer to secondary irqaction (force threading)
  10300. * @thread_flags: flags related to @thread
  10301. * @thread_mask: bitmask for keeping track of @thread activity
  10302. * @dir: pointer to the proc/irq/NN/name entry
  10303. @@ -113,6 +116,7 @@ struct irqaction {
  10304. struct irqaction *next;
  10305. irq_handler_t thread_fn;
  10306. struct task_struct *thread;
  10307. + struct irqaction *secondary;
  10308. unsigned int irq;
  10309. unsigned int flags;
  10310. unsigned long thread_flags;
  10311. @@ -184,7 +188,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id);
  10312. #ifdef CONFIG_LOCKDEP
  10313. # define local_irq_enable_in_hardirq() do { } while (0)
  10314. #else
  10315. -# define local_irq_enable_in_hardirq() local_irq_enable()
  10316. +# define local_irq_enable_in_hardirq() local_irq_enable_nort()
  10317. #endif
  10318. extern void disable_irq_nosync(unsigned int irq);
  10319. @@ -215,6 +219,7 @@ struct irq_affinity_notify {
  10320. unsigned int irq;
  10321. struct kref kref;
  10322. struct work_struct work;
  10323. + struct list_head list;
  10324. void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
  10325. void (*release)(struct kref *ref);
  10326. };
  10327. @@ -377,9 +382,13 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
  10328. bool state);
  10329. #ifdef CONFIG_IRQ_FORCED_THREADING
  10330. +# ifndef CONFIG_PREEMPT_RT_BASE
  10331. extern bool force_irqthreads;
  10332. +# else
  10333. +# define force_irqthreads (true)
  10334. +# endif
  10335. #else
  10336. -#define force_irqthreads (0)
  10337. +#define force_irqthreads (false)
  10338. #endif
  10339. #ifndef __ARCH_SET_SOFTIRQ_PENDING
  10340. @@ -435,9 +444,10 @@ struct softirq_action
  10341. void (*action)(struct softirq_action *);
  10342. };
  10343. +#ifndef CONFIG_PREEMPT_RT_FULL
  10344. asmlinkage void do_softirq(void);
  10345. asmlinkage void __do_softirq(void);
  10346. -
  10347. +static inline void thread_do_softirq(void) { do_softirq(); }
  10348. #ifdef __ARCH_HAS_DO_SOFTIRQ
  10349. void do_softirq_own_stack(void);
  10350. #else
  10351. @@ -446,13 +456,25 @@ static inline void do_softirq_own_stack(void)
  10352. __do_softirq();
  10353. }
  10354. #endif
  10355. +#else
  10356. +extern void thread_do_softirq(void);
  10357. +#endif
  10358. extern void open_softirq(int nr, void (*action)(struct softirq_action *));
  10359. extern void softirq_init(void);
  10360. extern void __raise_softirq_irqoff(unsigned int nr);
  10361. +#ifdef CONFIG_PREEMPT_RT_FULL
  10362. +extern void __raise_softirq_irqoff_ksoft(unsigned int nr);
  10363. +#else
  10364. +static inline void __raise_softirq_irqoff_ksoft(unsigned int nr)
  10365. +{
  10366. + __raise_softirq_irqoff(nr);
  10367. +}
  10368. +#endif
  10369. extern void raise_softirq_irqoff(unsigned int nr);
  10370. extern void raise_softirq(unsigned int nr);
  10371. +extern void softirq_check_pending_idle(void);
  10372. DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
  10373. @@ -474,8 +496,9 @@ static inline struct task_struct *this_cpu_ksoftirqd(void)
  10374. to be executed on some cpu at least once after this.
  10375. * If the tasklet is already scheduled, but its execution is still not
  10376. started, it will be executed only once.
  10377. - * If this tasklet is already running on another CPU (or schedule is called
  10378. - from tasklet itself), it is rescheduled for later.
  10379. + * If this tasklet is already running on another CPU, it is rescheduled
  10380. + for later.
  10381. + * Schedule must not be called from the tasklet itself (a lockup occurs)
  10382. * Tasklet is strictly serialized wrt itself, but not
  10383. wrt another tasklets. If client needs some intertask synchronization,
  10384. he makes it with spinlocks.
  10385. @@ -500,27 +523,36 @@ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data }
  10386. enum
  10387. {
  10388. TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */
  10389. - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */
  10390. + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */
  10391. + TASKLET_STATE_PENDING /* Tasklet is pending */
  10392. };
  10393. -#ifdef CONFIG_SMP
  10394. +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED)
  10395. +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN)
  10396. +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING)
  10397. +
  10398. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  10399. static inline int tasklet_trylock(struct tasklet_struct *t)
  10400. {
  10401. return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
  10402. }
  10403. +static inline int tasklet_tryunlock(struct tasklet_struct *t)
  10404. +{
  10405. + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN;
  10406. +}
  10407. +
  10408. static inline void tasklet_unlock(struct tasklet_struct *t)
  10409. {
  10410. smp_mb__before_atomic();
  10411. clear_bit(TASKLET_STATE_RUN, &(t)->state);
  10412. }
  10413. -static inline void tasklet_unlock_wait(struct tasklet_struct *t)
  10414. -{
  10415. - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
  10416. -}
  10417. +extern void tasklet_unlock_wait(struct tasklet_struct *t);
  10418. +
  10419. #else
  10420. #define tasklet_trylock(t) 1
  10421. +#define tasklet_tryunlock(t) 1
  10422. #define tasklet_unlock_wait(t) do { } while (0)
  10423. #define tasklet_unlock(t) do { } while (0)
  10424. #endif
  10425. @@ -569,12 +601,7 @@ static inline void tasklet_disable(struct tasklet_struct *t)
  10426. smp_mb();
  10427. }
  10428. -static inline void tasklet_enable(struct tasklet_struct *t)
  10429. -{
  10430. - smp_mb__before_atomic();
  10431. - atomic_dec(&t->count);
  10432. -}
  10433. -
  10434. +extern void tasklet_enable(struct tasklet_struct *t);
  10435. extern void tasklet_kill(struct tasklet_struct *t);
  10436. extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
  10437. extern void tasklet_init(struct tasklet_struct *t,
  10438. @@ -605,6 +632,12 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer)
  10439. tasklet_kill(&ttimer->tasklet);
  10440. }
  10441. +#ifdef CONFIG_PREEMPT_RT_FULL
  10442. +extern void softirq_early_init(void);
  10443. +#else
  10444. +static inline void softirq_early_init(void) { }
  10445. +#endif
  10446. +
  10447. /*
  10448. * Autoprobing for irqs:
  10449. *
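A hedged sketch of how a latency-sensitive driver might pass the new flag. The handler and device pointer are hypothetical; on RT the handler is force-threaded anyway, and IRQF_NO_SOFTIRQ_CALL only keeps that irq thread from processing pending softirqs on its way out.

#include <linux/interrupt.h>

static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
	/* ack the device, keep the heavy lifting elsewhere */
	return IRQ_HANDLED;
}

static int my_request_irq(unsigned int irq, void *my_dev)
{
	return request_irq(irq, my_irq_handler, IRQF_NO_SOFTIRQ_CALL,
			   "my-dev", my_dev);
}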
  10450. diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
  10451. index 657fab4efab3..c27dde7215b5 100644
  10452. --- a/include/linux/io-mapping.h
  10453. +++ b/include/linux/io-mapping.h
  10454. @@ -141,6 +141,7 @@ static inline void __iomem *
  10455. io_mapping_map_atomic_wc(struct io_mapping *mapping,
  10456. unsigned long offset)
  10457. {
  10458. + preempt_disable();
  10459. pagefault_disable();
  10460. return ((char __force __iomem *) mapping) + offset;
  10461. }
  10462. @@ -149,6 +150,7 @@ static inline void
  10463. io_mapping_unmap_atomic(void __iomem *vaddr)
  10464. {
  10465. pagefault_enable();
  10466. + preempt_enable();
  10467. }
  10468. /* Non-atomic map/unmap */
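The added preempt_disable()/preempt_enable() pair makes the already-implicit rule explicit: nothing between map and unmap may sleep. A small sketch of a caller (my_read_reg and its arguments are illustrative):

#include <linux/io-mapping.h>
#include <linux/io.h>

static u32 my_read_reg(struct io_mapping *mapping, unsigned long offset)
{
	void __iomem *vaddr;
	u32 val;

	vaddr = io_mapping_map_atomic_wc(mapping, offset);
	/* preemption and pagefaults are disabled until the unmap */
	val = readl(vaddr);
	io_mapping_unmap_atomic(vaddr);

	return val;
}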
  10469. diff --git a/include/linux/irq.h b/include/linux/irq.h
  10470. index 33475a37f1bb..00e8834d2e02 100644
  10471. --- a/include/linux/irq.h
  10472. +++ b/include/linux/irq.h
  10473. @@ -72,6 +72,7 @@ enum irqchip_irq_state;
  10474. * IRQ_IS_POLLED - Always polled by another interrupt. Exclude
  10475. * it from the spurious interrupt detection
  10476. * mechanism and from core side polling.
  10477. + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT)
  10478. */
  10479. enum {
  10480. IRQ_TYPE_NONE = 0x00000000,
  10481. @@ -97,13 +98,14 @@ enum {
  10482. IRQ_NOTHREAD = (1 << 16),
  10483. IRQ_PER_CPU_DEVID = (1 << 17),
  10484. IRQ_IS_POLLED = (1 << 18),
  10485. + IRQ_NO_SOFTIRQ_CALL = (1 << 19),
  10486. };
  10487. #define IRQF_MODIFY_MASK \
  10488. (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
  10489. IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
  10490. IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \
  10491. - IRQ_IS_POLLED)
  10492. + IRQ_IS_POLLED | IRQ_NO_SOFTIRQ_CALL)
  10493. #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
  10494. diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
  10495. index 47b9ebd4a74f..2543aab05daa 100644
  10496. --- a/include/linux/irq_work.h
  10497. +++ b/include/linux/irq_work.h
  10498. @@ -16,6 +16,7 @@
  10499. #define IRQ_WORK_BUSY 2UL
  10500. #define IRQ_WORK_FLAGS 3UL
  10501. #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */
  10502. +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */
  10503. struct irq_work {
  10504. unsigned long flags;
  10505. @@ -51,4 +52,10 @@ static inline bool irq_work_needs_cpu(void) { return false; }
  10506. static inline void irq_work_run(void) { }
  10507. #endif
  10508. +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
  10509. +void irq_work_tick_soft(void);
  10510. +#else
  10511. +static inline void irq_work_tick_soft(void) { }
  10512. +#endif
  10513. +
  10514. #endif /* _LINUX_IRQ_WORK_H */
  10515. diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
  10516. index dd1109fb241e..9d97cd5bb7c7 100644
  10517. --- a/include/linux/irqdesc.h
  10518. +++ b/include/linux/irqdesc.h
  10519. @@ -63,6 +63,7 @@ struct irq_desc {
  10520. unsigned int irqs_unhandled;
  10521. atomic_t threads_handled;
  10522. int threads_handled_last;
  10523. + u64 random_ip;
  10524. raw_spinlock_t lock;
  10525. struct cpumask *percpu_enabled;
  10526. #ifdef CONFIG_SMP
  10527. diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
  10528. index 5dd1272d1ab2..9b77034f7c5e 100644
  10529. --- a/include/linux/irqflags.h
  10530. +++ b/include/linux/irqflags.h
  10531. @@ -25,8 +25,6 @@
  10532. # define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
  10533. # define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
  10534. # define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
  10535. -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
  10536. -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
  10537. # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
  10538. #else
  10539. # define trace_hardirqs_on() do { } while (0)
  10540. @@ -39,9 +37,15 @@
  10541. # define trace_softirqs_enabled(p) 0
  10542. # define trace_hardirq_enter() do { } while (0)
  10543. # define trace_hardirq_exit() do { } while (0)
  10544. +# define INIT_TRACE_IRQFLAGS
  10545. +#endif
  10546. +
  10547. +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL)
  10548. +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
  10549. +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
  10550. +#else
  10551. # define lockdep_softirq_enter() do { } while (0)
  10552. # define lockdep_softirq_exit() do { } while (0)
  10553. -# define INIT_TRACE_IRQFLAGS
  10554. #endif
  10555. #if defined(CONFIG_IRQSOFF_TRACER) || \
  10556. @@ -148,4 +152,23 @@
  10557. #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
  10558. +/*
  10559. + * local_irq* variants depending on RT/!RT
  10560. + */
  10561. +#ifdef CONFIG_PREEMPT_RT_FULL
  10562. +# define local_irq_disable_nort() do { } while (0)
  10563. +# define local_irq_enable_nort() do { } while (0)
  10564. +# define local_irq_save_nort(flags) local_save_flags(flags)
  10565. +# define local_irq_restore_nort(flags) (void)(flags)
  10566. +# define local_irq_disable_rt() local_irq_disable()
  10567. +# define local_irq_enable_rt() local_irq_enable()
  10568. +#else
  10569. +# define local_irq_disable_nort() local_irq_disable()
  10570. +# define local_irq_enable_nort() local_irq_enable()
  10571. +# define local_irq_save_nort(flags) local_irq_save(flags)
  10572. +# define local_irq_restore_nort(flags) local_irq_restore(flags)
  10573. +# define local_irq_disable_rt() do { } while (0)
  10574. +# define local_irq_enable_rt() do { } while (0)
  10575. +#endif
  10576. +
  10577. #endif
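The _nort() helpers let a code path keep real interrupt disabling on !RT while becoming an ordinary preemptible section on RT. A hypothetical register write illustrates the intended use; any section converted this way must not depend on hard-interrupt exclusion when RT is enabled.

#include <linux/irqflags.h>
#include <linux/io.h>

static void my_write_reg(void __iomem *reg, u32 val)
{
	unsigned long flags;

	local_irq_save_nort(flags);	/* !RT: irqs off; RT: flags saved only */
	writel(val, reg);
	local_irq_restore_nort(flags);
}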
  10578. diff --git a/include/linux/jbd_common.h b/include/linux/jbd_common.h
  10579. index 3dc53432355f..a90a6f5ca899 100644
  10580. --- a/include/linux/jbd_common.h
  10581. +++ b/include/linux/jbd_common.h
  10582. @@ -15,32 +15,56 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh)
  10583. static inline void jbd_lock_bh_state(struct buffer_head *bh)
  10584. {
  10585. +#ifndef CONFIG_PREEMPT_RT_BASE
  10586. bit_spin_lock(BH_State, &bh->b_state);
  10587. +#else
  10588. + spin_lock(&bh->b_state_lock);
  10589. +#endif
  10590. }
  10591. static inline int jbd_trylock_bh_state(struct buffer_head *bh)
  10592. {
  10593. +#ifndef CONFIG_PREEMPT_RT_BASE
  10594. return bit_spin_trylock(BH_State, &bh->b_state);
  10595. +#else
  10596. + return spin_trylock(&bh->b_state_lock);
  10597. +#endif
  10598. }
  10599. static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
  10600. {
  10601. +#ifndef CONFIG_PREEMPT_RT_BASE
  10602. return bit_spin_is_locked(BH_State, &bh->b_state);
  10603. +#else
  10604. + return spin_is_locked(&bh->b_state_lock);
  10605. +#endif
  10606. }
  10607. static inline void jbd_unlock_bh_state(struct buffer_head *bh)
  10608. {
  10609. +#ifndef CONFIG_PREEMPT_RT_BASE
  10610. bit_spin_unlock(BH_State, &bh->b_state);
  10611. +#else
  10612. + spin_unlock(&bh->b_state_lock);
  10613. +#endif
  10614. }
  10615. static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
  10616. {
  10617. +#ifndef CONFIG_PREEMPT_RT_BASE
  10618. bit_spin_lock(BH_JournalHead, &bh->b_state);
  10619. +#else
  10620. + spin_lock(&bh->b_journal_head_lock);
  10621. +#endif
  10622. }
  10623. static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
  10624. {
  10625. +#ifndef CONFIG_PREEMPT_RT_BASE
  10626. bit_spin_unlock(BH_JournalHead, &bh->b_state);
  10627. +#else
  10628. + spin_unlock(&bh->b_journal_head_lock);
  10629. +#endif
  10630. }
  10631. #endif
  10632. diff --git a/include/linux/kdb.h b/include/linux/kdb.h
  10633. index a19bcf9e762e..897495386446 100644
  10634. --- a/include/linux/kdb.h
  10635. +++ b/include/linux/kdb.h
  10636. @@ -167,6 +167,7 @@ extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt,
  10637. extern __printf(1, 2) int kdb_printf(const char *, ...);
  10638. typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
  10639. +#define in_kdb_printk() (kdb_trap_printk)
  10640. extern void kdb_init(int level);
  10641. /* Access to kdb specific polling devices */
  10642. @@ -201,6 +202,7 @@ extern int kdb_register_flags(char *, kdb_func_t, char *, char *,
  10643. extern int kdb_unregister(char *);
  10644. #else /* ! CONFIG_KGDB_KDB */
  10645. static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; }
  10646. +#define in_kdb_printk() (0)
  10647. static inline void kdb_init(int level) {}
  10648. static inline int kdb_register(char *cmd, kdb_func_t func, char *usage,
  10649. char *help, short minlen) { return 0; }
  10650. diff --git a/include/linux/kernel.h b/include/linux/kernel.h
  10651. index d837f2a41665..2f4ce318c4fb 100644
  10652. --- a/include/linux/kernel.h
  10653. +++ b/include/linux/kernel.h
  10654. @@ -188,6 +188,9 @@ extern int _cond_resched(void);
  10655. */
  10656. # define might_sleep() \
  10657. do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
  10658. +
  10659. +# define might_sleep_no_state_check() \
  10660. + do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
  10661. # define sched_annotate_sleep() (current->task_state_change = 0)
  10662. #else
  10663. static inline void ___might_sleep(const char *file, int line,
  10664. @@ -195,6 +198,7 @@ extern int _cond_resched(void);
  10665. static inline void __might_sleep(const char *file, int line,
  10666. int preempt_offset) { }
  10667. # define might_sleep() do { might_resched(); } while (0)
  10668. +# define might_sleep_no_state_check() do { might_resched(); } while (0)
  10669. # define sched_annotate_sleep() do { } while (0)
  10670. #endif
  10671. @@ -244,7 +248,8 @@ static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
  10672. #if defined(CONFIG_MMU) && \
  10673. (defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP))
  10674. -void might_fault(void);
  10675. +#define might_fault() __might_fault(__FILE__, __LINE__)
  10676. +void __might_fault(const char *file, int line);
  10677. #else
  10678. static inline void might_fault(void) { }
  10679. #endif
  10680. @@ -466,6 +471,7 @@ extern enum system_states {
  10681. SYSTEM_HALT,
  10682. SYSTEM_POWER_OFF,
  10683. SYSTEM_RESTART,
  10684. + SYSTEM_SUSPEND,
  10685. } system_state;
  10686. #define TAINT_PROPRIETARY_MODULE 0
  10687. diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
  10688. index 29a57a5b7cee..a081a0316379 100644
  10689. --- a/include/linux/kvm_host.h
  10690. +++ b/include/linux/kvm_host.h
  10691. @@ -230,7 +230,7 @@ struct kvm_vcpu {
  10692. int fpu_active;
  10693. int guest_fpu_loaded, guest_xcr0_loaded;
  10694. - wait_queue_head_t wq;
  10695. + struct swait_head wq;
  10696. struct pid *pid;
  10697. int sigset_active;
  10698. sigset_t sigset;
  10699. @@ -701,7 +701,7 @@ static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
  10700. }
  10701. #endif
  10702. -static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
  10703. +static inline struct swait_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
  10704. {
  10705. #ifdef __KVM_HAVE_ARCH_WQP
  10706. return vcpu->arch.wqp;
  10707. diff --git a/include/linux/lglock.h b/include/linux/lglock.h
  10708. index 0081f000e34b..9603a1500267 100644
  10709. --- a/include/linux/lglock.h
  10710. +++ b/include/linux/lglock.h
  10711. @@ -34,22 +34,39 @@
  10712. #endif
  10713. struct lglock {
  10714. +#ifndef CONFIG_PREEMPT_RT_FULL
  10715. arch_spinlock_t __percpu *lock;
  10716. +#else
  10717. + struct rt_mutex __percpu *lock;
  10718. +#endif
  10719. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  10720. struct lock_class_key lock_key;
  10721. struct lockdep_map lock_dep_map;
  10722. #endif
  10723. };
  10724. -#define DEFINE_LGLOCK(name) \
  10725. +#ifndef CONFIG_PREEMPT_RT_FULL
  10726. +# define DEFINE_LGLOCK(name) \
  10727. static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
  10728. = __ARCH_SPIN_LOCK_UNLOCKED; \
  10729. struct lglock name = { .lock = &name ## _lock }
  10730. -#define DEFINE_STATIC_LGLOCK(name) \
  10731. +# define DEFINE_STATIC_LGLOCK(name) \
  10732. static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
  10733. = __ARCH_SPIN_LOCK_UNLOCKED; \
  10734. static struct lglock name = { .lock = &name ## _lock }
  10735. +#else
  10736. +
  10737. +# define DEFINE_LGLOCK(name) \
  10738. + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \
  10739. + = __RT_MUTEX_INITIALIZER( name ## _lock); \
  10740. + struct lglock name = { .lock = &name ## _lock }
  10741. +
  10742. +# define DEFINE_STATIC_LGLOCK(name) \
  10743. + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \
  10744. + = __RT_MUTEX_INITIALIZER( name ## _lock); \
  10745. + static struct lglock name = { .lock = &name ## _lock }
  10746. +#endif
  10747. void lg_lock_init(struct lglock *lg, char *name);
  10748. void lg_local_lock(struct lglock *lg);
  10749. @@ -59,6 +76,12 @@ void lg_local_unlock_cpu(struct lglock *lg, int cpu);
  10750. void lg_global_lock(struct lglock *lg);
  10751. void lg_global_unlock(struct lglock *lg);
  10752. +#ifndef CONFIG_PREEMPT_RT_FULL
  10753. +#define lg_global_trylock_relax(name) lg_global_lock(name)
  10754. +#else
  10755. +void lg_global_trylock_relax(struct lglock *lg);
  10756. +#endif
  10757. +
  10758. #else
  10759. /* When !CONFIG_SMP, map lglock to spinlock */
  10760. #define lglock spinlock
  10761. diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
  10762. index 2eb88556c5c5..017d0f1c1eb4 100644
  10763. --- a/include/linux/list_bl.h
  10764. +++ b/include/linux/list_bl.h
  10765. @@ -2,6 +2,7 @@
  10766. #define _LINUX_LIST_BL_H
  10767. #include <linux/list.h>
  10768. +#include <linux/spinlock.h>
  10769. #include <linux/bit_spinlock.h>
  10770. /*
  10771. @@ -32,13 +33,24 @@
  10772. struct hlist_bl_head {
  10773. struct hlist_bl_node *first;
  10774. +#ifdef CONFIG_PREEMPT_RT_BASE
  10775. + raw_spinlock_t lock;
  10776. +#endif
  10777. };
  10778. struct hlist_bl_node {
  10779. struct hlist_bl_node *next, **pprev;
  10780. };
  10781. -#define INIT_HLIST_BL_HEAD(ptr) \
  10782. - ((ptr)->first = NULL)
  10783. +
  10784. +#ifdef CONFIG_PREEMPT_RT_BASE
  10785. +#define INIT_HLIST_BL_HEAD(h) \
  10786. +do { \
  10787. + (h)->first = NULL; \
  10788. + raw_spin_lock_init(&(h)->lock); \
  10789. +} while (0)
  10790. +#else
  10791. +#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL
  10792. +#endif
  10793. static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
  10794. {
  10795. @@ -117,12 +129,26 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n)
  10796. static inline void hlist_bl_lock(struct hlist_bl_head *b)
  10797. {
  10798. +#ifndef CONFIG_PREEMPT_RT_BASE
  10799. bit_spin_lock(0, (unsigned long *)b);
  10800. +#else
  10801. + raw_spin_lock(&b->lock);
  10802. +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  10803. + __set_bit(0, (unsigned long *)b);
  10804. +#endif
  10805. +#endif
  10806. }
  10807. static inline void hlist_bl_unlock(struct hlist_bl_head *b)
  10808. {
  10809. +#ifndef CONFIG_PREEMPT_RT_BASE
  10810. __bit_spin_unlock(0, (unsigned long *)b);
  10811. +#else
  10812. +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  10813. + __clear_bit(0, (unsigned long *)b);
  10814. +#endif
  10815. + raw_spin_unlock(&b->lock);
  10816. +#endif
  10817. }
  10818. static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
  10819. diff --git a/include/linux/locallock.h b/include/linux/locallock.h
  10820. new file mode 100644
  10821. index 000000000000..0edbf192f6d1
  10822. --- /dev/null
  10823. +++ b/include/linux/locallock.h
  10824. @@ -0,0 +1,276 @@
  10825. +#ifndef _LINUX_LOCALLOCK_H
  10826. +#define _LINUX_LOCALLOCK_H
  10827. +
  10828. +#include <linux/percpu.h>
  10829. +#include <linux/spinlock.h>
  10830. +
  10831. +#ifdef CONFIG_PREEMPT_RT_BASE
  10832. +
  10833. +#ifdef CONFIG_DEBUG_SPINLOCK
  10834. +# define LL_WARN(cond) WARN_ON(cond)
  10835. +#else
  10836. +# define LL_WARN(cond) do { } while (0)
  10837. +#endif
  10838. +
  10839. +/*
  10840. + * per cpu lock based substitute for local_irq_*()
  10841. + */
  10842. +struct local_irq_lock {
  10843. + spinlock_t lock;
  10844. + struct task_struct *owner;
  10845. + int nestcnt;
  10846. + unsigned long flags;
  10847. +};
  10848. +
  10849. +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \
  10850. + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \
  10851. + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
  10852. +
  10853. +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \
  10854. + DECLARE_PER_CPU(struct local_irq_lock, lvar)
  10855. +
  10856. +#define local_irq_lock_init(lvar) \
  10857. + do { \
  10858. + int __cpu; \
  10859. + for_each_possible_cpu(__cpu) \
  10860. + spin_lock_init(&per_cpu(lvar, __cpu).lock); \
  10861. + } while (0)
  10862. +
  10863. +/*
  10864. + * spin_lock|trylock|unlock_local flavour that does not migrate disable
  10865. + * used for __local_lock|trylock|unlock where get_local_var/put_local_var
  10866. + * already takes care of the migrate_disable/enable
  10867. + * for CONFIG_PREEMPT_BASE map to the normal spin_* calls.
  10868. + */
  10869. +#ifdef CONFIG_PREEMPT_RT_FULL
  10870. +# define spin_lock_local(lock) rt_spin_lock(lock)
  10871. +# define spin_trylock_local(lock) rt_spin_trylock(lock)
  10872. +# define spin_unlock_local(lock) rt_spin_unlock(lock)
  10873. +#else
  10874. +# define spin_lock_local(lock) spin_lock(lock)
  10875. +# define spin_trylock_local(lock) spin_trylock(lock)
  10876. +# define spin_unlock_local(lock) spin_unlock(lock)
  10877. +#endif
  10878. +
  10879. +static inline void __local_lock(struct local_irq_lock *lv)
  10880. +{
  10881. + if (lv->owner != current) {
  10882. + spin_lock_local(&lv->lock);
  10883. + LL_WARN(lv->owner);
  10884. + LL_WARN(lv->nestcnt);
  10885. + lv->owner = current;
  10886. + }
  10887. + lv->nestcnt++;
  10888. +}
  10889. +
  10890. +#define local_lock(lvar) \
  10891. + do { __local_lock(&get_local_var(lvar)); } while (0)
  10892. +
  10893. +#define local_lock_on(lvar, cpu) \
  10894. + do { __local_lock(&per_cpu(lvar, cpu)); } while (0)
  10895. +
  10896. +static inline int __local_trylock(struct local_irq_lock *lv)
  10897. +{
  10898. + if (lv->owner != current && spin_trylock_local(&lv->lock)) {
  10899. + LL_WARN(lv->owner);
  10900. + LL_WARN(lv->nestcnt);
  10901. + lv->owner = current;
  10902. + lv->nestcnt = 1;
  10903. + return 1;
  10904. + }
  10905. + return 0;
  10906. +}
  10907. +
  10908. +#define local_trylock(lvar) \
  10909. + ({ \
  10910. + int __locked; \
  10911. + __locked = __local_trylock(&get_local_var(lvar)); \
  10912. + if (!__locked) \
  10913. + put_local_var(lvar); \
  10914. + __locked; \
  10915. + })
  10916. +
  10917. +static inline void __local_unlock(struct local_irq_lock *lv)
  10918. +{
  10919. + LL_WARN(lv->nestcnt == 0);
  10920. + LL_WARN(lv->owner != current);
  10921. + if (--lv->nestcnt)
  10922. + return;
  10923. +
  10924. + lv->owner = NULL;
  10925. + spin_unlock_local(&lv->lock);
  10926. +}
  10927. +
  10928. +#define local_unlock(lvar) \
  10929. + do { \
  10930. + __local_unlock(this_cpu_ptr(&lvar)); \
  10931. + put_local_var(lvar); \
  10932. + } while (0)
  10933. +
  10934. +#define local_unlock_on(lvar, cpu) \
  10935. + do { __local_unlock(&per_cpu(lvar, cpu)); } while (0)
  10936. +
  10937. +static inline void __local_lock_irq(struct local_irq_lock *lv)
  10938. +{
  10939. + spin_lock_irqsave(&lv->lock, lv->flags);
  10940. + LL_WARN(lv->owner);
  10941. + LL_WARN(lv->nestcnt);
  10942. + lv->owner = current;
  10943. + lv->nestcnt = 1;
  10944. +}
  10945. +
  10946. +#define local_lock_irq(lvar) \
  10947. + do { __local_lock_irq(&get_local_var(lvar)); } while (0)
  10948. +
  10949. +#define local_lock_irq_on(lvar, cpu) \
  10950. + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0)
  10951. +
  10952. +static inline void __local_unlock_irq(struct local_irq_lock *lv)
  10953. +{
  10954. + LL_WARN(!lv->nestcnt);
  10955. + LL_WARN(lv->owner != current);
  10956. + lv->owner = NULL;
  10957. + lv->nestcnt = 0;
  10958. + spin_unlock_irq(&lv->lock);
  10959. +}
  10960. +
  10961. +#define local_unlock_irq(lvar) \
  10962. + do { \
  10963. + __local_unlock_irq(this_cpu_ptr(&lvar)); \
  10964. + put_local_var(lvar); \
  10965. + } while (0)
  10966. +
  10967. +#define local_unlock_irq_on(lvar, cpu) \
  10968. + do { \
  10969. + __local_unlock_irq(&per_cpu(lvar, cpu)); \
  10970. + } while (0)
  10971. +
  10972. +static inline int __local_lock_irqsave(struct local_irq_lock *lv)
  10973. +{
  10974. + if (lv->owner != current) {
  10975. + __local_lock_irq(lv);
  10976. + return 0;
  10977. + } else {
  10978. + lv->nestcnt++;
  10979. + return 1;
  10980. + }
  10981. +}
  10982. +
  10983. +#define local_lock_irqsave(lvar, _flags) \
  10984. + do { \
  10985. + if (__local_lock_irqsave(&get_local_var(lvar))) \
  10986. + put_local_var(lvar); \
  10987. + _flags = __this_cpu_read(lvar.flags); \
  10988. + } while (0)
  10989. +
  10990. +#define local_lock_irqsave_on(lvar, _flags, cpu) \
  10991. + do { \
  10992. + __local_lock_irqsave(&per_cpu(lvar, cpu)); \
  10993. + _flags = per_cpu(lvar, cpu).flags; \
  10994. + } while (0)
  10995. +
  10996. +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
  10997. + unsigned long flags)
  10998. +{
  10999. + LL_WARN(!lv->nestcnt);
  11000. + LL_WARN(lv->owner != current);
  11001. + if (--lv->nestcnt)
  11002. + return 0;
  11003. +
  11004. + lv->owner = NULL;
  11005. + spin_unlock_irqrestore(&lv->lock, lv->flags);
  11006. + return 1;
  11007. +}
  11008. +
  11009. +#define local_unlock_irqrestore(lvar, flags) \
  11010. + do { \
  11011. + if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \
  11012. + put_local_var(lvar); \
  11013. + } while (0)
  11014. +
  11015. +#define local_unlock_irqrestore_on(lvar, flags, cpu) \
  11016. + do { \
  11017. + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \
  11018. + } while (0)
  11019. +
  11020. +#define local_spin_trylock_irq(lvar, lock) \
  11021. + ({ \
  11022. + int __locked; \
  11023. + local_lock_irq(lvar); \
  11024. + __locked = spin_trylock(lock); \
  11025. + if (!__locked) \
  11026. + local_unlock_irq(lvar); \
  11027. + __locked; \
  11028. + })
  11029. +
  11030. +#define local_spin_lock_irq(lvar, lock) \
  11031. + do { \
  11032. + local_lock_irq(lvar); \
  11033. + spin_lock(lock); \
  11034. + } while (0)
  11035. +
  11036. +#define local_spin_unlock_irq(lvar, lock) \
  11037. + do { \
  11038. + spin_unlock(lock); \
  11039. + local_unlock_irq(lvar); \
  11040. + } while (0)
  11041. +
  11042. +#define local_spin_lock_irqsave(lvar, lock, flags) \
  11043. + do { \
  11044. + local_lock_irqsave(lvar, flags); \
  11045. + spin_lock(lock); \
  11046. + } while (0)
  11047. +
  11048. +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
  11049. + do { \
  11050. + spin_unlock(lock); \
  11051. + local_unlock_irqrestore(lvar, flags); \
  11052. + } while (0)
  11053. +
  11054. +#define get_locked_var(lvar, var) \
  11055. + (*({ \
  11056. + local_lock(lvar); \
  11057. + this_cpu_ptr(&var); \
  11058. + }))
  11059. +
  11060. +#define put_locked_var(lvar, var) local_unlock(lvar);
  11061. +
  11062. +#define local_lock_cpu(lvar) \
  11063. + ({ \
  11064. + local_lock(lvar); \
  11065. + smp_processor_id(); \
  11066. + })
  11067. +
  11068. +#define local_unlock_cpu(lvar) local_unlock(lvar)
  11069. +
  11070. +#else /* PREEMPT_RT_BASE */
  11071. +
  11072. +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar
  11073. +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar
  11074. +
  11075. +static inline void local_irq_lock_init(int lvar) { }
  11076. +
  11077. +#define local_lock(lvar) preempt_disable()
  11078. +#define local_unlock(lvar) preempt_enable()
  11079. +#define local_lock_irq(lvar) local_irq_disable()
  11080. +#define local_unlock_irq(lvar) local_irq_enable()
  11081. +#define local_lock_irqsave(lvar, flags) local_irq_save(flags)
  11082. +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags)
  11083. +
  11084. +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock)
  11085. +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock)
  11086. +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock)
  11087. +#define local_spin_lock_irqsave(lvar, lock, flags) \
  11088. + spin_lock_irqsave(lock, flags)
  11089. +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
  11090. + spin_unlock_irqrestore(lock, flags)
  11091. +
  11092. +#define get_locked_var(lvar, var) get_cpu_var(var)
  11093. +#define put_locked_var(lvar, var) put_cpu_var(var)
  11094. +
  11095. +#define local_lock_cpu(lvar) get_cpu()
  11096. +#define local_unlock_cpu(lvar) put_cpu()
  11097. +
  11098. +#endif
  11099. +
  11100. +#endif
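A usage sketch for the new API: a per-CPU structure that used to be guarded by local_irq_save() gets a named local lock instead, so the section is preemptible and sleepable on RT and compiles back to the old primitives on !RT. The per-CPU list and the function are hypothetical; each CPU's list head is assumed to be initialised elsewhere.

#include <linux/locallock.h>
#include <linux/list.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(struct list_head, my_pcpu_list);
static DEFINE_LOCAL_IRQ_LOCK(my_pcpu_lock);

static void my_queue_item(struct list_head *item)
{
	unsigned long flags;

	/* RT: per-CPU spinlock + migrate_disable(); !RT: local_irq_save() */
	local_lock_irqsave(my_pcpu_lock, flags);
	list_add_tail(item, this_cpu_ptr(&my_pcpu_list));
	local_unlock_irqrestore(my_pcpu_lock, flags);
}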
  11101. diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
  11102. index c0c6b33535fb..89c047144b1f 100644
  11103. --- a/include/linux/mm_types.h
  11104. +++ b/include/linux/mm_types.h
  11105. @@ -11,6 +11,7 @@
  11106. #include <linux/completion.h>
  11107. #include <linux/cpumask.h>
  11108. #include <linux/uprobes.h>
  11109. +#include <linux/rcupdate.h>
  11110. #include <linux/page-flags-layout.h>
  11111. #include <asm/page.h>
  11112. #include <asm/mmu.h>
  11113. @@ -453,6 +454,9 @@ struct mm_struct {
  11114. bool tlb_flush_pending;
  11115. #endif
  11116. struct uprobes_state uprobes_state;
  11117. +#ifdef CONFIG_PREEMPT_RT_BASE
  11118. + struct rcu_head delayed_drop;
  11119. +#endif
  11120. #ifdef CONFIG_X86_INTEL_MPX
  11121. /* address of the bounds directory */
  11122. void __user *bd_addr;
  11123. diff --git a/include/linux/mutex.h b/include/linux/mutex.h
  11124. index 2cb7531e7d7a..b3fdfc820216 100644
  11125. --- a/include/linux/mutex.h
  11126. +++ b/include/linux/mutex.h
  11127. @@ -19,6 +19,17 @@
  11128. #include <asm/processor.h>
  11129. #include <linux/osq_lock.h>
  11130. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11131. +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
  11132. + , .dep_map = { .name = #lockname }
  11133. +#else
  11134. +# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
  11135. +#endif
  11136. +
  11137. +#ifdef CONFIG_PREEMPT_RT_FULL
  11138. +# include <linux/mutex_rt.h>
  11139. +#else
  11140. +
  11141. /*
  11142. * Simple, straightforward mutexes with strict semantics:
  11143. *
  11144. @@ -99,13 +110,6 @@ do { \
  11145. static inline void mutex_destroy(struct mutex *lock) {}
  11146. #endif
  11147. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11148. -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
  11149. - , .dep_map = { .name = #lockname }
  11150. -#else
  11151. -# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
  11152. -#endif
  11153. -
  11154. #define __MUTEX_INITIALIZER(lockname) \
  11155. { .count = ATOMIC_INIT(1) \
  11156. , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
  11157. @@ -173,6 +177,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
  11158. extern int mutex_trylock(struct mutex *lock);
  11159. extern void mutex_unlock(struct mutex *lock);
  11160. +#endif /* !PREEMPT_RT_FULL */
  11161. +
  11162. extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
  11163. #endif /* __LINUX_MUTEX_H */
  11164. diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h
  11165. new file mode 100644
  11166. index 000000000000..c38a44b14da5
  11167. --- /dev/null
  11168. +++ b/include/linux/mutex_rt.h
  11169. @@ -0,0 +1,84 @@
  11170. +#ifndef __LINUX_MUTEX_RT_H
  11171. +#define __LINUX_MUTEX_RT_H
  11172. +
  11173. +#ifndef __LINUX_MUTEX_H
  11174. +#error "Please include mutex.h"
  11175. +#endif
  11176. +
  11177. +#include <linux/rtmutex.h>
  11178. +
  11179. +/* FIXME: Just for __lockfunc */
  11180. +#include <linux/spinlock.h>
  11181. +
  11182. +struct mutex {
  11183. + struct rt_mutex lock;
  11184. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11185. + struct lockdep_map dep_map;
  11186. +#endif
  11187. +};
  11188. +
  11189. +#define __MUTEX_INITIALIZER(mutexname) \
  11190. + { \
  11191. + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \
  11192. + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \
  11193. + }
  11194. +
  11195. +#define DEFINE_MUTEX(mutexname) \
  11196. + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
  11197. +
  11198. +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key);
  11199. +extern void __lockfunc _mutex_lock(struct mutex *lock);
  11200. +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock);
  11201. +extern int __lockfunc _mutex_lock_killable(struct mutex *lock);
  11202. +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass);
  11203. +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
  11204. +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass);
  11205. +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass);
  11206. +extern int __lockfunc _mutex_trylock(struct mutex *lock);
  11207. +extern void __lockfunc _mutex_unlock(struct mutex *lock);
  11208. +
  11209. +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock)
  11210. +#define mutex_lock(l) _mutex_lock(l)
  11211. +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l)
  11212. +#define mutex_lock_killable(l) _mutex_lock_killable(l)
  11213. +#define mutex_trylock(l) _mutex_trylock(l)
  11214. +#define mutex_unlock(l) _mutex_unlock(l)
  11215. +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock)
  11216. +
  11217. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11218. +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s)
  11219. +# define mutex_lock_interruptible_nested(l, s) \
  11220. + _mutex_lock_interruptible_nested(l, s)
  11221. +# define mutex_lock_killable_nested(l, s) \
  11222. + _mutex_lock_killable_nested(l, s)
  11223. +
  11224. +# define mutex_lock_nest_lock(lock, nest_lock) \
  11225. +do { \
  11226. + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
  11227. + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
  11228. +} while (0)
  11229. +
  11230. +#else
  11231. +# define mutex_lock_nested(l, s) _mutex_lock(l)
  11232. +# define mutex_lock_interruptible_nested(l, s) \
  11233. + _mutex_lock_interruptible(l)
  11234. +# define mutex_lock_killable_nested(l, s) \
  11235. + _mutex_lock_killable(l)
  11236. +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
  11237. +#endif
  11238. +
  11239. +# define mutex_init(mutex) \
  11240. +do { \
  11241. + static struct lock_class_key __key; \
  11242. + \
  11243. + rt_mutex_init(&(mutex)->lock); \
  11244. + __mutex_do_init((mutex), #mutex, &__key); \
  11245. +} while (0)
  11246. +
  11247. +# define __mutex_init(mutex, name, key) \
  11248. +do { \
  11249. + rt_mutex_init(&(mutex)->lock); \
  11250. + __mutex_do_init((mutex), name, key); \
  11251. +} while (0)
  11252. +
  11253. +#endif
  11254. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
  11255. index 6c86c7edafa7..27b25e97c3d4 100644
  11256. --- a/include/linux/netdevice.h
  11257. +++ b/include/linux/netdevice.h
  11258. @@ -2193,11 +2193,20 @@ void netdev_freemem(struct net_device *dev);
  11259. void synchronize_net(void);
  11260. int init_dummy_netdev(struct net_device *dev);
  11261. +#ifdef CONFIG_PREEMPT_RT_FULL
  11262. +static inline int dev_recursion_level(void)
  11263. +{
  11264. + return current->xmit_recursion;
  11265. +}
  11266. +
  11267. +#else
  11268. +
  11269. DECLARE_PER_CPU(int, xmit_recursion);
  11270. static inline int dev_recursion_level(void)
  11271. {
  11272. return this_cpu_read(xmit_recursion);
  11273. }
  11274. +#endif
  11275. struct net_device *dev_get_by_index(struct net *net, int ifindex);
  11276. struct net_device *__dev_get_by_index(struct net *net, int ifindex);
  11277. @@ -2488,6 +2497,7 @@ struct softnet_data {
  11278. unsigned int dropped;
  11279. struct sk_buff_head input_pkt_queue;
  11280. struct napi_struct backlog;
  11281. + struct sk_buff_head tofree_queue;
  11282. };
  11283. diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
  11284. index cc615e273f80..1a6ba6d7ff8b 100644
  11285. --- a/include/linux/netfilter/x_tables.h
  11286. +++ b/include/linux/netfilter/x_tables.h
  11287. @@ -3,6 +3,7 @@
  11288. #include <linux/netdevice.h>
  11289. +#include <linux/locallock.h>
  11290. #include <uapi/linux/netfilter/x_tables.h>
  11291. /**
  11292. @@ -293,6 +294,8 @@ void xt_free_table_info(struct xt_table_info *info);
  11293. */
  11294. DECLARE_PER_CPU(seqcount_t, xt_recseq);
  11295. +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock);
  11296. +
  11297. /**
  11298. * xt_write_recseq_begin - start of a write section
  11299. *
  11300. @@ -307,6 +310,9 @@ static inline unsigned int xt_write_recseq_begin(void)
  11301. {
  11302. unsigned int addend;
  11303. + /* RT protection */
  11304. + local_lock(xt_write_lock);
  11305. +
  11306. /*
  11307. * Low order bit of sequence is set if we already
  11308. * called xt_write_recseq_begin().
  11309. @@ -337,6 +343,7 @@ static inline void xt_write_recseq_end(unsigned int addend)
  11310. /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */
  11311. smp_wmb();
  11312. __this_cpu_add(xt_recseq.sequence, addend);
  11313. + local_unlock(xt_write_lock);
  11314. }
  11315. /*
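For reference, the (unchanged) calling convention of the recseq helpers, roughly as the iptables table walkers use them; the traversal itself is elided and the function below is only a placeholder:

#include <linux/netfilter/x_tables.h>
#include <linux/bottom_half.h>

static void my_table_walk(void)
{
	unsigned int addend;

	local_bh_disable();
	addend = xt_write_recseq_begin();	/* now also takes xt_write_lock on RT */

	/* ... traverse the ruleset under the per-CPU seqcount ... */

	xt_write_recseq_end(addend);		/* drops xt_write_lock on RT */
	local_bh_enable();
}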
  11316. diff --git a/include/linux/notifier.h b/include/linux/notifier.h
  11317. index d14a4c362465..2e4414a0c1c4 100644
  11318. --- a/include/linux/notifier.h
  11319. +++ b/include/linux/notifier.h
  11320. @@ -6,7 +6,7 @@
  11321. *
  11322. * Alan Cox <Alan.Cox@linux.org>
  11323. */
  11324. -
  11325. +
  11326. #ifndef _LINUX_NOTIFIER_H
  11327. #define _LINUX_NOTIFIER_H
  11328. #include <linux/errno.h>
  11329. @@ -42,9 +42,7 @@
  11330. * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
  11331. * As compensation, srcu_notifier_chain_unregister() is rather expensive.
  11332. * SRCU notifier chains should be used when the chain will be called very
  11333. - * often but notifier_blocks will seldom be removed. Also, SRCU notifier
  11334. - * chains are slightly more difficult to use because they require special
  11335. - * runtime initialization.
  11336. + * often but notifier_blocks will seldom be removed.
  11337. */
  11338. typedef int (*notifier_fn_t)(struct notifier_block *nb,
  11339. @@ -88,7 +86,7 @@ struct srcu_notifier_head {
  11340. (name)->head = NULL; \
  11341. } while (0)
  11342. -/* srcu_notifier_heads must be initialized and cleaned up dynamically */
  11343. +/* srcu_notifier_heads must be cleaned up dynamically */
  11344. extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
  11345. #define srcu_cleanup_notifier_head(name) \
  11346. cleanup_srcu_struct(&(name)->srcu);
  11347. @@ -101,7 +99,13 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
  11348. .head = NULL }
  11349. #define RAW_NOTIFIER_INIT(name) { \
  11350. .head = NULL }
  11351. -/* srcu_notifier_heads cannot be initialized statically */
  11352. +
  11353. +#define SRCU_NOTIFIER_INIT(name, pcpu) \
  11354. + { \
  11355. + .mutex = __MUTEX_INITIALIZER(name.mutex), \
  11356. + .head = NULL, \
  11357. + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \
  11358. + }
  11359. #define ATOMIC_NOTIFIER_HEAD(name) \
  11360. struct atomic_notifier_head name = \
  11361. @@ -113,6 +117,18 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
  11362. struct raw_notifier_head name = \
  11363. RAW_NOTIFIER_INIT(name)
  11364. +#define _SRCU_NOTIFIER_HEAD(name, mod) \
  11365. + static DEFINE_PER_CPU(struct srcu_struct_array, \
  11366. + name##_head_srcu_array); \
  11367. + mod struct srcu_notifier_head name = \
  11368. + SRCU_NOTIFIER_INIT(name, name##_head_srcu_array)
  11369. +
  11370. +#define SRCU_NOTIFIER_HEAD(name) \
  11371. + _SRCU_NOTIFIER_HEAD(name, )
  11372. +
  11373. +#define SRCU_NOTIFIER_HEAD_STATIC(name) \
  11374. + _SRCU_NOTIFIER_HEAD(name, static)
  11375. +
  11376. #ifdef __KERNEL__
  11377. extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
  11378. @@ -182,12 +198,12 @@ static inline int notifier_to_errno(int ret)
  11379. /*
  11380. * Declared notifiers so far. I can imagine quite a few more chains
  11381. - * over time (eg laptop power reset chains, reboot chain (to clean
  11382. + * over time (eg laptop power reset chains, reboot chain (to clean
  11383. * device units up), device [un]mount chain, module load/unload chain,
  11384. - * low memory chain, screenblank chain (for plug in modular screenblankers)
  11385. + * low memory chain, screenblank chain (for plug in modular screenblankers)
  11386. * VC switch chains (for loadable kernel svgalib VC switch helpers) etc...
  11387. */
  11388. -
  11389. +
  11390. /* CPU notfiers are defined in include/linux/cpu.h. */
  11391. /* netdevice notifiers are defined in include/linux/netdevice.h */
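With the static initializer in place, an SRCU notifier chain can be defined at build time. A hedged sketch follows; the chain, callback and helper names are made up for illustration.

#include <linux/notifier.h>

SRCU_NOTIFIER_HEAD_STATIC(my_chain);	/* no srcu_init_notifier_head() needed */

static int my_event_cb(struct notifier_block *nb, unsigned long action,
		       void *data)
{
	return NOTIFY_OK;
}

static struct notifier_block my_nb = {
	.notifier_call = my_event_cb,
};

static int my_chain_register(void)
{
	return srcu_notifier_chain_register(&my_chain, &my_nb);
}

static void my_chain_signal(unsigned long action, void *data)
{
	srcu_notifier_call_chain(&my_chain, action, data);
}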
  11392. diff --git a/include/linux/percpu.h b/include/linux/percpu.h
  11393. index caebf2a758dc..53a60a51c758 100644
  11394. --- a/include/linux/percpu.h
  11395. +++ b/include/linux/percpu.h
  11396. @@ -24,6 +24,35 @@
  11397. PERCPU_MODULE_RESERVE)
  11398. #endif
  11399. +#ifdef CONFIG_PREEMPT_RT_FULL
  11400. +
  11401. +#define get_local_var(var) (*({ \
  11402. + migrate_disable(); \
  11403. + this_cpu_ptr(&var); }))
  11404. +
  11405. +#define put_local_var(var) do { \
  11406. + (void)&(var); \
  11407. + migrate_enable(); \
  11408. +} while (0)
  11409. +
  11410. +# define get_local_ptr(var) ({ \
  11411. + migrate_disable(); \
  11412. + this_cpu_ptr(var); })
  11413. +
  11414. +# define put_local_ptr(var) do { \
  11415. + (void)(var); \
  11416. + migrate_enable(); \
  11417. +} while (0)
  11418. +
  11419. +#else
  11420. +
  11421. +#define get_local_var(var) get_cpu_var(var)
  11422. +#define put_local_var(var) put_cpu_var(var)
  11423. +#define get_local_ptr(var) get_cpu_ptr(var)
  11424. +#define put_local_ptr(var) put_cpu_ptr(var)
  11425. +
  11426. +#endif
  11427. +
  11428. /* minimum unit size, also is the maximum supported allocation size */
  11429. #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
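get_local_var()/put_local_var() are the migrate-disable counterparts of get_cpu_var()/put_cpu_var(): the task stays on its CPU but remains preemptible on RT, so serialization against other tasks on the same CPU has to come from elsewhere (typically a local_lock). A purely illustrative counter update:

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, my_stat);

static void my_account_event(void)
{
	unsigned long *stat;

	/* !RT: get_cpu_var() (preemption off); RT: migrate_disable() only */
	stat = &get_local_var(my_stat);
	(*stat)++;
	put_local_var(my_stat);
}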
  11430. diff --git a/include/linux/pid.h b/include/linux/pid.h
  11431. index 23705a53abba..2cc64b779f03 100644
  11432. --- a/include/linux/pid.h
  11433. +++ b/include/linux/pid.h
  11434. @@ -2,6 +2,7 @@
  11435. #define _LINUX_PID_H
  11436. #include <linux/rcupdate.h>
  11437. +#include <linux/atomic.h>
  11438. enum pid_type
  11439. {
  11440. diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h
  11441. index 5d50b25a73d7..ff43e01b8ca9 100644
  11442. --- a/include/linux/platform_data/gpio-omap.h
  11443. +++ b/include/linux/platform_data/gpio-omap.h
  11444. @@ -198,7 +198,6 @@ struct omap_gpio_platform_data {
  11445. int bank_width; /* GPIO bank width */
  11446. int bank_stride; /* Only needed for omap1 MPUIO */
  11447. bool dbck_flag; /* dbck required or not - True for OMAP3&4 */
  11448. - bool loses_context; /* whether the bank would ever lose context */
  11449. bool is_mpuio; /* whether the bank is of type MPUIO */
  11450. u32 non_wakeup_gpios;
  11451. @@ -208,9 +207,17 @@ struct omap_gpio_platform_data {
  11452. int (*get_context_loss_count)(struct device *dev);
  11453. };
  11454. +#if IS_BUILTIN(CONFIG_GPIO_OMAP)
  11455. extern void omap2_gpio_prepare_for_idle(int off_mode);
  11456. extern void omap2_gpio_resume_after_idle(void);
  11457. -extern void omap_set_gpio_debounce(int gpio, int enable);
  11458. -extern void omap_set_gpio_debounce_time(int gpio, int enable);
  11459. +#else
  11460. +static inline void omap2_gpio_prepare_for_idle(int off_mode)
  11461. +{
  11462. +}
  11463. +
  11464. +static inline void omap2_gpio_resume_after_idle(void)
  11465. +{
  11466. +}
  11467. +#endif
  11468. #endif
  11469. diff --git a/include/linux/preempt.h b/include/linux/preempt.h
  11470. index 8cd6725c5758..8fa1d21dab70 100644
  11471. --- a/include/linux/preempt.h
  11472. +++ b/include/linux/preempt.h
  11473. @@ -34,6 +34,20 @@ extern void preempt_count_sub(int val);
  11474. #define preempt_count_inc() preempt_count_add(1)
  11475. #define preempt_count_dec() preempt_count_sub(1)
  11476. +#ifdef CONFIG_PREEMPT_LAZY
  11477. +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
  11478. +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
  11479. +#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
  11480. +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
  11481. +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
  11482. +#else
  11483. +#define add_preempt_lazy_count(val) do { } while (0)
  11484. +#define sub_preempt_lazy_count(val) do { } while (0)
  11485. +#define inc_preempt_lazy_count() do { } while (0)
  11486. +#define dec_preempt_lazy_count() do { } while (0)
  11487. +#define preempt_lazy_count() (0)
  11488. +#endif
  11489. +
  11490. #ifdef CONFIG_PREEMPT_COUNT
  11491. #define preempt_disable() \
  11492. @@ -42,13 +56,25 @@ do { \
  11493. barrier(); \
  11494. } while (0)
  11495. +#define preempt_lazy_disable() \
  11496. +do { \
  11497. + inc_preempt_lazy_count(); \
  11498. + barrier(); \
  11499. +} while (0)
  11500. +
  11501. #define sched_preempt_enable_no_resched() \
  11502. do { \
  11503. barrier(); \
  11504. preempt_count_dec(); \
  11505. } while (0)
  11506. -#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
  11507. +#ifdef CONFIG_PREEMPT_RT_BASE
  11508. +# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
  11509. +# define preempt_check_resched_rt() preempt_check_resched()
  11510. +#else
  11511. +# define preempt_enable_no_resched() preempt_enable()
  11512. +# define preempt_check_resched_rt() barrier();
  11513. +#endif
  11514. #ifdef CONFIG_PREEMPT
  11515. #define preempt_enable() \
  11516. @@ -64,6 +90,13 @@ do { \
  11517. __preempt_schedule(); \
  11518. } while (0)
  11519. +#define preempt_lazy_enable() \
  11520. +do { \
  11521. + dec_preempt_lazy_count(); \
  11522. + barrier(); \
  11523. + preempt_check_resched(); \
  11524. +} while (0)
  11525. +
  11526. #else
  11527. #define preempt_enable() \
  11528. do { \
  11529. @@ -122,6 +155,7 @@ do { \
  11530. #define preempt_disable_notrace() barrier()
  11531. #define preempt_enable_no_resched_notrace() barrier()
  11532. #define preempt_enable_notrace() barrier()
  11533. +#define preempt_check_resched_rt() barrier()
  11534. #endif /* CONFIG_PREEMPT_COUNT */
  11535. @@ -141,10 +175,31 @@ do { \
  11536. } while (0)
  11537. #define preempt_fold_need_resched() \
  11538. do { \
  11539. - if (tif_need_resched()) \
  11540. + if (tif_need_resched_now()) \
  11541. set_preempt_need_resched(); \
  11542. } while (0)
  11543. +#ifdef CONFIG_PREEMPT_RT_FULL
  11544. +# define preempt_disable_rt() preempt_disable()
  11545. +# define preempt_enable_rt() preempt_enable()
  11546. +# define preempt_disable_nort() barrier()
  11547. +# define preempt_enable_nort() barrier()
  11548. +# ifdef CONFIG_SMP
  11549. + extern void migrate_disable(void);
  11550. + extern void migrate_enable(void);
  11551. +# else /* CONFIG_SMP */
  11552. +# define migrate_disable() barrier()
  11553. +# define migrate_enable() barrier()
  11554. +# endif /* CONFIG_SMP */
  11555. +#else
  11556. +# define preempt_disable_rt() barrier()
  11557. +# define preempt_enable_rt() barrier()
  11558. +# define preempt_disable_nort() preempt_disable()
  11559. +# define preempt_enable_nort() preempt_enable()
  11560. +# define migrate_disable() preempt_disable()
  11561. +# define migrate_enable() preempt_enable()
  11562. +#endif
  11563. +
  11564. #ifdef CONFIG_PREEMPT_NOTIFIERS
  11565. struct preempt_notifier;
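migrate_disable()/migrate_enable() pin the current task to its CPU without disabling preemption on RT and fall back to preempt_disable()/preempt_enable() otherwise, so smp_processor_id() is stable for the whole section. A minimal sketch with an arbitrary per-CPU callback:

#include <linux/preempt.h>
#include <linux/smp.h>

static void my_run_on_this_cpu(void (*fn)(int cpu))
{
	int cpu;

	migrate_disable();		/* RT: pinned to this CPU but preemptible */
	cpu = smp_processor_id();
	fn(cpu);
	migrate_enable();
}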
  11566. diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h
  11567. index 5cb25f17331a..26a33802dae2 100644
  11568. --- a/include/linux/preempt_mask.h
  11569. +++ b/include/linux/preempt_mask.h
  11570. @@ -44,16 +44,26 @@
  11571. #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
  11572. #define NMI_OFFSET (1UL << NMI_SHIFT)
  11573. -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
  11574. +#ifndef CONFIG_PREEMPT_RT_FULL
  11575. +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
  11576. +#else
  11577. +# define SOFTIRQ_DISABLE_OFFSET (0)
  11578. +#endif
  11579. #define PREEMPT_ACTIVE_BITS 1
  11580. #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
  11581. #define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
  11582. #define hardirq_count() (preempt_count() & HARDIRQ_MASK)
  11583. -#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
  11584. #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
  11585. | NMI_MASK))
  11586. +#ifndef CONFIG_PREEMPT_RT_FULL
  11587. +# define softirq_count() (preempt_count() & SOFTIRQ_MASK)
  11588. +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
  11589. +#else
  11590. +# define softirq_count() (0UL)
  11591. +extern int in_serving_softirq(void);
  11592. +#endif
  11593. /*
  11594. * Are we doing bottom half or hardware interrupt processing?
  11595. @@ -64,7 +74,6 @@
  11596. #define in_irq() (hardirq_count())
  11597. #define in_softirq() (softirq_count())
  11598. #define in_interrupt() (irq_count())
  11599. -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
  11600. /*
  11601. * Are we in NMI context?
  11602. @@ -83,7 +92,11 @@
  11603. /*
  11604. * The preempt_count offset after spin_lock()
  11605. */
  11606. +#if !defined(CONFIG_PREEMPT_RT_FULL)
  11607. #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
  11608. +#else
  11609. +#define PREEMPT_LOCK_OFFSET 0
  11610. +#endif
  11611. /*
  11612. * The preempt_count offset needed for things like:
  11613. diff --git a/include/linux/printk.h b/include/linux/printk.h
  11614. index 9b30871c9149..08d0a7574fcf 100644
  11615. --- a/include/linux/printk.h
  11616. +++ b/include/linux/printk.h
  11617. @@ -115,9 +115,11 @@ int no_printk(const char *fmt, ...)
  11618. #ifdef CONFIG_EARLY_PRINTK
  11619. extern asmlinkage __printf(1, 2)
  11620. void early_printk(const char *fmt, ...);
  11621. +extern void printk_kill(void);
  11622. #else
  11623. static inline __printf(1, 2) __cold
  11624. void early_printk(const char *s, ...) { }
  11625. +static inline void printk_kill(void) { }
  11626. #endif
  11627. typedef int(*printk_func_t)(const char *fmt, va_list args);
  11628. diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
  11629. index 673dee29a9b9..9a80663a1574 100644
  11630. --- a/include/linux/radix-tree.h
  11631. +++ b/include/linux/radix-tree.h
  11632. @@ -277,8 +277,13 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
  11633. unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
  11634. void ***results, unsigned long *indices,
  11635. unsigned long first_index, unsigned int max_items);
  11636. +#ifndef CONFIG_PREEMPT_RT_FULL
  11637. int radix_tree_preload(gfp_t gfp_mask);
  11638. int radix_tree_maybe_preload(gfp_t gfp_mask);
  11639. +#else
  11640. +static inline int radix_tree_preload(gfp_t gm) { return 0; }
  11641. +static inline int radix_tree_maybe_preload(gfp_t gfp_mask) { return 0; }
  11642. +#endif
  11643. void radix_tree_init(void);
  11644. void *radix_tree_tag_set(struct radix_tree_root *root,
  11645. unsigned long index, unsigned int tag);
  11646. @@ -303,7 +308,7 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item);
  11647. static inline void radix_tree_preload_end(void)
  11648. {
  11649. - preempt_enable();
  11650. + preempt_enable_nort();
  11651. }
  11652. /**
  11653. diff --git a/include/linux/random.h b/include/linux/random.h
  11654. index b05856e16b75..4a64ad52dcb7 100644
  11655. --- a/include/linux/random.h
  11656. +++ b/include/linux/random.h
  11657. @@ -11,7 +11,7 @@
  11658. extern void add_device_randomness(const void *, unsigned int);
  11659. extern void add_input_randomness(unsigned int type, unsigned int code,
  11660. unsigned int value);
  11661. -extern void add_interrupt_randomness(int irq, int irq_flags);
  11662. +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip);
  11663. extern void get_random_bytes(void *buf, int nbytes);
  11664. extern void get_random_bytes_arch(void *buf, int nbytes);
  11665. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
  11666. index 573a5afd5ed8..5d090cdaaace 100644
  11667. --- a/include/linux/rcupdate.h
  11668. +++ b/include/linux/rcupdate.h
  11669. @@ -167,6 +167,9 @@ void call_rcu(struct rcu_head *head,
  11670. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  11671. +#ifdef CONFIG_PREEMPT_RT_FULL
  11672. +#define call_rcu_bh call_rcu
  11673. +#else
  11674. /**
  11675. * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
  11676. * @head: structure to be used for queueing the RCU updates.
  11677. @@ -190,6 +193,7 @@ void call_rcu(struct rcu_head *head,
  11678. */
  11679. void call_rcu_bh(struct rcu_head *head,
  11680. void (*func)(struct rcu_head *head));
  11681. +#endif
  11682. /**
  11683. * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
  11684. @@ -260,6 +264,11 @@ void synchronize_rcu(void);
  11685. * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
  11686. */
  11687. #define rcu_preempt_depth() (current->rcu_read_lock_nesting)
  11688. +#ifndef CONFIG_PREEMPT_RT_FULL
  11689. +#define sched_rcu_preempt_depth() rcu_preempt_depth()
  11690. +#else
  11691. +static inline int sched_rcu_preempt_depth(void) { return 0; }
  11692. +#endif
  11693. #else /* #ifdef CONFIG_PREEMPT_RCU */
  11694. @@ -283,6 +292,8 @@ static inline int rcu_preempt_depth(void)
  11695. return 0;
  11696. }
  11697. +#define sched_rcu_preempt_depth() rcu_preempt_depth()
  11698. +
  11699. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  11700. /* Internal to kernel */
  11701. @@ -463,7 +474,14 @@ extern struct lockdep_map rcu_callback_map;
  11702. int debug_lockdep_rcu_enabled(void);
  11703. int rcu_read_lock_held(void);
  11704. +#ifdef CONFIG_PREEMPT_RT_FULL
  11705. +static inline int rcu_read_lock_bh_held(void)
  11706. +{
  11707. + return rcu_read_lock_held();
  11708. +}
  11709. +#else
  11710. int rcu_read_lock_bh_held(void);
  11711. +#endif
  11712. /**
  11713. * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
  11714. @@ -990,10 +1008,14 @@ static inline void rcu_read_unlock(void)
  11715. static inline void rcu_read_lock_bh(void)
  11716. {
  11717. local_bh_disable();
  11718. +#ifdef CONFIG_PREEMPT_RT_FULL
  11719. + rcu_read_lock();
  11720. +#else
  11721. __acquire(RCU_BH);
  11722. rcu_lock_acquire(&rcu_bh_lock_map);
  11723. rcu_lockdep_assert(rcu_is_watching(),
  11724. "rcu_read_lock_bh() used illegally while idle");
  11725. +#endif
  11726. }
  11727. /*
  11728. @@ -1003,10 +1025,14 @@ static inline void rcu_read_lock_bh(void)
  11729. */
  11730. static inline void rcu_read_unlock_bh(void)
  11731. {
  11732. +#ifdef CONFIG_PREEMPT_RT_FULL
  11733. + rcu_read_unlock();
  11734. +#else
  11735. rcu_lockdep_assert(rcu_is_watching(),
  11736. "rcu_read_unlock_bh() used illegally while idle");
  11737. rcu_lock_release(&rcu_bh_lock_map);
  11738. __release(RCU_BH);
  11739. +#endif
  11740. local_bh_enable();
  11741. }
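
The rcu_read_lock_bh()/rcu_read_unlock_bh() rework above means that on RT a BH reader simply takes a normal rcu_read_lock() after disabling bottom halves, so existing read-side code needs no change. A minimal reader sketch; struct entry, the list and the field names are invented for illustration:

    #include <linux/rcupdate.h>
    #include <linux/rculist.h>
    #include <linux/errno.h>

    struct entry {
            int key;
            int data;
            struct list_head node;
            struct rcu_head rcu;
    };

    static LIST_HEAD(entries);      /* updated under a writer-side lock, not shown */

    static int lookup_data(int key, int *data)
    {
            struct entry *e;
            int ret = -ENOENT;

            rcu_read_lock_bh();     /* RT: local_bh_disable() + rcu_read_lock() */
            list_for_each_entry_rcu(e, &entries, node) {
                    if (e->key == key) {
                            *data = e->data;        /* copy while still in the read section */
                            ret = 0;
                            break;
                    }
            }
            rcu_read_unlock_bh();
            return ret;
    }
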
  11742. diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
  11743. index d2e583a6aaca..0b350893b46a 100644
  11744. --- a/include/linux/rcutree.h
  11745. +++ b/include/linux/rcutree.h
  11746. @@ -46,7 +46,11 @@ static inline void rcu_virt_note_context_switch(int cpu)
  11747. rcu_note_context_switch();
  11748. }
  11749. +#ifdef CONFIG_PREEMPT_RT_FULL
  11750. +# define synchronize_rcu_bh synchronize_rcu
  11751. +#else
  11752. void synchronize_rcu_bh(void);
  11753. +#endif
  11754. void synchronize_sched_expedited(void);
  11755. void synchronize_rcu_expedited(void);
  11756. @@ -74,7 +78,11 @@ static inline void synchronize_rcu_bh_expedited(void)
  11757. }
  11758. void rcu_barrier(void);
  11759. +#ifdef CONFIG_PREEMPT_RT_FULL
  11760. +# define rcu_barrier_bh rcu_barrier
  11761. +#else
  11762. void rcu_barrier_bh(void);
  11763. +#endif
  11764. void rcu_barrier_sched(void);
  11765. unsigned long get_state_synchronize_rcu(void);
  11766. void cond_synchronize_rcu(unsigned long oldstate);
  11767. @@ -85,12 +93,10 @@ unsigned long rcu_batches_started(void);
  11768. unsigned long rcu_batches_started_bh(void);
  11769. unsigned long rcu_batches_started_sched(void);
  11770. unsigned long rcu_batches_completed(void);
  11771. -unsigned long rcu_batches_completed_bh(void);
  11772. unsigned long rcu_batches_completed_sched(void);
  11773. void show_rcu_gp_kthreads(void);
  11774. void rcu_force_quiescent_state(void);
  11775. -void rcu_bh_force_quiescent_state(void);
  11776. void rcu_sched_force_quiescent_state(void);
  11777. void exit_rcu(void);
  11778. @@ -100,6 +106,14 @@ extern int rcu_scheduler_active __read_mostly;
  11779. bool rcu_is_watching(void);
  11780. +#ifndef CONFIG_PREEMPT_RT_FULL
  11781. +void rcu_bh_force_quiescent_state(void);
  11782. +unsigned long rcu_batches_completed_bh(void);
  11783. +#else
  11784. +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state
  11785. +# define rcu_batches_completed_bh rcu_batches_completed
  11786. +#endif
  11787. +
  11788. void rcu_all_qs(void);
  11789. #endif /* __LINUX_RCUTREE_H */
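
The same folding applies on the update side: under PREEMPT_RT_FULL, call_rcu_bh(), and per the rcutree.h hunks above also synchronize_rcu_bh() and rcu_barrier_bh(), alias the plain RCU flavor. A hedged updater sketch, assuming an element that embeds an rcu_head as in the illustrative struct entry above:

    #include <linux/rcupdate.h>
    #include <linux/rculist.h>
    #include <linux/slab.h>

    static void free_entry_rcu(struct rcu_head *head)
    {
            kfree(container_of(head, struct entry, rcu));
    }

    static void remove_entry(struct entry *e)
    {
            list_del_rcu(&e->node);                 /* writer-side lock assumed held */
            call_rcu_bh(&e->rcu, free_entry_rcu);   /* == call_rcu() on RT */
    }

    static void teardown(void)
    {
            rcu_barrier_bh();       /* == rcu_barrier() on RT: wait for pending callbacks */
    }
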
  11790. diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
  11791. index 1abba5ce2a2f..d5a04ea47a13 100644
  11792. --- a/include/linux/rtmutex.h
  11793. +++ b/include/linux/rtmutex.h
  11794. @@ -14,10 +14,14 @@
  11795. #include <linux/linkage.h>
  11796. #include <linux/rbtree.h>
  11797. -#include <linux/spinlock_types.h>
  11798. +#include <linux/spinlock_types_raw.h>
  11799. extern int max_lock_depth; /* for sysctl */
  11800. +#ifdef CONFIG_DEBUG_MUTEXES
  11801. +#include <linux/debug_locks.h>
  11802. +#endif
  11803. +
  11804. /**
  11805. * The rt_mutex structure
  11806. *
  11807. @@ -31,8 +35,8 @@ struct rt_mutex {
  11808. struct rb_root waiters;
  11809. struct rb_node *waiters_leftmost;
  11810. struct task_struct *owner;
  11811. -#ifdef CONFIG_DEBUG_RT_MUTEXES
  11812. int save_state;
  11813. +#ifdef CONFIG_DEBUG_RT_MUTEXES
  11814. const char *name, *file;
  11815. int line;
  11816. void *magic;
  11817. @@ -55,22 +59,33 @@ struct hrtimer_sleeper;
  11818. # define rt_mutex_debug_check_no_locks_held(task) do { } while (0)
  11819. #endif
  11820. +# define rt_mutex_init(mutex) \
  11821. + do { \
  11822. + raw_spin_lock_init(&(mutex)->wait_lock); \
  11823. + __rt_mutex_init(mutex, #mutex); \
  11824. + } while (0)
  11825. +
  11826. #ifdef CONFIG_DEBUG_RT_MUTEXES
  11827. # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
  11828. , .name = #mutexname, .file = __FILE__, .line = __LINE__
  11829. -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__)
  11830. extern void rt_mutex_debug_task_free(struct task_struct *tsk);
  11831. #else
  11832. # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
  11833. -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL)
  11834. # define rt_mutex_debug_task_free(t) do { } while (0)
  11835. #endif
  11836. -#define __RT_MUTEX_INITIALIZER(mutexname) \
  11837. - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
  11838. +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
  11839. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
  11840. , .waiters = RB_ROOT \
  11841. , .owner = NULL \
  11842. - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
  11843. + __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
  11844. +
  11845. +#define __RT_MUTEX_INITIALIZER(mutexname) \
  11846. + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) }
  11847. +
  11848. +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
  11849. + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
  11850. + , .save_state = 1 }
  11851. #define DEFINE_RT_MUTEX(mutexname) \
  11852. struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
  11853. @@ -91,6 +106,7 @@ extern void rt_mutex_destroy(struct rt_mutex *lock);
  11854. extern void rt_mutex_lock(struct rt_mutex *lock);
  11855. extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
  11856. +extern int rt_mutex_lock_killable(struct rt_mutex *lock);
  11857. extern int rt_mutex_timed_lock(struct rt_mutex *lock,
  11858. struct hrtimer_sleeper *timeout);
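
Since rt_mutex_init() now unconditionally initializes the wait_lock (and save_state moves out of the debug-only section), and rt_mutex_lock_killable() is exported, a raw rtmutex user looks like the sketch below; conf_lock, dyn_lock and update_config are illustrative names, not part of the patch:

    #include <linux/rtmutex.h>
    #include <linux/errno.h>

    static DEFINE_RT_MUTEX(conf_lock);              /* static initializer path */

    static int update_config(void)
    {
            struct rt_mutex dyn_lock;

            rt_mutex_init(&dyn_lock);               /* runtime init: wait_lock + lockdep name */

            if (rt_mutex_lock_killable(&conf_lock)) /* gives up if a fatal signal arrives */
                    return -EINTR;
            /* ... touch the configuration ... */
            rt_mutex_unlock(&conf_lock);

            rt_mutex_destroy(&dyn_lock);
            return 0;
    }
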
  11859. diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h
  11860. new file mode 100644
  11861. index 000000000000..49ed2d45d3be
  11862. --- /dev/null
  11863. +++ b/include/linux/rwlock_rt.h
  11864. @@ -0,0 +1,99 @@
  11865. +#ifndef __LINUX_RWLOCK_RT_H
  11866. +#define __LINUX_RWLOCK_RT_H
  11867. +
  11868. +#ifndef __LINUX_SPINLOCK_H
  11869. +#error Do not include directly. Use spinlock.h
  11870. +#endif
  11871. +
  11872. +#define rwlock_init(rwl) \
  11873. +do { \
  11874. + static struct lock_class_key __key; \
  11875. + \
  11876. + rt_mutex_init(&(rwl)->lock); \
  11877. + __rt_rwlock_init(rwl, #rwl, &__key); \
  11878. +} while (0)
  11879. +
  11880. +extern void __lockfunc rt_write_lock(rwlock_t *rwlock);
  11881. +extern void __lockfunc rt_read_lock(rwlock_t *rwlock);
  11882. +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock);
  11883. +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags);
  11884. +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock);
  11885. +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock);
  11886. +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock);
  11887. +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock);
  11888. +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock);
  11889. +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key);
  11890. +
  11891. +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock))
  11892. +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock))
  11893. +
  11894. +#define write_trylock_irqsave(lock, flags) \
  11895. + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags))
  11896. +
  11897. +#define read_lock_irqsave(lock, flags) \
  11898. + do { \
  11899. + typecheck(unsigned long, flags); \
  11900. + flags = rt_read_lock_irqsave(lock); \
  11901. + } while (0)
  11902. +
  11903. +#define write_lock_irqsave(lock, flags) \
  11904. + do { \
  11905. + typecheck(unsigned long, flags); \
  11906. + flags = rt_write_lock_irqsave(lock); \
  11907. + } while (0)
  11908. +
  11909. +#define read_lock(lock) rt_read_lock(lock)
  11910. +
  11911. +#define read_lock_bh(lock) \
  11912. + do { \
  11913. + local_bh_disable(); \
  11914. + rt_read_lock(lock); \
  11915. + } while (0)
  11916. +
  11917. +#define read_lock_irq(lock) read_lock(lock)
  11918. +
  11919. +#define write_lock(lock) rt_write_lock(lock)
  11920. +
  11921. +#define write_lock_bh(lock) \
  11922. + do { \
  11923. + local_bh_disable(); \
  11924. + rt_write_lock(lock); \
  11925. + } while (0)
  11926. +
  11927. +#define write_lock_irq(lock) write_lock(lock)
  11928. +
  11929. +#define read_unlock(lock) rt_read_unlock(lock)
  11930. +
  11931. +#define read_unlock_bh(lock) \
  11932. + do { \
  11933. + rt_read_unlock(lock); \
  11934. + local_bh_enable(); \
  11935. + } while (0)
  11936. +
  11937. +#define read_unlock_irq(lock) read_unlock(lock)
  11938. +
  11939. +#define write_unlock(lock) rt_write_unlock(lock)
  11940. +
  11941. +#define write_unlock_bh(lock) \
  11942. + do { \
  11943. + rt_write_unlock(lock); \
  11944. + local_bh_enable(); \
  11945. + } while (0)
  11946. +
  11947. +#define write_unlock_irq(lock) write_unlock(lock)
  11948. +
  11949. +#define read_unlock_irqrestore(lock, flags) \
  11950. + do { \
  11951. + typecheck(unsigned long, flags); \
  11952. + (void) flags; \
  11953. + rt_read_unlock(lock); \
  11954. + } while (0)
  11955. +
  11956. +#define write_unlock_irqrestore(lock, flags) \
  11957. + do { \
  11958. + typecheck(unsigned long, flags); \
  11959. + (void) flags; \
  11960. + rt_write_unlock(lock); \
  11961. + } while (0)
  11962. +
  11963. +#endif
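
The point of the new rwlock_rt.h is that every rwlock_t operation keeps its name but is now backed by an rtmutex, so readers and writers may sleep and be priority-boosted, and the _irq/_irqsave variants no longer disable hardware interrupts. Callers therefore stay source-compatible; a sketch with invented names:

    #include <linux/spinlock.h>     /* pulls in rwlock_rt.h on RT */

    static DEFINE_RWLOCK(table_lock);
    static int table[16];

    static int table_read(int idx)
    {
            int v;

            read_lock(&table_lock); /* rt_read_lock(): read recursion only for the owner */
            v = table[idx];
            read_unlock(&table_lock);
            return v;
    }

    static void table_write(int idx, int v)
    {
            unsigned long flags;

            /* flags kept for API compatibility; interrupts stay enabled on RT */
            write_lock_irqsave(&table_lock, flags);
            table[idx] = v;
            write_unlock_irqrestore(&table_lock, flags);
    }
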
  11964. diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
  11965. index cc0072e93e36..d0da966ad7a0 100644
  11966. --- a/include/linux/rwlock_types.h
  11967. +++ b/include/linux/rwlock_types.h
  11968. @@ -1,6 +1,10 @@
  11969. #ifndef __LINUX_RWLOCK_TYPES_H
  11970. #define __LINUX_RWLOCK_TYPES_H
  11971. +#if !defined(__LINUX_SPINLOCK_TYPES_H)
  11972. +# error "Do not include directly, include spinlock_types.h"
  11973. +#endif
  11974. +
  11975. /*
  11976. * include/linux/rwlock_types.h - generic rwlock type definitions
  11977. * and initializers
  11978. @@ -43,6 +47,7 @@ typedef struct {
  11979. RW_DEP_MAP_INIT(lockname) }
  11980. #endif
  11981. -#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
  11982. +#define DEFINE_RWLOCK(name) \
  11983. + rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
  11984. #endif /* __LINUX_RWLOCK_TYPES_H */
  11985. diff --git a/include/linux/rwlock_types_rt.h b/include/linux/rwlock_types_rt.h
  11986. new file mode 100644
  11987. index 000000000000..b13832119591
  11988. --- /dev/null
  11989. +++ b/include/linux/rwlock_types_rt.h
  11990. @@ -0,0 +1,33 @@
  11991. +#ifndef __LINUX_RWLOCK_TYPES_RT_H
  11992. +#define __LINUX_RWLOCK_TYPES_RT_H
  11993. +
  11994. +#ifndef __LINUX_SPINLOCK_TYPES_H
  11995. +#error "Do not include directly. Include spinlock_types.h instead"
  11996. +#endif
  11997. +
  11998. +/*
  11999. + * rwlocks - rtmutex which allows single reader recursion
  12000. + */
  12001. +typedef struct {
  12002. + struct rt_mutex lock;
  12003. + int read_depth;
  12004. + unsigned int break_lock;
  12005. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12006. + struct lockdep_map dep_map;
  12007. +#endif
  12008. +} rwlock_t;
  12009. +
  12010. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12011. +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  12012. +#else
  12013. +# define RW_DEP_MAP_INIT(lockname)
  12014. +#endif
  12015. +
  12016. +#define __RW_LOCK_UNLOCKED(name) \
  12017. + { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \
  12018. + RW_DEP_MAP_INIT(name) }
  12019. +
  12020. +#define DEFINE_RWLOCK(name) \
  12021. + rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
  12022. +
  12023. +#endif
  12024. diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
  12025. index 8f498cdde280..2b2148431f14 100644
  12026. --- a/include/linux/rwsem.h
  12027. +++ b/include/linux/rwsem.h
  12028. @@ -18,6 +18,10 @@
  12029. #include <linux/osq_lock.h>
  12030. #endif
  12031. +#ifdef CONFIG_PREEMPT_RT_FULL
  12032. +#include <linux/rwsem_rt.h>
  12033. +#else /* PREEMPT_RT_FULL */
  12034. +
  12035. struct rw_semaphore;
  12036. #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
  12037. @@ -177,4 +181,6 @@ extern void up_read_non_owner(struct rw_semaphore *sem);
  12038. # define up_read_non_owner(sem) up_read(sem)
  12039. #endif
  12040. +#endif /* !PREEMPT_RT_FULL */
  12041. +
  12042. #endif /* _LINUX_RWSEM_H */
  12043. diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h
  12044. new file mode 100644
  12045. index 000000000000..928a05cbf94f
  12046. --- /dev/null
  12047. +++ b/include/linux/rwsem_rt.h
  12048. @@ -0,0 +1,140 @@
  12049. +#ifndef _LINUX_RWSEM_RT_H
  12050. +#define _LINUX_RWSEM_RT_H
  12051. +
  12052. +#ifndef _LINUX_RWSEM_H
  12053. +#error "Include rwsem.h"
  12054. +#endif
  12055. +
  12056. +/*
  12057. + * RW-semaphores are a spinlock plus a reader-depth count.
  12058. + *
  12059. + * Note that the semantics are different from the usual
  12060. + * Linux rw-sems, in PREEMPT_RT mode we do not allow
  12061. + * multiple readers to hold the lock at once, we only allow
  12062. + * a read-lock owner to read-lock recursively. This is
  12063. + * better for latency, makes the implementation inherently
  12064. + * fair and makes it simpler as well.
  12065. + */
  12066. +
  12067. +#include <linux/rtmutex.h>
  12068. +
  12069. +struct rw_semaphore {
  12070. + struct rt_mutex lock;
  12071. + int read_depth;
  12072. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12073. + struct lockdep_map dep_map;
  12074. +#endif
  12075. +};
  12076. +
  12077. +#define __RWSEM_INITIALIZER(name) \
  12078. + { .lock = __RT_MUTEX_INITIALIZER(name.lock), \
  12079. + RW_DEP_MAP_INIT(name) }
  12080. +
  12081. +#define DECLARE_RWSEM(lockname) \
  12082. + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
  12083. +
  12084. +extern void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
  12085. + struct lock_class_key *key);
  12086. +
  12087. +#define __rt_init_rwsem(sem, name, key) \
  12088. + do { \
  12089. + rt_mutex_init(&(sem)->lock); \
  12090. + __rt_rwsem_init((sem), (name), (key));\
  12091. + } while (0)
  12092. +
  12093. +#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key)
  12094. +
  12095. +# define rt_init_rwsem(sem) \
  12096. +do { \
  12097. + static struct lock_class_key __key; \
  12098. + \
  12099. + __rt_init_rwsem((sem), #sem, &__key); \
  12100. +} while (0)
  12101. +
  12102. +extern void rt_down_write(struct rw_semaphore *rwsem);
  12103. +extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass);
  12104. +extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass);
  12105. +extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
  12106. + struct lockdep_map *nest);
  12107. +extern void rt_down_read(struct rw_semaphore *rwsem);
  12108. +extern int rt_down_write_trylock(struct rw_semaphore *rwsem);
  12109. +extern int rt_down_read_trylock(struct rw_semaphore *rwsem);
  12110. +extern void __rt_up_read(struct rw_semaphore *rwsem);
  12111. +extern void rt_up_read(struct rw_semaphore *rwsem);
  12112. +extern void rt_up_write(struct rw_semaphore *rwsem);
  12113. +extern void rt_downgrade_write(struct rw_semaphore *rwsem);
  12114. +
  12115. +#define init_rwsem(sem) rt_init_rwsem(sem)
  12116. +#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock)
  12117. +
  12118. +static inline int rwsem_is_contended(struct rw_semaphore *sem)
  12119. +{
  12120. + /* rt_mutex_has_waiters() */
  12121. + return !RB_EMPTY_ROOT(&sem->lock.waiters);
  12122. +}
  12123. +
  12124. +static inline void down_read(struct rw_semaphore *sem)
  12125. +{
  12126. + rt_down_read(sem);
  12127. +}
  12128. +
  12129. +static inline int down_read_trylock(struct rw_semaphore *sem)
  12130. +{
  12131. + return rt_down_read_trylock(sem);
  12132. +}
  12133. +
  12134. +static inline void down_write(struct rw_semaphore *sem)
  12135. +{
  12136. + rt_down_write(sem);
  12137. +}
  12138. +
  12139. +static inline int down_write_trylock(struct rw_semaphore *sem)
  12140. +{
  12141. + return rt_down_write_trylock(sem);
  12142. +}
  12143. +
  12144. +static inline void __up_read(struct rw_semaphore *sem)
  12145. +{
  12146. + __rt_up_read(sem);
  12147. +}
  12148. +
  12149. +static inline void up_read(struct rw_semaphore *sem)
  12150. +{
  12151. + rt_up_read(sem);
  12152. +}
  12153. +
  12154. +static inline void up_write(struct rw_semaphore *sem)
  12155. +{
  12156. + rt_up_write(sem);
  12157. +}
  12158. +
  12159. +static inline void downgrade_write(struct rw_semaphore *sem)
  12160. +{
  12161. + rt_downgrade_write(sem);
  12162. +}
  12163. +
  12164. +static inline void down_read_nested(struct rw_semaphore *sem, int subclass)
  12165. +{
  12166. + return rt_down_read_nested(sem, subclass);
  12167. +}
  12168. +
  12169. +static inline void down_write_nested(struct rw_semaphore *sem, int subclass)
  12170. +{
  12171. + rt_down_write_nested(sem, subclass);
  12172. +}
  12173. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12174. +static inline void down_write_nest_lock(struct rw_semaphore *sem,
  12175. + struct rw_semaphore *nest_lock)
  12176. +{
  12177. + rt_down_write_nested_lock(sem, &nest_lock->dep_map);
  12178. +}
  12179. +
  12180. +#else
  12181. +
  12182. +static inline void down_write_nest_lock(struct rw_semaphore *sem,
  12183. + struct rw_semaphore *nest_lock)
  12184. +{
  12185. + rt_down_write_nested_lock(sem, NULL);
  12186. +}
  12187. +#endif
  12188. +#endif
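
For rw_semaphores the RT substitution again hides behind the regular names; as the header comment notes, only one reader holds the lock at a time and only the owner may take the read side recursively. A sketch of an unchanged caller (cfg_sem, cfg_value and the helpers are illustrative):

    #include <linux/rwsem.h>
    #include <linux/errno.h>

    static DECLARE_RWSEM(cfg_sem);
    static int cfg_value;

    static int cfg_get(void)
    {
            int v;

            down_read(&cfg_sem);    /* rt_down_read(): may sleep, PI-aware */
            v = cfg_value;
            up_read(&cfg_sem);
            return v;
    }

    static int cfg_set(int v)
    {
            if (!down_write_trylock(&cfg_sem))      /* rt_down_write_trylock() */
                    return -EBUSY;
            cfg_value = v;
            up_write(&cfg_sem);
            return 0;
    }
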
  12189. diff --git a/include/linux/sched.h b/include/linux/sched.h
  12190. index 9e39deaeddd6..769f2cf30963 100644
  12191. --- a/include/linux/sched.h
  12192. +++ b/include/linux/sched.h
  12193. @@ -26,6 +26,7 @@ struct sched_param {
  12194. #include <linux/nodemask.h>
  12195. #include <linux/mm_types.h>
  12196. #include <linux/preempt_mask.h>
  12197. +#include <asm/kmap_types.h>
  12198. #include <asm/page.h>
  12199. #include <asm/ptrace.h>
  12200. @@ -175,8 +176,6 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
  12201. extern void calc_global_load(unsigned long ticks);
  12202. extern void update_cpu_load_nohz(void);
  12203. -extern unsigned long get_parent_ip(unsigned long addr);
  12204. -
  12205. extern void dump_cpu_task(int cpu);
  12206. struct seq_file;
  12207. @@ -234,10 +233,7 @@ extern char ___assert_task_state[1 - 2*!!(
  12208. TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
  12209. __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
  12210. -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
  12211. #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
  12212. -#define task_is_stopped_or_traced(task) \
  12213. - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
  12214. #define task_contributes_to_load(task) \
  12215. ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
  12216. (task->flags & PF_FROZEN) == 0)
  12217. @@ -302,6 +298,11 @@ extern char ___assert_task_state[1 - 2*!!(
  12218. #endif
  12219. +#define __set_current_state_no_track(state_value) \
  12220. + do { current->state = (state_value); } while (0)
  12221. +#define set_current_state_no_track(state_value) \
  12222. + set_mb(current->state, (state_value))
  12223. +
  12224. /* Task command name length */
  12225. #define TASK_COMM_LEN 16
  12226. @@ -902,6 +903,50 @@ enum cpu_idle_type {
  12227. #define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
  12228. /*
  12229. + * Wake-queues are lists of tasks with a pending wakeup, whose
  12230. + * callers have already marked the task as woken internally,
  12231. + * and can thus carry on. A common use case is being able to
12232. + * do the wakeups once the corresponding user lock has been
  12233. + * released.
  12234. + *
  12235. + * We hold reference to each task in the list across the wakeup,
  12236. + * thus guaranteeing that the memory is still valid by the time
  12237. + * the actual wakeups are performed in wake_up_q().
  12238. + *
  12239. + * One per task suffices, because there's never a need for a task to be
  12240. + * in two wake queues simultaneously; it is forbidden to abandon a task
  12241. + * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is
  12242. + * already in a wake queue, the wakeup will happen soon and the second
  12243. + * waker can just skip it.
  12244. + *
  12245. + * The WAKE_Q macro declares and initializes the list head.
  12246. + * wake_up_q() does NOT reinitialize the list; it's expected to be
  12247. + * called near the end of a function, where the fact that the queue is
  12248. + * not used again will be easy to see by inspection.
  12249. + *
  12250. + * Note that this can cause spurious wakeups. schedule() callers
  12251. + * must ensure the call is done inside a loop, confirming that the
  12252. + * wakeup condition has in fact occurred.
  12253. + */
  12254. +struct wake_q_node {
  12255. + struct wake_q_node *next;
  12256. +};
  12257. +
  12258. +struct wake_q_head {
  12259. + struct wake_q_node *first;
  12260. + struct wake_q_node **lastp;
  12261. +};
  12262. +
  12263. +#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
  12264. +
  12265. +#define WAKE_Q(name) \
  12266. + struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
  12267. +
  12268. +extern void wake_q_add(struct wake_q_head *head,
  12269. + struct task_struct *task);
  12270. +extern void wake_up_q(struct wake_q_head *head);
  12271. +
  12272. +/*
  12273. * sched-domains (multiprocessor balancing) declarations:
  12274. */
  12275. #ifdef CONFIG_SMP
  12276. @@ -1293,6 +1338,7 @@ enum perf_event_task_context {
  12277. struct task_struct {
  12278. volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
  12279. + volatile long saved_state; /* saved state for "spinlock sleepers" */
  12280. void *stack;
  12281. atomic_t usage;
  12282. unsigned int flags; /* per process flags, defined below */
  12283. @@ -1329,6 +1375,12 @@ struct task_struct {
  12284. #endif
  12285. unsigned int policy;
  12286. +#ifdef CONFIG_PREEMPT_RT_FULL
  12287. + int migrate_disable;
  12288. +# ifdef CONFIG_SCHED_DEBUG
  12289. + int migrate_disable_atomic;
  12290. +# endif
  12291. +#endif
  12292. int nr_cpus_allowed;
  12293. cpumask_t cpus_allowed;
  12294. @@ -1436,7 +1488,8 @@ struct task_struct {
  12295. struct cputime prev_cputime;
  12296. #endif
  12297. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  12298. - seqlock_t vtime_seqlock;
  12299. + raw_spinlock_t vtime_lock;
  12300. + seqcount_t vtime_seq;
  12301. unsigned long long vtime_snap;
  12302. enum {
  12303. VTIME_SLEEPING = 0,
  12304. @@ -1452,6 +1505,9 @@ struct task_struct {
  12305. struct task_cputime cputime_expires;
  12306. struct list_head cpu_timers[3];
  12307. +#ifdef CONFIG_PREEMPT_RT_BASE
  12308. + struct task_struct *posix_timer_list;
  12309. +#endif
  12310. /* process credentials */
  12311. const struct cred __rcu *real_cred; /* objective and real subjective task
  12312. @@ -1484,10 +1540,15 @@ struct task_struct {
  12313. /* signal handlers */
  12314. struct signal_struct *signal;
  12315. struct sighand_struct *sighand;
  12316. + struct sigqueue *sigqueue_cache;
  12317. sigset_t blocked, real_blocked;
  12318. sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
  12319. struct sigpending pending;
  12320. +#ifdef CONFIG_PREEMPT_RT_FULL
  12321. + /* TODO: move me into ->restart_block ? */
  12322. + struct siginfo forced_info;
  12323. +#endif
  12324. unsigned long sas_ss_sp;
  12325. size_t sas_ss_size;
  12326. @@ -1513,6 +1574,8 @@ struct task_struct {
  12327. /* Protection of the PI data structures: */
  12328. raw_spinlock_t pi_lock;
  12329. + struct wake_q_node wake_q;
  12330. +
  12331. #ifdef CONFIG_RT_MUTEXES
  12332. /* PI waiters blocked on a rt_mutex held by this task */
  12333. struct rb_root pi_waiters;
  12334. @@ -1707,6 +1770,12 @@ struct task_struct {
  12335. unsigned long trace;
  12336. /* bitmask and counter of trace recursion */
  12337. unsigned long trace_recursion;
  12338. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  12339. + u64 preempt_timestamp_hist;
  12340. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  12341. + long timer_offset;
  12342. +#endif
  12343. +#endif
  12344. #endif /* CONFIG_TRACING */
  12345. #ifdef CONFIG_MEMCG
  12346. struct memcg_oom_info {
  12347. @@ -1723,14 +1792,26 @@ struct task_struct {
  12348. unsigned int sequential_io;
  12349. unsigned int sequential_io_avg;
  12350. #endif
  12351. +#ifdef CONFIG_PREEMPT_RT_BASE
  12352. + struct rcu_head put_rcu;
  12353. + int softirq_nestcnt;
  12354. + unsigned int softirqs_raised;
  12355. +#endif
  12356. +#ifdef CONFIG_PREEMPT_RT_FULL
  12357. +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32
  12358. + int kmap_idx;
  12359. + pte_t kmap_pte[KM_TYPE_NR];
  12360. +# endif
  12361. +#endif
  12362. #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  12363. unsigned long task_state_change;
  12364. #endif
  12365. +#ifdef CONFIG_PREEMPT_RT_FULL
  12366. + int xmit_recursion;
  12367. +#endif
  12368. + int pagefault_disabled;
  12369. };
  12370. -/* Future-safe accessor for struct task_struct's cpus_allowed. */
  12371. -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
  12372. -
  12373. #define TNF_MIGRATED 0x01
  12374. #define TNF_NO_GROUP 0x02
  12375. #define TNF_SHARED 0x04
  12376. @@ -1919,6 +2000,15 @@ extern struct pid *cad_pid;
  12377. extern void free_task(struct task_struct *tsk);
  12378. #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
  12379. +#ifdef CONFIG_PREEMPT_RT_BASE
  12380. +extern void __put_task_struct_cb(struct rcu_head *rhp);
  12381. +
  12382. +static inline void put_task_struct(struct task_struct *t)
  12383. +{
  12384. + if (atomic_dec_and_test(&t->usage))
  12385. + call_rcu(&t->put_rcu, __put_task_struct_cb);
  12386. +}
  12387. +#else
  12388. extern void __put_task_struct(struct task_struct *t);
  12389. static inline void put_task_struct(struct task_struct *t)
  12390. @@ -1926,6 +2016,7 @@ static inline void put_task_struct(struct task_struct *t)
  12391. if (atomic_dec_and_test(&t->usage))
  12392. __put_task_struct(t);
  12393. }
  12394. +#endif
  12395. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  12396. extern void task_cputime(struct task_struct *t,
  12397. @@ -1964,6 +2055,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
  12398. /*
  12399. * Per process flags
  12400. */
  12401. +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */
  12402. #define PF_EXITING 0x00000004 /* getting shut down */
  12403. #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
  12404. #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
  12405. @@ -2128,6 +2220,10 @@ extern void do_set_cpus_allowed(struct task_struct *p,
  12406. extern int set_cpus_allowed_ptr(struct task_struct *p,
  12407. const struct cpumask *new_mask);
  12408. +int migrate_me(void);
  12409. +void tell_sched_cpu_down_begin(int cpu);
  12410. +void tell_sched_cpu_down_done(int cpu);
  12411. +
  12412. #else
  12413. static inline void do_set_cpus_allowed(struct task_struct *p,
  12414. const struct cpumask *new_mask)
  12415. @@ -2140,6 +2236,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
  12416. return -EINVAL;
  12417. return 0;
  12418. }
  12419. +static inline int migrate_me(void) { return 0; }
  12420. +static inline void tell_sched_cpu_down_begin(int cpu) { }
  12421. +static inline void tell_sched_cpu_down_done(int cpu) { }
  12422. #endif
  12423. #ifdef CONFIG_NO_HZ_COMMON
  12424. @@ -2356,6 +2455,7 @@ extern void xtime_update(unsigned long ticks);
  12425. extern int wake_up_state(struct task_struct *tsk, unsigned int state);
  12426. extern int wake_up_process(struct task_struct *tsk);
  12427. +extern int wake_up_lock_sleeper(struct task_struct * tsk);
  12428. extern void wake_up_new_task(struct task_struct *tsk);
  12429. #ifdef CONFIG_SMP
  12430. extern void kick_process(struct task_struct *tsk);
  12431. @@ -2472,12 +2572,24 @@ extern struct mm_struct * mm_alloc(void);
  12432. /* mmdrop drops the mm and the page tables */
  12433. extern void __mmdrop(struct mm_struct *);
  12434. +
  12435. static inline void mmdrop(struct mm_struct * mm)
  12436. {
  12437. if (unlikely(atomic_dec_and_test(&mm->mm_count)))
  12438. __mmdrop(mm);
  12439. }
  12440. +#ifdef CONFIG_PREEMPT_RT_BASE
  12441. +extern void __mmdrop_delayed(struct rcu_head *rhp);
  12442. +static inline void mmdrop_delayed(struct mm_struct *mm)
  12443. +{
  12444. + if (atomic_dec_and_test(&mm->mm_count))
  12445. + call_rcu(&mm->delayed_drop, __mmdrop_delayed);
  12446. +}
  12447. +#else
  12448. +# define mmdrop_delayed(mm) mmdrop(mm)
  12449. +#endif
  12450. +
  12451. /* mmput gets rid of the mappings and all user-space */
  12452. extern void mmput(struct mm_struct *);
  12453. /* Grab a reference to a task's mm, if it is not already going away */
  12454. @@ -2789,6 +2901,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
  12455. return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
  12456. }
  12457. +#ifdef CONFIG_PREEMPT_LAZY
  12458. +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
  12459. +{
  12460. + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
  12461. +}
  12462. +
  12463. +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
  12464. +{
  12465. + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
  12466. +}
  12467. +
  12468. +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
  12469. +{
  12470. + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
  12471. +}
  12472. +
  12473. +static inline int need_resched_lazy(void)
  12474. +{
  12475. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  12476. +}
  12477. +
  12478. +static inline int need_resched_now(void)
  12479. +{
  12480. + return test_thread_flag(TIF_NEED_RESCHED);
  12481. +}
  12482. +
  12483. +#else
  12484. +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
  12485. +static inline int need_resched_lazy(void) { return 0; }
  12486. +
  12487. +static inline int need_resched_now(void)
  12488. +{
  12489. + return test_thread_flag(TIF_NEED_RESCHED);
  12490. +}
  12491. +
  12492. +#endif
  12493. +
  12494. static inline int restart_syscall(void)
  12495. {
  12496. set_tsk_thread_flag(current, TIF_SIGPENDING);
  12497. @@ -2820,6 +2969,51 @@ static inline int signal_pending_state(long state, struct task_struct *p)
  12498. return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
  12499. }
  12500. +static inline bool __task_is_stopped_or_traced(struct task_struct *task)
  12501. +{
  12502. + if (task->state & (__TASK_STOPPED | __TASK_TRACED))
  12503. + return true;
  12504. +#ifdef CONFIG_PREEMPT_RT_FULL
  12505. + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
  12506. + return true;
  12507. +#endif
  12508. + return false;
  12509. +}
  12510. +
  12511. +static inline bool task_is_stopped_or_traced(struct task_struct *task)
  12512. +{
  12513. + bool traced_stopped;
  12514. +
  12515. +#ifdef CONFIG_PREEMPT_RT_FULL
  12516. + unsigned long flags;
  12517. +
  12518. + raw_spin_lock_irqsave(&task->pi_lock, flags);
  12519. + traced_stopped = __task_is_stopped_or_traced(task);
  12520. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  12521. +#else
  12522. + traced_stopped = __task_is_stopped_or_traced(task);
  12523. +#endif
  12524. + return traced_stopped;
  12525. +}
  12526. +
  12527. +static inline bool task_is_traced(struct task_struct *task)
  12528. +{
  12529. + bool traced = false;
  12530. +
  12531. + if (task->state & __TASK_TRACED)
  12532. + return true;
  12533. +#ifdef CONFIG_PREEMPT_RT_FULL
  12534. + /* in case the task is sleeping on tasklist_lock */
  12535. + raw_spin_lock_irq(&task->pi_lock);
  12536. + if (task->state & __TASK_TRACED)
  12537. + traced = true;
  12538. + else if (task->saved_state & __TASK_TRACED)
  12539. + traced = true;
  12540. + raw_spin_unlock_irq(&task->pi_lock);
  12541. +#endif
  12542. + return traced;
  12543. +}
  12544. +
  12545. /*
  12546. * cond_resched() and cond_resched_lock(): latency reduction via
  12547. * explicit rescheduling in places that are safe. The return
  12548. @@ -2841,12 +3035,16 @@ extern int __cond_resched_lock(spinlock_t *lock);
  12549. __cond_resched_lock(lock); \
  12550. })
  12551. +#ifndef CONFIG_PREEMPT_RT_FULL
  12552. extern int __cond_resched_softirq(void);
  12553. #define cond_resched_softirq() ({ \
  12554. ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
  12555. __cond_resched_softirq(); \
  12556. })
  12557. +#else
  12558. +# define cond_resched_softirq() cond_resched()
  12559. +#endif
  12560. static inline void cond_resched_rcu(void)
  12561. {
  12562. @@ -3013,6 +3211,26 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
  12563. #endif /* CONFIG_SMP */
  12564. +static inline int __migrate_disabled(struct task_struct *p)
  12565. +{
  12566. +#ifdef CONFIG_PREEMPT_RT_FULL
  12567. + return p->migrate_disable;
  12568. +#else
  12569. + return 0;
  12570. +#endif
  12571. +}
  12572. +
  12573. +/* Future-safe accessor for struct task_struct's cpus_allowed. */
  12574. +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
  12575. +{
  12576. +#ifdef CONFIG_PREEMPT_RT_FULL
  12577. + if (p->migrate_disable)
  12578. + return cpumask_of(task_cpu(p));
  12579. +#endif
  12580. +
  12581. + return &p->cpus_allowed;
  12582. +}
  12583. +
  12584. extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
  12585. extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
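
Of the sched.h additions above, the wake-queue helpers are the piece later locking changes build on: wakeups are queued while an internal lock is held and issued only after it is dropped, exactly as the comment block describes. A minimal sketch of that pattern; the waiter structure, list and lock are illustrative:

    #include <linux/sched.h>
    #include <linux/spinlock.h>
    #include <linux/list.h>

    struct waiter {
            struct task_struct *task;
            struct list_head node;
    };

    static void wake_all_waiters(spinlock_t *lock, struct list_head *waiters)
    {
            struct waiter *w, *tmp;
            WAKE_Q(wq);                     /* on-stack head, starts empty */

            spin_lock(lock);
            list_for_each_entry_safe(w, tmp, waiters, node) {
                    list_del(&w->node);
                    wake_q_add(&wq, w->task);       /* holds a task reference across the wakeup */
            }
            spin_unlock(lock);

            wake_up_q(&wq);                 /* actual wakeups, outside the lock */
    }
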
  12586. diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
  12587. index c07e3a536099..381bf3999617 100644
  12588. --- a/include/linux/seqlock.h
  12589. +++ b/include/linux/seqlock.h
  12590. @@ -219,20 +219,30 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
  12591. return __read_seqcount_retry(s, start);
  12592. }
  12593. -
  12594. -
  12595. -static inline void raw_write_seqcount_begin(seqcount_t *s)
  12596. +static inline void __raw_write_seqcount_begin(seqcount_t *s)
  12597. {
  12598. s->sequence++;
  12599. smp_wmb();
  12600. }
  12601. -static inline void raw_write_seqcount_end(seqcount_t *s)
  12602. +static inline void raw_write_seqcount_begin(seqcount_t *s)
  12603. +{
  12604. + preempt_disable_rt();
  12605. + __raw_write_seqcount_begin(s);
  12606. +}
  12607. +
  12608. +static inline void __raw_write_seqcount_end(seqcount_t *s)
  12609. {
  12610. smp_wmb();
  12611. s->sequence++;
  12612. }
  12613. +static inline void raw_write_seqcount_end(seqcount_t *s)
  12614. +{
  12615. + __raw_write_seqcount_end(s);
  12616. + preempt_enable_rt();
  12617. +}
  12618. +
  12619. /*
  12620. * raw_write_seqcount_latch - redirect readers to even/odd copy
  12621. * @s: pointer to seqcount_t
  12622. @@ -305,10 +315,32 @@ typedef struct {
  12623. /*
  12624. * Read side functions for starting and finalizing a read side section.
  12625. */
  12626. +#ifndef CONFIG_PREEMPT_RT_FULL
  12627. static inline unsigned read_seqbegin(const seqlock_t *sl)
  12628. {
  12629. return read_seqcount_begin(&sl->seqcount);
  12630. }
  12631. +#else
  12632. +/*
  12633. + * Starvation safe read side for RT
  12634. + */
  12635. +static inline unsigned read_seqbegin(seqlock_t *sl)
  12636. +{
  12637. + unsigned ret;
  12638. +
  12639. +repeat:
  12640. + ret = ACCESS_ONCE(sl->seqcount.sequence);
  12641. + if (unlikely(ret & 1)) {
  12642. + /*
12643. + * Take the lock and let the writer proceed (i.e. possibly
  12644. + * boost it), otherwise we could loop here forever.
  12645. + */
  12646. + spin_unlock_wait(&sl->lock);
  12647. + goto repeat;
  12648. + }
  12649. + return ret;
  12650. +}
  12651. +#endif
  12652. static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
  12653. {
  12654. @@ -323,36 +355,36 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
  12655. static inline void write_seqlock(seqlock_t *sl)
  12656. {
  12657. spin_lock(&sl->lock);
  12658. - write_seqcount_begin(&sl->seqcount);
  12659. + __raw_write_seqcount_begin(&sl->seqcount);
  12660. }
  12661. static inline void write_sequnlock(seqlock_t *sl)
  12662. {
  12663. - write_seqcount_end(&sl->seqcount);
  12664. + __raw_write_seqcount_end(&sl->seqcount);
  12665. spin_unlock(&sl->lock);
  12666. }
  12667. static inline void write_seqlock_bh(seqlock_t *sl)
  12668. {
  12669. spin_lock_bh(&sl->lock);
  12670. - write_seqcount_begin(&sl->seqcount);
  12671. + __raw_write_seqcount_begin(&sl->seqcount);
  12672. }
  12673. static inline void write_sequnlock_bh(seqlock_t *sl)
  12674. {
  12675. - write_seqcount_end(&sl->seqcount);
  12676. + __raw_write_seqcount_end(&sl->seqcount);
  12677. spin_unlock_bh(&sl->lock);
  12678. }
  12679. static inline void write_seqlock_irq(seqlock_t *sl)
  12680. {
  12681. spin_lock_irq(&sl->lock);
  12682. - write_seqcount_begin(&sl->seqcount);
  12683. + __raw_write_seqcount_begin(&sl->seqcount);
  12684. }
  12685. static inline void write_sequnlock_irq(seqlock_t *sl)
  12686. {
  12687. - write_seqcount_end(&sl->seqcount);
  12688. + __raw_write_seqcount_end(&sl->seqcount);
  12689. spin_unlock_irq(&sl->lock);
  12690. }
  12691. @@ -361,7 +393,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
  12692. unsigned long flags;
  12693. spin_lock_irqsave(&sl->lock, flags);
  12694. - write_seqcount_begin(&sl->seqcount);
  12695. + __raw_write_seqcount_begin(&sl->seqcount);
  12696. return flags;
  12697. }
  12698. @@ -371,7 +403,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
  12699. static inline void
  12700. write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
  12701. {
  12702. - write_seqcount_end(&sl->seqcount);
  12703. + __raw_write_seqcount_end(&sl->seqcount);
  12704. spin_unlock_irqrestore(&sl->lock, flags);
  12705. }
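
The seqlock split above keeps writers serialized by the embedded spinlock (which may itself sleep on RT) and lets the RT read side wait on that lock via spin_unlock_wait() rather than spinning against a preempted writer. The calling convention is untouched; an illustrative sketch:

    #include <linux/seqlock.h>

    static DEFINE_SEQLOCK(state_lock);
    static u64 state_a, state_b;

    static void state_update(u64 a, u64 b)
    {
            write_seqlock(&state_lock);     /* spin_lock() + __raw_write_seqcount_begin() */
            state_a = a;
            state_b = b;
            write_sequnlock(&state_lock);
    }

    static u64 state_sum(void)
    {
            unsigned seq;
            u64 sum;

            do {
                    seq = read_seqbegin(&state_lock);
                    sum = state_a + state_b;
            } while (read_seqretry(&state_lock, seq));

            return sum;
    }
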
  12706. diff --git a/include/linux/signal.h b/include/linux/signal.h
  12707. index 883ceb1439fa..6da98d067bad 100644
  12708. --- a/include/linux/signal.h
  12709. +++ b/include/linux/signal.h
  12710. @@ -233,6 +233,7 @@ static inline void init_sigpending(struct sigpending *sig)
  12711. }
  12712. extern void flush_sigqueue(struct sigpending *queue);
  12713. +extern void flush_task_sigqueue(struct task_struct *tsk);
  12714. /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
  12715. static inline int valid_signal(unsigned long sig)
  12716. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
  12717. index ca2e26a486ee..ea41a11d3bc7 100644
  12718. --- a/include/linux/skbuff.h
  12719. +++ b/include/linux/skbuff.h
  12720. @@ -187,6 +187,7 @@ struct sk_buff_head {
  12721. __u32 qlen;
  12722. spinlock_t lock;
  12723. + raw_spinlock_t raw_lock;
  12724. };
  12725. struct sk_buff;
  12726. @@ -1337,6 +1338,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list)
  12727. __skb_queue_head_init(list);
  12728. }
  12729. +static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
  12730. +{
  12731. + raw_spin_lock_init(&list->raw_lock);
  12732. + __skb_queue_head_init(list);
  12733. +}
  12734. +
  12735. static inline void skb_queue_head_init_class(struct sk_buff_head *list,
  12736. struct lock_class_key *class)
  12737. {
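
skb_queue_head_init_raw() pairs the new raw_lock with the existing lockless __skb_* helpers for queues that must be manipulated from truly atomic context even on RT. A hedged sketch, assuming the queue is only ever touched under its raw_lock as shown (txq and the helpers are invented names):

    #include <linux/skbuff.h>

    static struct sk_buff_head txq;

    static void txq_setup(void)
    {
            skb_queue_head_init_raw(&txq);  /* initializes raw_lock + empty list */
    }

    static void txq_add(struct sk_buff *skb)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&txq.raw_lock, flags);
            __skb_queue_tail(&txq, skb);    /* list operation only, no internal locking */
            raw_spin_unlock_irqrestore(&txq.raw_lock, flags);
    }
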
  12738. diff --git a/include/linux/smp.h b/include/linux/smp.h
  12739. index c4414074bd88..e6ab36aeaaab 100644
  12740. --- a/include/linux/smp.h
  12741. +++ b/include/linux/smp.h
  12742. @@ -185,6 +185,9 @@ static inline void smp_init(void) { }
  12743. #define get_cpu() ({ preempt_disable(); smp_processor_id(); })
  12744. #define put_cpu() preempt_enable()
  12745. +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
  12746. +#define put_cpu_light() migrate_enable()
  12747. +
  12748. /*
  12749. * Callback to arch code if there's nosmp or maxcpus=0 on the
  12750. * boot command line:
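
get_cpu_light()/put_cpu_light() serve code that only needs to stay on its current CPU but may still sleep on RT: migration is disabled, preemption is not. An illustrative read-only use; my_counter is an invented per-CPU variable:

    #include <linux/smp.h>
    #include <linux/percpu.h>

    static DEFINE_PER_CPU(unsigned long, my_counter);

    static unsigned long read_this_cpu_counter(void)
    {
            unsigned long val;
            int cpu;

            cpu = get_cpu_light();          /* migrate_disable(); still preemptible on RT */
            val = per_cpu(my_counter, cpu); /* stable: the task cannot migrate away */
            put_cpu_light();                /* migrate_enable() */
            return val;
    }
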
  12751. diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
  12752. index 3e18379dfa6f..28f4366fd495 100644
  12753. --- a/include/linux/spinlock.h
  12754. +++ b/include/linux/spinlock.h
  12755. @@ -281,7 +281,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
  12756. #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock))
  12757. /* Include rwlock functions */
  12758. -#include <linux/rwlock.h>
  12759. +#ifdef CONFIG_PREEMPT_RT_FULL
  12760. +# include <linux/rwlock_rt.h>
  12761. +#else
  12762. +# include <linux/rwlock.h>
  12763. +#endif
  12764. /*
  12765. * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
  12766. @@ -292,6 +296,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
  12767. # include <linux/spinlock_api_up.h>
  12768. #endif
  12769. +#ifdef CONFIG_PREEMPT_RT_FULL
  12770. +# include <linux/spinlock_rt.h>
  12771. +#else /* PREEMPT_RT_FULL */
  12772. +
  12773. /*
  12774. * Map the spin_lock functions to the raw variants for PREEMPT_RT=n
  12775. */
  12776. @@ -426,4 +434,6 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
  12777. #define atomic_dec_and_lock(atomic, lock) \
  12778. __cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
  12779. +#endif /* !PREEMPT_RT_FULL */
  12780. +
  12781. #endif /* __LINUX_SPINLOCK_H */
  12782. diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
  12783. index 5344268e6e62..043263f30e81 100644
  12784. --- a/include/linux/spinlock_api_smp.h
  12785. +++ b/include/linux/spinlock_api_smp.h
  12786. @@ -189,6 +189,8 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock)
  12787. return 0;
  12788. }
  12789. -#include <linux/rwlock_api_smp.h>
  12790. +#ifndef CONFIG_PREEMPT_RT_FULL
  12791. +# include <linux/rwlock_api_smp.h>
  12792. +#endif
  12793. #endif /* __LINUX_SPINLOCK_API_SMP_H */
  12794. diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
  12795. new file mode 100644
  12796. index 000000000000..f757096b230c
  12797. --- /dev/null
  12798. +++ b/include/linux/spinlock_rt.h
  12799. @@ -0,0 +1,174 @@
  12800. +#ifndef __LINUX_SPINLOCK_RT_H
  12801. +#define __LINUX_SPINLOCK_RT_H
  12802. +
  12803. +#ifndef __LINUX_SPINLOCK_H
  12804. +#error Do not include directly. Use spinlock.h
  12805. +#endif
  12806. +
  12807. +#include <linux/bug.h>
  12808. +
  12809. +extern void
  12810. +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key);
  12811. +
  12812. +#define spin_lock_init(slock) \
  12813. +do { \
  12814. + static struct lock_class_key __key; \
  12815. + \
  12816. + rt_mutex_init(&(slock)->lock); \
  12817. + __rt_spin_lock_init(slock, #slock, &__key); \
  12818. +} while (0)
  12819. +
  12820. +extern void __lockfunc rt_spin_lock(spinlock_t *lock);
  12821. +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
  12822. +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
  12823. +extern void __lockfunc rt_spin_unlock(spinlock_t *lock);
  12824. +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock);
  12825. +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags);
  12826. +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock);
  12827. +extern int __lockfunc rt_spin_trylock(spinlock_t *lock);
  12828. +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock);
  12829. +
  12830. +/*
  12831. + * lockdep-less calls, for derived types like rwlock:
  12832. + * (for trylock they can use rt_mutex_trylock() directly.
  12833. + */
  12834. +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
  12835. +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
  12836. +extern int __lockfunc __rt_spin_trylock(struct rt_mutex *lock);
  12837. +
  12838. +#define spin_lock(lock) \
  12839. + do { \
  12840. + migrate_disable(); \
  12841. + rt_spin_lock(lock); \
  12842. + } while (0)
  12843. +
  12844. +#define spin_lock_bh(lock) \
  12845. + do { \
  12846. + local_bh_disable(); \
  12847. + migrate_disable(); \
  12848. + rt_spin_lock(lock); \
  12849. + } while (0)
  12850. +
  12851. +#define spin_lock_irq(lock) spin_lock(lock)
  12852. +
  12853. +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock))
  12854. +
  12855. +#define spin_trylock(lock) \
  12856. +({ \
  12857. + int __locked; \
  12858. + migrate_disable(); \
  12859. + __locked = spin_do_trylock(lock); \
  12860. + if (!__locked) \
  12861. + migrate_enable(); \
  12862. + __locked; \
  12863. +})
  12864. +
  12865. +#ifdef CONFIG_LOCKDEP
  12866. +# define spin_lock_nested(lock, subclass) \
  12867. + do { \
  12868. + migrate_disable(); \
  12869. + rt_spin_lock_nested(lock, subclass); \
  12870. + } while (0)
  12871. +
  12872. +#define spin_lock_bh_nested(lock, subclass) \
  12873. + do { \
  12874. + local_bh_disable(); \
  12875. + migrate_disable(); \
  12876. + rt_spin_lock_nested(lock, subclass); \
  12877. + } while (0)
  12878. +
  12879. +# define spin_lock_irqsave_nested(lock, flags, subclass) \
  12880. + do { \
  12881. + typecheck(unsigned long, flags); \
  12882. + flags = 0; \
  12883. + migrate_disable(); \
  12884. + rt_spin_lock_nested(lock, subclass); \
  12885. + } while (0)
  12886. +#else
  12887. +# define spin_lock_nested(lock, subclass) spin_lock(lock)
  12888. +# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock)
  12889. +
  12890. +# define spin_lock_irqsave_nested(lock, flags, subclass) \
  12891. + do { \
  12892. + typecheck(unsigned long, flags); \
  12893. + flags = 0; \
  12894. + spin_lock(lock); \
  12895. + } while (0)
  12896. +#endif
  12897. +
  12898. +#define spin_lock_irqsave(lock, flags) \
  12899. + do { \
  12900. + typecheck(unsigned long, flags); \
  12901. + flags = 0; \
  12902. + spin_lock(lock); \
  12903. + } while (0)
  12904. +
  12905. +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock)
  12906. +{
  12907. + unsigned long flags = 0;
  12908. +#ifdef CONFIG_TRACE_IRQFLAGS
  12909. + flags = rt_spin_lock_trace_flags(lock);
  12910. +#else
  12911. + spin_lock(lock); /* lock_local */
  12912. +#endif
  12913. + return flags;
  12914. +}
  12915. +
  12916. +/* FIXME: we need rt_spin_lock_nest_lock */
  12917. +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0)
  12918. +
  12919. +#define spin_unlock(lock) \
  12920. + do { \
  12921. + rt_spin_unlock(lock); \
  12922. + migrate_enable(); \
  12923. + } while (0)
  12924. +
  12925. +#define spin_unlock_bh(lock) \
  12926. + do { \
  12927. + rt_spin_unlock(lock); \
  12928. + migrate_enable(); \
  12929. + local_bh_enable(); \
  12930. + } while (0)
  12931. +
  12932. +#define spin_unlock_irq(lock) spin_unlock(lock)
  12933. +
  12934. +#define spin_unlock_irqrestore(lock, flags) \
  12935. + do { \
  12936. + typecheck(unsigned long, flags); \
  12937. + (void) flags; \
  12938. + spin_unlock(lock); \
  12939. + } while (0)
  12940. +
  12941. +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock))
  12942. +#define spin_trylock_irq(lock) spin_trylock(lock)
  12943. +
  12944. +#define spin_trylock_irqsave(lock, flags) \
  12945. + rt_spin_trylock_irqsave(lock, &(flags))
  12946. +
  12947. +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock)
  12948. +
  12949. +#ifdef CONFIG_GENERIC_LOCKBREAK
  12950. +# define spin_is_contended(lock) ((lock)->break_lock)
  12951. +#else
  12952. +# define spin_is_contended(lock) (((void)(lock), 0))
  12953. +#endif
  12954. +
  12955. +static inline int spin_can_lock(spinlock_t *lock)
  12956. +{
  12957. + return !rt_mutex_is_locked(&lock->lock);
  12958. +}
  12959. +
  12960. +static inline int spin_is_locked(spinlock_t *lock)
  12961. +{
  12962. + return rt_mutex_is_locked(&lock->lock);
  12963. +}
  12964. +
  12965. +static inline void assert_spin_locked(spinlock_t *lock)
  12966. +{
  12967. + BUG_ON(!spin_is_locked(lock));
  12968. +}
  12969. +
  12970. +#define atomic_dec_and_lock(atomic, lock) \
  12971. + atomic_dec_and_spin_lock(atomic, lock)
  12972. +
  12973. +#endif
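
spinlock_rt.h carries the central substitution: spinlock_t becomes a sleeping, priority-inheriting rtmutex, spin_lock() additionally calls migrate_disable(), and the _irq/_irqsave forms keep only their prototypes (flags is forced to 0 and interrupts stay enabled). Existing users compile unchanged; a sketch with invented names:

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(dev_lock);
    static unsigned int dev_pending;

    static void dev_queue_bits(unsigned int bits)
    {
            unsigned long flags;

            /* RT: migrate_disable() + rt_spin_lock(); flags is just 0 */
            spin_lock_irqsave(&dev_lock, flags);
            dev_pending |= bits;
            spin_unlock_irqrestore(&dev_lock, flags);
    }

    static bool dev_try_fetch(unsigned int *bits)
    {
            if (!spin_trylock(&dev_lock))   /* on failure the macro re-enables migration */
                    return false;
            *bits = dev_pending;
            dev_pending = 0;
            spin_unlock(&dev_lock);
            return true;
    }
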
  12974. diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
  12975. index 73548eb13a5d..10bac715ea96 100644
  12976. --- a/include/linux/spinlock_types.h
  12977. +++ b/include/linux/spinlock_types.h
  12978. @@ -9,80 +9,15 @@
  12979. * Released under the General Public License (GPL).
  12980. */
  12981. -#if defined(CONFIG_SMP)
  12982. -# include <asm/spinlock_types.h>
  12983. -#else
  12984. -# include <linux/spinlock_types_up.h>
  12985. -#endif
  12986. -
  12987. -#include <linux/lockdep.h>
  12988. -
  12989. -typedef struct raw_spinlock {
  12990. - arch_spinlock_t raw_lock;
  12991. -#ifdef CONFIG_GENERIC_LOCKBREAK
  12992. - unsigned int break_lock;
  12993. -#endif
  12994. -#ifdef CONFIG_DEBUG_SPINLOCK
  12995. - unsigned int magic, owner_cpu;
  12996. - void *owner;
  12997. -#endif
  12998. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12999. - struct lockdep_map dep_map;
  13000. -#endif
  13001. -} raw_spinlock_t;
  13002. -
  13003. -#define SPINLOCK_MAGIC 0xdead4ead
  13004. -
  13005. -#define SPINLOCK_OWNER_INIT ((void *)-1L)
  13006. -
  13007. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13008. -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  13009. -#else
  13010. -# define SPIN_DEP_MAP_INIT(lockname)
  13011. -#endif
  13012. +#include <linux/spinlock_types_raw.h>
  13013. -#ifdef CONFIG_DEBUG_SPINLOCK
  13014. -# define SPIN_DEBUG_INIT(lockname) \
  13015. - .magic = SPINLOCK_MAGIC, \
  13016. - .owner_cpu = -1, \
  13017. - .owner = SPINLOCK_OWNER_INIT,
  13018. +#ifndef CONFIG_PREEMPT_RT_FULL
  13019. +# include <linux/spinlock_types_nort.h>
  13020. +# include <linux/rwlock_types.h>
  13021. #else
  13022. -# define SPIN_DEBUG_INIT(lockname)
  13023. +# include <linux/rtmutex.h>
  13024. +# include <linux/spinlock_types_rt.h>
  13025. +# include <linux/rwlock_types_rt.h>
  13026. #endif
  13027. -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
  13028. - { \
  13029. - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
  13030. - SPIN_DEBUG_INIT(lockname) \
  13031. - SPIN_DEP_MAP_INIT(lockname) }
  13032. -
  13033. -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
  13034. - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
  13035. -
  13036. -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
  13037. -
  13038. -typedef struct spinlock {
  13039. - union {
  13040. - struct raw_spinlock rlock;
  13041. -
  13042. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13043. -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
  13044. - struct {
  13045. - u8 __padding[LOCK_PADSIZE];
  13046. - struct lockdep_map dep_map;
  13047. - };
  13048. -#endif
  13049. - };
  13050. -} spinlock_t;
  13051. -
  13052. -#define __SPIN_LOCK_INITIALIZER(lockname) \
  13053. - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
  13054. -
  13055. -#define __SPIN_LOCK_UNLOCKED(lockname) \
  13056. - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
  13057. -
  13058. -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
  13059. -
  13060. -#include <linux/rwlock_types.h>
  13061. -
  13062. #endif /* __LINUX_SPINLOCK_TYPES_H */
  13063. diff --git a/include/linux/spinlock_types_nort.h b/include/linux/spinlock_types_nort.h
  13064. new file mode 100644
  13065. index 000000000000..f1dac1fb1d6a
  13066. --- /dev/null
  13067. +++ b/include/linux/spinlock_types_nort.h
  13068. @@ -0,0 +1,33 @@
  13069. +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H
  13070. +#define __LINUX_SPINLOCK_TYPES_NORT_H
  13071. +
  13072. +#ifndef __LINUX_SPINLOCK_TYPES_H
  13073. +#error "Do not include directly. Include spinlock_types.h instead"
  13074. +#endif
  13075. +
  13076. +/*
  13077. + * The non RT version maps spinlocks to raw_spinlocks
  13078. + */
  13079. +typedef struct spinlock {
  13080. + union {
  13081. + struct raw_spinlock rlock;
  13082. +
  13083. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13084. +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
  13085. + struct {
  13086. + u8 __padding[LOCK_PADSIZE];
  13087. + struct lockdep_map dep_map;
  13088. + };
  13089. +#endif
  13090. + };
  13091. +} spinlock_t;
  13092. +
  13093. +#define __SPIN_LOCK_INITIALIZER(lockname) \
  13094. + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
  13095. +
  13096. +#define __SPIN_LOCK_UNLOCKED(lockname) \
  13097. + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
  13098. +
  13099. +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
  13100. +
  13101. +#endif
  13102. diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h
  13103. new file mode 100644
  13104. index 000000000000..edffc4d53fc9
  13105. --- /dev/null
  13106. +++ b/include/linux/spinlock_types_raw.h
  13107. @@ -0,0 +1,56 @@
  13108. +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  13109. +#define __LINUX_SPINLOCK_TYPES_RAW_H
  13110. +
  13111. +#if defined(CONFIG_SMP)
  13112. +# include <asm/spinlock_types.h>
  13113. +#else
  13114. +# include <linux/spinlock_types_up.h>
  13115. +#endif
  13116. +
  13117. +#include <linux/lockdep.h>
  13118. +
  13119. +typedef struct raw_spinlock {
  13120. + arch_spinlock_t raw_lock;
  13121. +#ifdef CONFIG_GENERIC_LOCKBREAK
  13122. + unsigned int break_lock;
  13123. +#endif
  13124. +#ifdef CONFIG_DEBUG_SPINLOCK
  13125. + unsigned int magic, owner_cpu;
  13126. + void *owner;
  13127. +#endif
  13128. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13129. + struct lockdep_map dep_map;
  13130. +#endif
  13131. +} raw_spinlock_t;
  13132. +
  13133. +#define SPINLOCK_MAGIC 0xdead4ead
  13134. +
  13135. +#define SPINLOCK_OWNER_INIT ((void *)-1L)
  13136. +
  13137. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13138. +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  13139. +#else
  13140. +# define SPIN_DEP_MAP_INIT(lockname)
  13141. +#endif
  13142. +
  13143. +#ifdef CONFIG_DEBUG_SPINLOCK
  13144. +# define SPIN_DEBUG_INIT(lockname) \
  13145. + .magic = SPINLOCK_MAGIC, \
  13146. + .owner_cpu = -1, \
  13147. + .owner = SPINLOCK_OWNER_INIT,
  13148. +#else
  13149. +# define SPIN_DEBUG_INIT(lockname)
  13150. +#endif
  13151. +
  13152. +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
  13153. + { \
  13154. + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
  13155. + SPIN_DEBUG_INIT(lockname) \
  13156. + SPIN_DEP_MAP_INIT(lockname) }
  13157. +
  13158. +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
  13159. + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
  13160. +
  13161. +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
  13162. +
  13163. +#endif
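The raw initializer above is built from optional pieces: SPIN_DEBUG_INIT() and SPIN_DEP_MAP_INIT() expand to designated initializers when their config options are enabled and to nothing otherwise, so one __RAW_SPIN_LOCK_INITIALIZER() covers every configuration. A hedged userspace sketch of that composition pattern (DEBUG_LOCKS and the field names below are invented for the example):

#include <stdio.h>

#define DEBUG_LOCKS 1	/* set to 0 to drop the debug fields entirely */

struct raw_lock {
	unsigned int raw_lock;
#if DEBUG_LOCKS
	unsigned int magic;
	int owner_cpu;
#endif
};

#define LOCK_MAGIC 0xdead4ead

#if DEBUG_LOCKS
# define LOCK_DEBUG_INIT() .magic = LOCK_MAGIC, .owner_cpu = -1,
#else
# define LOCK_DEBUG_INIT()
#endif

/* The optional piece slots into the braced initializer unconditionally. */
#define RAW_LOCK_INITIALIZER() { .raw_lock = 0, LOCK_DEBUG_INIT() }

#define DEFINE_RAW_LOCK(x) struct raw_lock x = RAW_LOCK_INITIALIZER()

DEFINE_RAW_LOCK(test_lock);

int main(void)
{
#if DEBUG_LOCKS
	printf("magic=%#x owner_cpu=%d\n", test_lock.magic, test_lock.owner_cpu);
#else
	printf("raw_lock=%u\n", test_lock.raw_lock);
#endif
	return 0;
}

Toggling DEBUG_LOCKS changes the structure layout and the initializer in lock-step, which is what the kernel macros achieve per configuration.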
  13164. diff --git a/include/linux/spinlock_types_rt.h b/include/linux/spinlock_types_rt.h
  13165. new file mode 100644
  13166. index 000000000000..9fd431967abc
  13167. --- /dev/null
  13168. +++ b/include/linux/spinlock_types_rt.h
  13169. @@ -0,0 +1,51 @@
  13170. +#ifndef __LINUX_SPINLOCK_TYPES_RT_H
  13171. +#define __LINUX_SPINLOCK_TYPES_RT_H
  13172. +
  13173. +#ifndef __LINUX_SPINLOCK_TYPES_H
  13174. +#error "Do not include directly. Include spinlock_types.h instead"
  13175. +#endif
  13176. +
  13177. +#include <linux/cache.h>
  13178. +
  13179. +/*
  13180. + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field:
  13181. + */
  13182. +typedef struct spinlock {
  13183. + struct rt_mutex lock;
  13184. + unsigned int break_lock;
  13185. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13186. + struct lockdep_map dep_map;
  13187. +#endif
  13188. +} spinlock_t;
  13189. +
  13190. +#ifdef CONFIG_DEBUG_RT_MUTEXES
  13191. +# define __RT_SPIN_INITIALIZER(name) \
  13192. + { \
  13193. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
  13194. + .save_state = 1, \
  13195. + .file = __FILE__, \
  13196. + .line = __LINE__ , \
  13197. + }
  13198. +#else
  13199. +# define __RT_SPIN_INITIALIZER(name) \
  13200. + { \
  13201. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
  13202. + .save_state = 1, \
  13203. + }
  13204. +#endif
  13205. +
  13206. +/*
  13207. +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock)
  13208. +*/
  13209. +
  13210. +#define __SPIN_LOCK_UNLOCKED(name) \
  13211. + { .lock = __RT_SPIN_INITIALIZER(name.lock), \
  13212. + SPIN_DEP_MAP_INIT(name) }
  13213. +
  13214. +#define __DEFINE_SPINLOCK(name) \
  13215. + spinlock_t name = __SPIN_LOCK_UNLOCKED(name)
  13216. +
  13217. +#define DEFINE_SPINLOCK(name) \
  13218. + spinlock_t name __cacheline_aligned_in_smp = __SPIN_LOCK_UNLOCKED(name)
  13219. +
  13220. +#endif
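Taken together, the three *_types headers let the same spin_lock() call site compile to a busy-waiting raw lock on !RT and to a sleeping rt_mutex-backed lock on PREEMPT_RT_FULL, without touching callers. A rough userspace analogy of that build-time switch, with a pthread mutex standing in for the rt_mutex (illustrative only, not the kernel implementation):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/* #define PREEMPT_RT_FULL 1 */	/* uncomment for the "sleeping" flavour */

#ifdef PREEMPT_RT_FULL
/* RT flavour: the lock may block, like an rt_mutex-backed spinlock_t. */
typedef struct { pthread_mutex_t m; } spinlock_t;
#define SPIN_LOCK_INIT { PTHREAD_MUTEX_INITIALIZER }
static void spin_lock(spinlock_t *l)   { pthread_mutex_lock(&l->m); }
static void spin_unlock(spinlock_t *l) { pthread_mutex_unlock(&l->m); }
#else
/* !RT flavour: a plain busy-waiting lock, like a raw_spinlock. */
typedef struct { atomic_flag f; } spinlock_t;
#define SPIN_LOCK_INIT { ATOMIC_FLAG_INIT }
static void spin_lock(spinlock_t *l)
{
	while (atomic_flag_test_and_set_explicit(&l->f, memory_order_acquire))
		;	/* spin */
}
static void spin_unlock(spinlock_t *l)
{
	atomic_flag_clear_explicit(&l->f, memory_order_release);
}
#endif

static spinlock_t lock = SPIN_LOCK_INIT;

int main(void)
{
	spin_lock(&lock);
	puts("critical section");
	spin_unlock(&lock);
	return 0;
}

Build with cc -std=c11 -pthread; defining PREEMPT_RT_FULL swaps the lock implementation while main() stays untouched, which is the point of the header split.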
  13221. diff --git a/include/linux/srcu.h b/include/linux/srcu.h
  13222. index bdeb4567b71e..a9c3c49cda5d 100644
  13223. --- a/include/linux/srcu.h
  13224. +++ b/include/linux/srcu.h
  13225. @@ -84,10 +84,10 @@ int init_srcu_struct(struct srcu_struct *sp);
  13226. void process_srcu(struct work_struct *work);
  13227. -#define __SRCU_STRUCT_INIT(name) \
  13228. +#define __SRCU_STRUCT_INIT(name, pcpu_name) \
  13229. { \
  13230. .completed = -300, \
  13231. - .per_cpu_ref = &name##_srcu_array, \
  13232. + .per_cpu_ref = &pcpu_name, \
  13233. .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \
  13234. .running = false, \
  13235. .batch_queue = RCU_BATCH_INIT(name.batch_queue), \
  13236. @@ -104,7 +104,7 @@ void process_srcu(struct work_struct *work);
  13237. */
  13238. #define __DEFINE_SRCU(name, is_static) \
  13239. static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
  13240. - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
  13241. + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array)
  13242. #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
  13243. #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
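The srcu.h change works because __DEFINE_SRCU() both creates the per-CPU array with a pasted name and now passes that exact name into the initializer, instead of having the initializer re-derive it. A tiny userspace sketch of that token-pasting pattern (the counter type and names are invented for illustration):

#include <stdio.h>

struct counter { int *per_cpu_ref; };

/* The defining macro creates the backing storage and wires it up by name. */
#define COUNTER_INIT(name, pcpu_name)	{ .per_cpu_ref = &pcpu_name }
#define DEFINE_COUNTER(name)					\
	static int name##_storage;				\
	struct counter name = COUNTER_INIT(name, name##_storage)

DEFINE_COUNTER(stats);

int main(void)
{
	*stats.per_cpu_ref = 42;
	printf("stats_storage = %d\n", stats_storage);
	return 0;
}

Passing the generated symbol explicitly presumably lets callers back the structure with a differently named per-CPU array without touching __SRCU_STRUCT_INIT() again.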
  13244. diff --git a/include/linux/swap.h b/include/linux/swap.h
  13245. index cee108cbe2d5..4c07c12d2d82 100644
  13246. --- a/include/linux/swap.h
  13247. +++ b/include/linux/swap.h
  13248. @@ -11,6 +11,7 @@
  13249. #include <linux/fs.h>
  13250. #include <linux/atomic.h>
  13251. #include <linux/page-flags.h>
  13252. +#include <linux/locallock.h>
  13253. #include <asm/page.h>
  13254. struct notifier_block;
  13255. @@ -252,7 +253,8 @@ struct swap_info_struct {
  13256. void *workingset_eviction(struct address_space *mapping, struct page *page);
  13257. bool workingset_refault(void *shadow);
  13258. void workingset_activation(struct page *page);
  13259. -extern struct list_lru workingset_shadow_nodes;
  13260. +extern struct list_lru __workingset_shadow_nodes;
  13261. +DECLARE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
  13262. static inline unsigned int workingset_node_pages(struct radix_tree_node *node)
  13263. {
  13264. @@ -296,6 +298,7 @@ extern unsigned long nr_free_pagecache_pages(void);
  13265. /* linux/mm/swap.c */
  13266. +DECLARE_LOCAL_IRQ_LOCK(swapvec_lock);
  13267. extern void lru_cache_add(struct page *);
  13268. extern void lru_cache_add_anon(struct page *page);
  13269. extern void lru_cache_add_file(struct page *page);
  13270. diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
  13271. index ff307b548ed3..be9f9dc6a4e1 100644
  13272. --- a/include/linux/thread_info.h
  13273. +++ b/include/linux/thread_info.h
  13274. @@ -102,7 +102,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
  13275. #define test_thread_flag(flag) \
  13276. test_ti_thread_flag(current_thread_info(), flag)
  13277. -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
  13278. +#ifdef CONFIG_PREEMPT_LAZY
  13279. +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \
  13280. + test_thread_flag(TIF_NEED_RESCHED_LAZY))
  13281. +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED))
13282. +#define tif_need_resched_lazy() (test_thread_flag(TIF_NEED_RESCHED_LAZY))
  13283. +
  13284. +#else
  13285. +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
  13286. +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED)
  13287. +#define tif_need_resched_lazy() 0
  13288. +#endif
  13289. #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
  13290. /*
  13291. diff --git a/include/linux/timer.h b/include/linux/timer.h
  13292. index 8c5a197e1587..5fcd72c57ebe 100644
  13293. --- a/include/linux/timer.h
  13294. +++ b/include/linux/timer.h
  13295. @@ -241,7 +241,7 @@ extern void add_timer(struct timer_list *timer);
  13296. extern int try_to_del_timer_sync(struct timer_list *timer);
  13297. -#ifdef CONFIG_SMP
  13298. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  13299. extern int del_timer_sync(struct timer_list *timer);
  13300. #else
  13301. # define del_timer_sync(t) del_timer(t)
  13302. diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
  13303. index ecd3319dac33..941b2dab50cd 100644
  13304. --- a/include/linux/uaccess.h
  13305. +++ b/include/linux/uaccess.h
  13306. @@ -1,21 +1,31 @@
  13307. #ifndef __LINUX_UACCESS_H__
  13308. #define __LINUX_UACCESS_H__
  13309. -#include <linux/preempt.h>
  13310. +#include <linux/sched.h>
  13311. #include <asm/uaccess.h>
  13312. +static __always_inline void pagefault_disabled_inc(void)
  13313. +{
  13314. + current->pagefault_disabled++;
  13315. +}
  13316. +
  13317. +static __always_inline void pagefault_disabled_dec(void)
  13318. +{
  13319. + current->pagefault_disabled--;
  13320. + WARN_ON(current->pagefault_disabled < 0);
  13321. +}
  13322. +
  13323. /*
  13324. - * These routines enable/disable the pagefault handler in that
  13325. - * it will not take any locks and go straight to the fixup table.
  13326. + * These routines enable/disable the pagefault handler. If disabled, it will
  13327. + * not take any locks and go straight to the fixup table.
  13328. *
  13329. - * They have great resemblance to the preempt_disable/enable calls
  13330. - * and in fact they are identical; this is because currently there is
  13331. - * no other way to make the pagefault handlers do this. So we do
  13332. - * disable preemption but we don't necessarily care about that.
  13333. + * User access methods will not sleep when called from a pagefault_disabled()
  13334. + * environment.
  13335. */
  13336. static inline void pagefault_disable(void)
  13337. {
  13338. - preempt_count_inc();
  13339. + migrate_disable();
  13340. + pagefault_disabled_inc();
  13341. /*
  13342. * make sure to have issued the store before a pagefault
  13343. * can hit.
  13344. @@ -25,18 +35,32 @@ static inline void pagefault_disable(void)
  13345. static inline void pagefault_enable(void)
  13346. {
  13347. -#ifndef CONFIG_PREEMPT
  13348. /*
  13349. * make sure to issue those last loads/stores before enabling
  13350. * the pagefault handler again.
  13351. */
  13352. barrier();
  13353. - preempt_count_dec();
  13354. -#else
  13355. - preempt_enable();
  13356. -#endif
  13357. + pagefault_disabled_dec();
  13358. + migrate_enable();
  13359. }
  13360. +/*
  13361. + * Is the pagefault handler disabled? If so, user access methods will not sleep.
  13362. + */
  13363. +#define pagefault_disabled() (current->pagefault_disabled != 0)
  13364. +
  13365. +/*
  13366. + * The pagefault handler is in general disabled by pagefault_disable() or
  13367. + * when in irq context (via in_atomic()).
  13368. + *
  13369. + * This function should only be used by the fault handlers. Other users should
  13370. + * stick to pagefault_disabled().
  13371. + * Please NEVER use preempt_disable() to disable the fault handler. With
  13372. + * !CONFIG_PREEMPT_COUNT, this is like a NOP. So the handler won't be disabled.
  13373. + * in_atomic() will report different values based on !CONFIG_PREEMPT_COUNT.
  13374. + */
  13375. +#define faulthandler_disabled() (pagefault_disabled() || in_atomic())
  13376. +
  13377. #ifndef ARCH_HAS_NOCACHE_UACCESS
  13378. static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
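The reworked pagefault_disable() above replaces the preempt-count trick with an explicit, nestable per-task counter plus migrate_disable(), so the section stays preemptible on RT while user-access code can still ask pagefault_disabled(). A minimal userspace sketch of such a nestable per-thread counter, assuming a thread-local variable in place of task_struct:

#include <assert.h>
#include <stdio.h>

static _Thread_local int pagefault_disabled_count;

static void pagefault_disable(void)
{
	pagefault_disabled_count++;	/* nests: every disable needs an enable */
}

static void pagefault_enable(void)
{
	pagefault_disabled_count--;
	assert(pagefault_disabled_count >= 0);	/* catch unbalanced calls */
}

static int pagefault_disabled(void)
{
	return pagefault_disabled_count != 0;
}

int main(void)
{
	pagefault_disable();
	pagefault_disable();		/* nested section */
	printf("disabled=%d\n", pagefault_disabled());
	pagefault_enable();
	pagefault_enable();
	printf("disabled=%d\n", pagefault_disabled());
	return 0;
}

The kernel version additionally brackets the counter with migrate_disable()/migrate_enable() and a barrier, which the sketch leaves out.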
  13379. diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
  13380. index 60beb5dc7977..f5a644c649b4 100644
  13381. --- a/include/linux/uprobes.h
  13382. +++ b/include/linux/uprobes.h
  13383. @@ -27,6 +27,7 @@
  13384. #include <linux/errno.h>
  13385. #include <linux/rbtree.h>
  13386. #include <linux/types.h>
  13387. +#include <linux/wait.h>
  13388. struct vm_area_struct;
  13389. struct mm_struct;
  13390. diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
  13391. index 82e7db7f7100..3feaf770a8bd 100644
  13392. --- a/include/linux/vmstat.h
  13393. +++ b/include/linux/vmstat.h
  13394. @@ -33,7 +33,9 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
  13395. */
  13396. static inline void __count_vm_event(enum vm_event_item item)
  13397. {
  13398. + preempt_disable_rt();
  13399. raw_cpu_inc(vm_event_states.event[item]);
  13400. + preempt_enable_rt();
  13401. }
  13402. static inline void count_vm_event(enum vm_event_item item)
  13403. @@ -43,7 +45,9 @@ static inline void count_vm_event(enum vm_event_item item)
  13404. static inline void __count_vm_events(enum vm_event_item item, long delta)
  13405. {
  13406. + preempt_disable_rt();
  13407. raw_cpu_add(vm_event_states.event[item], delta);
  13408. + preempt_enable_rt();
  13409. }
  13410. static inline void count_vm_events(enum vm_event_item item, long delta)
  13411. diff --git a/include/linux/wait-simple.h b/include/linux/wait-simple.h
  13412. new file mode 100644
  13413. index 000000000000..f86bca2c41d5
  13414. --- /dev/null
  13415. +++ b/include/linux/wait-simple.h
  13416. @@ -0,0 +1,207 @@
  13417. +#ifndef _LINUX_WAIT_SIMPLE_H
  13418. +#define _LINUX_WAIT_SIMPLE_H
  13419. +
  13420. +#include <linux/spinlock.h>
  13421. +#include <linux/list.h>
  13422. +
  13423. +#include <asm/current.h>
  13424. +
  13425. +struct swaiter {
  13426. + struct task_struct *task;
  13427. + struct list_head node;
  13428. +};
  13429. +
  13430. +#define DEFINE_SWAITER(name) \
  13431. + struct swaiter name = { \
  13432. + .task = current, \
  13433. + .node = LIST_HEAD_INIT((name).node), \
  13434. + }
  13435. +
  13436. +struct swait_head {
  13437. + raw_spinlock_t lock;
  13438. + struct list_head list;
  13439. +};
  13440. +
  13441. +#define SWAIT_HEAD_INITIALIZER(name) { \
  13442. + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
  13443. + .list = LIST_HEAD_INIT((name).list), \
  13444. + }
  13445. +
  13446. +#define DEFINE_SWAIT_HEAD(name) \
  13447. + struct swait_head name = SWAIT_HEAD_INITIALIZER(name)
  13448. +
  13449. +extern void __init_swait_head(struct swait_head *h, struct lock_class_key *key);
  13450. +
  13451. +#define init_swait_head(swh) \
  13452. + do { \
  13453. + static struct lock_class_key __key; \
  13454. + \
  13455. + __init_swait_head((swh), &__key); \
  13456. + } while (0)
  13457. +
  13458. +/*
  13459. + * Waiter functions
  13460. + */
  13461. +extern void swait_prepare_locked(struct swait_head *head, struct swaiter *w);
  13462. +extern void swait_prepare(struct swait_head *head, struct swaiter *w, int state);
  13463. +extern void swait_finish_locked(struct swait_head *head, struct swaiter *w);
  13464. +extern void swait_finish(struct swait_head *head, struct swaiter *w);
  13465. +
  13466. +/* Check whether a head has waiters enqueued */
  13467. +static inline bool swaitqueue_active(struct swait_head *h)
  13468. +{
  13469. + /* Make sure the condition is visible before checking list_empty() */
  13470. + smp_mb();
  13471. + return !list_empty(&h->list);
  13472. +}
  13473. +
  13474. +/*
  13475. + * Wakeup functions
  13476. + */
  13477. +extern unsigned int __swait_wake(struct swait_head *head, unsigned int state, unsigned int num);
  13478. +extern unsigned int __swait_wake_locked(struct swait_head *head, unsigned int state, unsigned int num);
  13479. +
  13480. +#define swait_wake(head) __swait_wake(head, TASK_NORMAL, 1)
  13481. +#define swait_wake_interruptible(head) __swait_wake(head, TASK_INTERRUPTIBLE, 1)
  13482. +#define swait_wake_all(head) __swait_wake(head, TASK_NORMAL, 0)
  13483. +#define swait_wake_all_interruptible(head) __swait_wake(head, TASK_INTERRUPTIBLE, 0)
  13484. +
  13485. +/*
  13486. + * Event API
  13487. + */
  13488. +#define __swait_event(wq, condition) \
  13489. +do { \
  13490. + DEFINE_SWAITER(__wait); \
  13491. + \
  13492. + for (;;) { \
  13493. + swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
  13494. + if (condition) \
  13495. + break; \
  13496. + schedule(); \
  13497. + } \
  13498. + swait_finish(&wq, &__wait); \
  13499. +} while (0)
  13500. +
  13501. +/**
  13502. + * swait_event - sleep until a condition gets true
  13503. + * @wq: the waitqueue to wait on
  13504. + * @condition: a C expression for the event to wait for
  13505. + *
  13506. + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  13507. + * @condition evaluates to true. The @condition is checked each time
  13508. + * the waitqueue @wq is woken up.
  13509. + *
  13510. + * wake_up() has to be called after changing any variable that could
  13511. + * change the result of the wait condition.
  13512. + */
  13513. +#define swait_event(wq, condition) \
  13514. +do { \
  13515. + if (condition) \
  13516. + break; \
  13517. + __swait_event(wq, condition); \
  13518. +} while (0)
  13519. +
  13520. +#define __swait_event_interruptible(wq, condition, ret) \
  13521. +do { \
  13522. + DEFINE_SWAITER(__wait); \
  13523. + \
  13524. + for (;;) { \
  13525. + swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
  13526. + if (condition) \
  13527. + break; \
  13528. + if (signal_pending(current)) { \
  13529. + ret = -ERESTARTSYS; \
  13530. + break; \
  13531. + } \
  13532. + schedule(); \
  13533. + } \
  13534. + swait_finish(&wq, &__wait); \
  13535. +} while (0)
  13536. +
  13537. +#define __swait_event_interruptible_timeout(wq, condition, ret) \
  13538. +do { \
  13539. + DEFINE_SWAITER(__wait); \
  13540. + \
  13541. + for (;;) { \
  13542. + swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
  13543. + if (condition) \
  13544. + break; \
  13545. + if (signal_pending(current)) { \
  13546. + ret = -ERESTARTSYS; \
  13547. + break; \
  13548. + } \
  13549. + ret = schedule_timeout(ret); \
  13550. + if (!ret) \
  13551. + break; \
  13552. + } \
  13553. + swait_finish(&wq, &__wait); \
  13554. +} while (0)
  13555. +
  13556. +/**
  13557. + * swait_event_interruptible - sleep until a condition gets true
  13558. + * @wq: the waitqueue to wait on
  13559. + * @condition: a C expression for the event to wait for
  13560. + *
  13561. + * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  13562. + * @condition evaluates to true. The @condition is checked each time
  13563. + * the waitqueue @wq is woken up.
  13564. + *
  13565. + * wake_up() has to be called after changing any variable that could
  13566. + * change the result of the wait condition.
  13567. + */
  13568. +#define swait_event_interruptible(wq, condition) \
  13569. +({ \
  13570. + int __ret = 0; \
  13571. + if (!(condition)) \
  13572. + __swait_event_interruptible(wq, condition, __ret); \
  13573. + __ret; \
  13574. +})
  13575. +
  13576. +#define swait_event_interruptible_timeout(wq, condition, timeout) \
  13577. +({ \
  13578. + int __ret = timeout; \
  13579. + if (!(condition)) \
  13580. + __swait_event_interruptible_timeout(wq, condition, __ret); \
  13581. + __ret; \
  13582. +})
  13583. +
  13584. +#define __swait_event_timeout(wq, condition, ret) \
  13585. +do { \
  13586. + DEFINE_SWAITER(__wait); \
  13587. + \
  13588. + for (;;) { \
  13589. + swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
  13590. + if (condition) \
  13591. + break; \
  13592. + ret = schedule_timeout(ret); \
  13593. + if (!ret) \
  13594. + break; \
  13595. + } \
  13596. + swait_finish(&wq, &__wait); \
  13597. +} while (0)
  13598. +
  13599. +/**
  13600. + * swait_event_timeout - sleep until a condition gets true or a timeout elapses
  13601. + * @wq: the waitqueue to wait on
  13602. + * @condition: a C expression for the event to wait for
  13603. + * @timeout: timeout, in jiffies
  13604. + *
  13605. + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  13606. + * @condition evaluates to true. The @condition is checked each time
  13607. + * the waitqueue @wq is woken up.
  13608. + *
  13609. + * wake_up() has to be called after changing any variable that could
  13610. + * change the result of the wait condition.
  13611. + *
  13612. + * The function returns 0 if the @timeout elapsed, and the remaining
  13613. + * jiffies if the condition evaluated to true before the timeout elapsed.
  13614. + */
  13615. +#define swait_event_timeout(wq, condition, timeout) \
  13616. +({ \
  13617. + long __ret = timeout; \
  13618. + if (!(condition)) \
  13619. + __swait_event_timeout(wq, condition, __ret); \
  13620. + __ret; \
  13621. +})
  13622. +
  13623. +#endif
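wait-simple.h is a stripped-down wait queue: a raw lock, a plain list of waiters, and __swait_wake() waking at most num of them. The sketch below models the same shape in userspace with pthreads, purely as an analogy for how enqueue, wake-N and dequeue fit together; the real kernel code wakes tasks directly and never uses condition variables:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct swaiter {
	bool woken;
	struct swaiter *next;
};

struct swait_head {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	struct swaiter *list;	/* LIFO is good enough for the sketch */
};

#define SWAIT_HEAD_INITIALIZER \
	{ PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, NULL }

/* Enqueue ourselves, then sleep until a wakeup marks us woken. */
static void swait(struct swait_head *h)
{
	struct swaiter w = { .woken = false };

	pthread_mutex_lock(&h->lock);
	w.next = h->list;
	h->list = &w;
	while (!w.woken)
		pthread_cond_wait(&h->cond, &h->lock);
	pthread_mutex_unlock(&h->lock);
}

/* Wake up to num waiters; num == 0 means wake everyone. */
static unsigned int swait_wake(struct swait_head *h, unsigned int num)
{
	unsigned int woken = 0;

	pthread_mutex_lock(&h->lock);
	while (h->list && (num == 0 || woken < num)) {
		h->list->woken = true;
		h->list = h->list->next;
		woken++;
	}
	pthread_cond_broadcast(&h->cond);
	pthread_mutex_unlock(&h->lock);
	return woken;
}

static struct swait_head head = SWAIT_HEAD_INITIALIZER;

static void *waiter_fn(void *arg)
{
	(void)arg;
	swait(&head);
	puts("waiter woken");
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waiter_fn, NULL);
	/* Keep trying until the waiter has enqueued itself, then wake it. */
	while (swait_wake(&head, 1) == 0)
		;
	pthread_join(&t, NULL);
	return 0;
}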
  13624. diff --git a/include/linux/wait.h b/include/linux/wait.h
  13625. index 2db83349865b..b3b54c26b6a0 100644
  13626. --- a/include/linux/wait.h
  13627. +++ b/include/linux/wait.h
  13628. @@ -8,6 +8,7 @@
  13629. #include <linux/spinlock.h>
  13630. #include <asm/current.h>
  13631. #include <uapi/linux/wait.h>
  13632. +#include <linux/atomic.h>
  13633. typedef struct __wait_queue wait_queue_t;
  13634. typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
  13635. diff --git a/include/linux/work-simple.h b/include/linux/work-simple.h
  13636. new file mode 100644
  13637. index 000000000000..f175fa9a6016
  13638. --- /dev/null
  13639. +++ b/include/linux/work-simple.h
  13640. @@ -0,0 +1,24 @@
  13641. +#ifndef _LINUX_SWORK_H
  13642. +#define _LINUX_SWORK_H
  13643. +
  13644. +#include <linux/list.h>
  13645. +
  13646. +struct swork_event {
  13647. + struct list_head item;
  13648. + unsigned long flags;
  13649. + void (*func)(struct swork_event *);
  13650. +};
  13651. +
  13652. +static inline void INIT_SWORK(struct swork_event *event,
  13653. + void (*func)(struct swork_event *))
  13654. +{
  13655. + event->flags = 0;
  13656. + event->func = func;
  13657. +}
  13658. +
  13659. +bool swork_queue(struct swork_event *sev);
  13660. +
  13661. +int swork_get(void);
  13662. +void swork_put(void);
  13663. +
  13664. +#endif /* _LINUX_SWORK_H */
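work-simple.h is a deliberately tiny deferred-work facility: callers initialize an event with a callback, queue it, and a single worker runs it later (the kernel/cgroup.c hunk further down queues css release work this way). A hedged userspace analog of that queue-plus-worker shape, with all names below purely illustrative:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct swork_event {
	struct swork_event *next;
	void (*func)(struct swork_event *);
};

static struct swork_event *queue_head;
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t queue_cond = PTHREAD_COND_INITIALIZER;

static void swork_queue(struct swork_event *ev)
{
	pthread_mutex_lock(&queue_lock);
	ev->next = queue_head;
	queue_head = ev;
	pthread_cond_signal(&queue_cond);
	pthread_mutex_unlock(&queue_lock);
}

/* The single worker: pop events and run their callbacks. */
static void *swork_thread(void *arg)
{
	(void)arg;
	for (;;) {
		struct swork_event *ev;

		pthread_mutex_lock(&queue_lock);
		while (!queue_head)
			pthread_cond_wait(&queue_cond, &queue_lock);
		ev = queue_head;
		queue_head = ev->next;
		pthread_mutex_unlock(&queue_lock);
		ev->func(ev);
	}
	return NULL;
}

static void hello(struct swork_event *ev)
{
	(void)ev;
	puts("deferred work ran");
}

int main(void)
{
	pthread_t worker;
	struct swork_event ev = { .func = hello };

	pthread_create(&worker, NULL, swork_thread, NULL);
	swork_queue(&ev);
	sleep(1);	/* crude: give the worker time to run the event */
	return 0;
}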
  13665. diff --git a/include/net/dst.h b/include/net/dst.h
  13666. index 182b812d45e1..74baade721d6 100644
  13667. --- a/include/net/dst.h
  13668. +++ b/include/net/dst.h
  13669. @@ -436,7 +436,7 @@ static inline void dst_confirm(struct dst_entry *dst)
  13670. static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
  13671. struct sk_buff *skb)
  13672. {
  13673. - const struct hh_cache *hh;
  13674. + struct hh_cache *hh;
  13675. if (dst->pending_confirm) {
  13676. unsigned long now = jiffies;
  13677. diff --git a/include/net/neighbour.h b/include/net/neighbour.h
  13678. index bd33e66f49aa..9c38018c6038 100644
  13679. --- a/include/net/neighbour.h
  13680. +++ b/include/net/neighbour.h
  13681. @@ -445,7 +445,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
  13682. }
  13683. #endif
  13684. -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
  13685. +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
  13686. {
  13687. unsigned int seq;
  13688. int hh_len;
  13689. @@ -500,7 +500,7 @@ struct neighbour_cb {
  13690. #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
  13691. -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
  13692. +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n,
  13693. const struct net_device *dev)
  13694. {
  13695. unsigned int seq;
  13696. diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
  13697. index 614a49be68a9..b4bdbe10b77a 100644
  13698. --- a/include/net/netns/ipv4.h
  13699. +++ b/include/net/netns/ipv4.h
  13700. @@ -69,6 +69,7 @@ struct netns_ipv4 {
  13701. int sysctl_icmp_echo_ignore_all;
  13702. int sysctl_icmp_echo_ignore_broadcasts;
  13703. + int sysctl_icmp_echo_sysrq;
  13704. int sysctl_icmp_ignore_bogus_error_responses;
  13705. int sysctl_icmp_ratelimit;
  13706. int sysctl_icmp_ratemask;
  13707. diff --git a/include/trace/events/hist.h b/include/trace/events/hist.h
  13708. new file mode 100644
  13709. index 000000000000..37f6eb8c9dc2
  13710. --- /dev/null
  13711. +++ b/include/trace/events/hist.h
  13712. @@ -0,0 +1,74 @@
  13713. +#undef TRACE_SYSTEM
  13714. +#define TRACE_SYSTEM hist
  13715. +
  13716. +#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ)
  13717. +#define _TRACE_HIST_H
  13718. +
  13719. +#include "latency_hist.h"
  13720. +#include <linux/tracepoint.h>
  13721. +
  13722. +#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST)
  13723. +#define trace_preemptirqsoff_hist(a, b)
  13724. +#define trace_preemptirqsoff_hist_rcuidle(a, b)
  13725. +#else
  13726. +TRACE_EVENT(preemptirqsoff_hist,
  13727. +
  13728. + TP_PROTO(int reason, int starthist),
  13729. +
  13730. + TP_ARGS(reason, starthist),
  13731. +
  13732. + TP_STRUCT__entry(
  13733. + __field(int, reason)
  13734. + __field(int, starthist)
  13735. + ),
  13736. +
  13737. + TP_fast_assign(
  13738. + __entry->reason = reason;
  13739. + __entry->starthist = starthist;
  13740. + ),
  13741. +
  13742. + TP_printk("reason=%s starthist=%s", getaction(__entry->reason),
  13743. + __entry->starthist ? "start" : "stop")
  13744. +);
  13745. +#endif
  13746. +
  13747. +#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST
  13748. +#define trace_hrtimer_interrupt(a, b, c, d)
  13749. +#define trace_hrtimer_interrupt_rcuidle(a, b, c, d)
  13750. +#else
  13751. +TRACE_EVENT(hrtimer_interrupt,
  13752. +
  13753. + TP_PROTO(int cpu, long long offset, struct task_struct *curr,
  13754. + struct task_struct *task),
  13755. +
  13756. + TP_ARGS(cpu, offset, curr, task),
  13757. +
  13758. + TP_STRUCT__entry(
  13759. + __field(int, cpu)
  13760. + __field(long long, offset)
  13761. + __array(char, ccomm, TASK_COMM_LEN)
  13762. + __field(int, cprio)
  13763. + __array(char, tcomm, TASK_COMM_LEN)
  13764. + __field(int, tprio)
  13765. + ),
  13766. +
  13767. + TP_fast_assign(
  13768. + __entry->cpu = cpu;
  13769. + __entry->offset = offset;
  13770. + memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN);
  13771. + __entry->cprio = curr->prio;
  13772. + memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>",
  13773. + task != NULL ? TASK_COMM_LEN : 7);
  13774. + __entry->tprio = task != NULL ? task->prio : -1;
  13775. + ),
  13776. +
  13777. + TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]",
  13778. + __entry->cpu, __entry->offset, __entry->ccomm,
  13779. + __entry->cprio, __entry->tcomm, __entry->tprio)
  13780. +);
  13781. +#endif
  13782. +
  13783. +#endif /* _TRACE_HIST_H */
  13784. +
  13785. +/* This part must be outside protection */
  13786. +#include <trace/define_trace.h>
  13787. diff --git a/include/trace/events/latency_hist.h b/include/trace/events/latency_hist.h
  13788. new file mode 100644
  13789. index 000000000000..d3f2fbd560b1
  13790. --- /dev/null
  13791. +++ b/include/trace/events/latency_hist.h
  13792. @@ -0,0 +1,29 @@
  13793. +#ifndef _LATENCY_HIST_H
  13794. +#define _LATENCY_HIST_H
  13795. +
  13796. +enum hist_action {
  13797. + IRQS_ON,
  13798. + PREEMPT_ON,
  13799. + TRACE_STOP,
  13800. + IRQS_OFF,
  13801. + PREEMPT_OFF,
  13802. + TRACE_START,
  13803. +};
  13804. +
  13805. +static char *actions[] = {
  13806. + "IRQS_ON",
  13807. + "PREEMPT_ON",
  13808. + "TRACE_STOP",
  13809. + "IRQS_OFF",
  13810. + "PREEMPT_OFF",
  13811. + "TRACE_START",
  13812. +};
  13813. +
  13814. +static inline char *getaction(int action)
  13815. +{
13816. + if (action >= 0 && action < sizeof(actions)/sizeof(actions[0]))
  13817. + return actions[action];
  13818. + return "unknown";
  13819. +}
  13820. +
  13821. +#endif /* _LATENCY_HIST_H */
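getaction() indexes the actions[] table, so the upper bound has to be a strict less-than: actions[ARRAY_SIZE(actions)] would be one past the end. A standalone sketch of the bounded enum-to-string lookup:

#include <stdio.h>

enum hist_action { IRQS_ON, PREEMPT_ON, TRACE_STOP, IRQS_OFF, PREEMPT_OFF, TRACE_START };

static const char *actions[] = {
	"IRQS_ON", "PREEMPT_ON", "TRACE_STOP",
	"IRQS_OFF", "PREEMPT_OFF", "TRACE_START",
};

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const char *getaction(int action)
{
	/* Strict '<': index ARRAY_SIZE(actions) is out of bounds. */
	if (action >= 0 && action < (int)ARRAY_SIZE(actions))
		return actions[action];
	return "unknown";
}

int main(void)
{
	printf("%s %s\n", getaction(IRQS_OFF), getaction(42));
	return 0;
}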
  13822. diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
  13823. index 30fedaf3e56a..3b63828390a6 100644
  13824. --- a/include/trace/events/sched.h
  13825. +++ b/include/trace/events/sched.h
  13826. @@ -55,9 +55,9 @@ TRACE_EVENT(sched_kthread_stop_ret,
  13827. */
  13828. DECLARE_EVENT_CLASS(sched_wakeup_template,
  13829. - TP_PROTO(struct task_struct *p, int success),
  13830. + TP_PROTO(struct task_struct *p),
  13831. - TP_ARGS(__perf_task(p), success),
  13832. + TP_ARGS(__perf_task(p)),
  13833. TP_STRUCT__entry(
  13834. __array( char, comm, TASK_COMM_LEN )
  13835. @@ -71,25 +71,37 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
  13836. memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
  13837. __entry->pid = p->pid;
  13838. __entry->prio = p->prio;
  13839. - __entry->success = success;
  13840. + __entry->success = 1; /* rudiment, kill when possible */
  13841. __entry->target_cpu = task_cpu(p);
  13842. ),
  13843. - TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
  13844. + TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
  13845. __entry->comm, __entry->pid, __entry->prio,
  13846. - __entry->success, __entry->target_cpu)
  13847. + __entry->target_cpu)
  13848. );
  13849. +/*
  13850. + * Tracepoint called when waking a task; this tracepoint is guaranteed to be
  13851. + * called from the waking context.
  13852. + */
  13853. +DEFINE_EVENT(sched_wakeup_template, sched_waking,
  13854. + TP_PROTO(struct task_struct *p),
  13855. + TP_ARGS(p));
  13856. +
  13857. +/*
13858. + * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
13859. + * It is not always called from the waking context.
  13860. + */
  13861. DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
  13862. - TP_PROTO(struct task_struct *p, int success),
  13863. - TP_ARGS(p, success));
  13864. + TP_PROTO(struct task_struct *p),
  13865. + TP_ARGS(p));
  13866. /*
  13867. * Tracepoint for waking up a new task:
  13868. */
  13869. DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
  13870. - TP_PROTO(struct task_struct *p, int success),
  13871. - TP_ARGS(p, success));
  13872. + TP_PROTO(struct task_struct *p),
  13873. + TP_ARGS(p));
  13874. #ifdef CREATE_TRACE_POINTS
  13875. static inline long __trace_sched_switch_state(struct task_struct *p)
  13876. diff --git a/init/Kconfig b/init/Kconfig
  13877. index dc24dec60232..a70b5002df06 100644
  13878. --- a/init/Kconfig
  13879. +++ b/init/Kconfig
  13880. @@ -637,7 +637,7 @@ config RCU_FANOUT_EXACT
  13881. config RCU_FAST_NO_HZ
  13882. bool "Accelerate last non-dyntick-idle CPU's grace periods"
  13883. - depends on NO_HZ_COMMON && SMP
  13884. + depends on NO_HZ_COMMON && SMP && !PREEMPT_RT_FULL
  13885. default n
  13886. help
  13887. This option permits CPUs to enter dynticks-idle state even if
  13888. @@ -664,7 +664,7 @@ config TREE_RCU_TRACE
  13889. config RCU_BOOST
  13890. bool "Enable RCU priority boosting"
  13891. depends on RT_MUTEXES && PREEMPT_RCU
  13892. - default n
  13893. + default y if PREEMPT_RT_FULL
  13894. help
  13895. This option boosts the priority of preempted RCU readers that
  13896. block the current preemptible RCU grace period for too long.
  13897. @@ -1101,6 +1101,7 @@ config CFS_BANDWIDTH
  13898. config RT_GROUP_SCHED
  13899. bool "Group scheduling for SCHED_RR/FIFO"
  13900. depends on CGROUP_SCHED
  13901. + depends on !PREEMPT_RT_FULL
  13902. default n
  13903. help
  13904. This feature lets you explicitly allocate real CPU bandwidth
  13905. @@ -1688,6 +1689,7 @@ choice
  13906. config SLAB
  13907. bool "SLAB"
  13908. + depends on !PREEMPT_RT_FULL
  13909. help
  13910. The regular slab allocator that is established and known to work
  13911. well in all environments. It organizes cache hot objects in
  13912. @@ -1706,6 +1708,7 @@ config SLUB
  13913. config SLOB
  13914. depends on EXPERT
  13915. bool "SLOB (Simple Allocator)"
  13916. + depends on !PREEMPT_RT_FULL
  13917. help
  13918. SLOB replaces the stock allocator with a drastically simpler
  13919. allocator. SLOB is generally more space efficient but
  13920. @@ -1715,7 +1718,7 @@ endchoice
  13921. config SLUB_CPU_PARTIAL
  13922. default y
  13923. - depends on SLUB && SMP
  13924. + depends on SLUB && SMP && !PREEMPT_RT_FULL
  13925. bool "SLUB per cpu partial cache"
  13926. help
  13927. Per cpu partial caches accellerate objects allocation and freeing
  13928. diff --git a/init/Makefile b/init/Makefile
  13929. index 7bc47ee31c36..88cf473554e0 100644
  13930. --- a/init/Makefile
  13931. +++ b/init/Makefile
  13932. @@ -33,4 +33,4 @@ silent_chk_compile.h = :
  13933. include/generated/compile.h: FORCE
  13934. @$($(quiet)chk_compile.h)
  13935. $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
  13936. - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
  13937. + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)"
  13938. diff --git a/init/main.c b/init/main.c
  13939. index 2a89545e0a5d..0486a8e11fc0 100644
  13940. --- a/init/main.c
  13941. +++ b/init/main.c
  13942. @@ -525,6 +525,7 @@ asmlinkage __visible void __init start_kernel(void)
  13943. setup_command_line(command_line);
  13944. setup_nr_cpu_ids();
  13945. setup_per_cpu_areas();
  13946. + softirq_early_init();
  13947. smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
  13948. build_all_zonelists(NULL, NULL);
  13949. diff --git a/ipc/mqueue.c b/ipc/mqueue.c
  13950. index c3fc5c2b63f3..161a1807e6ef 100644
  13951. --- a/ipc/mqueue.c
  13952. +++ b/ipc/mqueue.c
  13953. @@ -47,8 +47,7 @@
  13954. #define RECV 1
  13955. #define STATE_NONE 0
  13956. -#define STATE_PENDING 1
  13957. -#define STATE_READY 2
  13958. +#define STATE_READY 1
  13959. struct posix_msg_tree_node {
  13960. struct rb_node rb_node;
  13961. @@ -568,15 +567,12 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr,
  13962. wq_add(info, sr, ewp);
  13963. for (;;) {
  13964. - set_current_state(TASK_INTERRUPTIBLE);
  13965. + __set_current_state(TASK_INTERRUPTIBLE);
  13966. spin_unlock(&info->lock);
  13967. time = schedule_hrtimeout_range_clock(timeout, 0,
  13968. HRTIMER_MODE_ABS, CLOCK_REALTIME);
  13969. - while (ewp->state == STATE_PENDING)
  13970. - cpu_relax();
  13971. -
  13972. if (ewp->state == STATE_READY) {
  13973. retval = 0;
  13974. goto out;
  13975. @@ -904,11 +900,15 @@ out_name:
  13976. * list of waiting receivers. A sender checks that list before adding the new
  13977. * message into the message array. If there is a waiting receiver, then it
  13978. * bypasses the message array and directly hands the message over to the
  13979. - * receiver.
  13980. - * The receiver accepts the message and returns without grabbing the queue
  13981. - * spinlock. Therefore an intermediate STATE_PENDING state and memory barriers
  13982. - * are necessary. The same algorithm is used for sysv semaphores, see
  13983. - * ipc/sem.c for more details.
  13984. + * receiver. The receiver accepts the message and returns without grabbing the
  13985. + * queue spinlock:
  13986. + *
  13987. + * - Set pointer to message.
  13988. + * - Queue the receiver task for later wakeup (without the info->lock).
  13989. + * - Update its state to STATE_READY. Now the receiver can continue.
  13990. + * - Wake up the process after the lock is dropped. Should the process wake up
  13991. + * before this wakeup (due to a timeout or a signal) it will either see
  13992. + * STATE_READY and continue or acquire the lock to check the state again.
  13993. *
  13994. * The same algorithm is used for senders.
  13995. */
  13996. @@ -916,21 +916,29 @@ out_name:
  13997. /* pipelined_send() - send a message directly to the task waiting in
  13998. * sys_mq_timedreceive() (without inserting message into a queue).
  13999. */
  14000. -static inline void pipelined_send(struct mqueue_inode_info *info,
  14001. +static inline void pipelined_send(struct wake_q_head *wake_q,
  14002. + struct mqueue_inode_info *info,
  14003. struct msg_msg *message,
  14004. struct ext_wait_queue *receiver)
  14005. {
  14006. receiver->msg = message;
  14007. list_del(&receiver->list);
  14008. - receiver->state = STATE_PENDING;
  14009. - wake_up_process(receiver->task);
  14010. - smp_wmb();
  14011. + wake_q_add(wake_q, receiver->task);
  14012. + /*
  14013. + * Rely on the implicit cmpxchg barrier from wake_q_add such
  14014. + * that we can ensure that updating receiver->state is the last
14015. + * write operation: once it is set, the receiver can continue,
14016. + * and if we had not yet taken a reference count via wake_q_add,
14017. + * that could otherwise lead to a use-after-free and a bogus
14018. + * wakeup.
  14019. + */
  14020. receiver->state = STATE_READY;
  14021. }
  14022. /* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
  14023. * gets its message and put to the queue (we have one free place for sure). */
  14024. -static inline void pipelined_receive(struct mqueue_inode_info *info)
  14025. +static inline void pipelined_receive(struct wake_q_head *wake_q,
  14026. + struct mqueue_inode_info *info)
  14027. {
  14028. struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND);
  14029. @@ -941,10 +949,9 @@ static inline void pipelined_receive(struct mqueue_inode_info *info)
  14030. }
  14031. if (msg_insert(sender->msg, info))
  14032. return;
  14033. +
  14034. list_del(&sender->list);
  14035. - sender->state = STATE_PENDING;
  14036. - wake_up_process(sender->task);
  14037. - smp_wmb();
  14038. + wake_q_add(wake_q, sender->task);
  14039. sender->state = STATE_READY;
  14040. }
  14041. @@ -962,6 +969,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
  14042. struct timespec ts;
  14043. struct posix_msg_tree_node *new_leaf = NULL;
  14044. int ret = 0;
  14045. + WAKE_Q(wake_q);
  14046. if (u_abs_timeout) {
  14047. int res = prepare_timeout(u_abs_timeout, &expires, &ts);
  14048. @@ -1045,7 +1053,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
  14049. } else {
  14050. receiver = wq_get_first_waiter(info, RECV);
  14051. if (receiver) {
  14052. - pipelined_send(info, msg_ptr, receiver);
  14053. + pipelined_send(&wake_q, info, msg_ptr, receiver);
  14054. } else {
  14055. /* adds message to the queue */
  14056. ret = msg_insert(msg_ptr, info);
  14057. @@ -1058,6 +1066,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
  14058. }
  14059. out_unlock:
  14060. spin_unlock(&info->lock);
  14061. + wake_up_q(&wake_q);
  14062. out_free:
  14063. if (ret)
  14064. free_msg(msg_ptr);
  14065. @@ -1144,14 +1153,17 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
  14066. msg_ptr = wait.msg;
  14067. }
  14068. } else {
  14069. + WAKE_Q(wake_q);
  14070. +
  14071. msg_ptr = msg_get(info);
  14072. inode->i_atime = inode->i_mtime = inode->i_ctime =
  14073. CURRENT_TIME;
  14074. /* There is now free space in queue. */
  14075. - pipelined_receive(info);
  14076. + pipelined_receive(&wake_q, info);
  14077. spin_unlock(&info->lock);
  14078. + wake_up_q(&wake_q);
  14079. ret = 0;
  14080. }
  14081. if (ret == 0) {
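The mqueue rework follows a standard RT pattern: while info->lock is held, wakeups are only collected with wake_q_add(), and the actual wake_up_q() runs after the lock is dropped, so a woken higher-priority task cannot immediately preempt the waker while it still owns the lock. A userspace sketch of "collect under the lock, wake after unlock" (the names mirror the kernel API, but the code below is only an analogy):

#include <pthread.h>
#include <stdio.h>

#define MAX_WAKEUPS 8

struct wake_q {
	pthread_cond_t *conds[MAX_WAKEUPS];
	int n;
};

#define WAKE_Q_INIT { .n = 0 }

static void wake_q_add(struct wake_q *q, pthread_cond_t *c)
{
	if (q->n < MAX_WAKEUPS)
		q->conds[q->n++] = c;	/* just remember who to wake */
}

static void wake_up_q(struct wake_q *q)
{
	for (int i = 0; i < q->n; i++)
		pthread_cond_signal(q->conds[i]);
	q->n = 0;
}

static pthread_mutex_t info_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t receiver_cond = PTHREAD_COND_INITIALIZER;
static int state_ready;

static void pipelined_send(void)
{
	struct wake_q wq = WAKE_Q_INIT;

	pthread_mutex_lock(&info_lock);
	state_ready = 1;		/* hand over the "message" */
	wake_q_add(&wq, &receiver_cond);
	pthread_mutex_unlock(&info_lock);
	wake_up_q(&wq);			/* wake only after dropping the lock */
}

int main(void)
{
	pipelined_send();
	printf("state_ready=%d\n", state_ready);
	return 0;
}

In the kernel, wake_q_add() also takes a task reference and provides the barrier the pipelined_send() comment relies on; the sketch only models the deferred-wakeup ordering.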
  14082. diff --git a/ipc/msg.c b/ipc/msg.c
  14083. index 3b2b0f5149ab..a75e79ff05ee 100644
  14084. --- a/ipc/msg.c
  14085. +++ b/ipc/msg.c
  14086. @@ -188,6 +188,12 @@ static void expunge_all(struct msg_queue *msq, int res)
  14087. struct msg_receiver *msr, *t;
  14088. list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
  14089. + /*
14090. + * Make sure that the wakeup doesn't preempt
  14091. + * this CPU prematurely. (on PREEMPT_RT)
  14092. + */
  14093. + preempt_disable_rt();
  14094. +
  14095. msr->r_msg = NULL; /* initialize expunge ordering */
  14096. wake_up_process(msr->r_tsk);
  14097. /*
  14098. @@ -198,6 +204,8 @@ static void expunge_all(struct msg_queue *msq, int res)
  14099. */
  14100. smp_mb();
  14101. msr->r_msg = ERR_PTR(res);
  14102. +
  14103. + preempt_enable_rt();
  14104. }
  14105. }
  14106. @@ -574,6 +582,11 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
  14107. if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
  14108. !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
  14109. msr->r_msgtype, msr->r_mode)) {
  14110. + /*
14111. + * Make sure that the wakeup doesn't preempt
  14112. + * this CPU prematurely. (on PREEMPT_RT)
  14113. + */
  14114. + preempt_disable_rt();
  14115. list_del(&msr->r_list);
  14116. if (msr->r_maxsize < msg->m_ts) {
  14117. @@ -595,12 +608,13 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
  14118. */
  14119. smp_mb();
  14120. msr->r_msg = msg;
  14121. + preempt_enable_rt();
  14122. return 1;
  14123. }
  14124. + preempt_enable_rt();
  14125. }
  14126. }
  14127. -
  14128. return 0;
  14129. }
  14130. diff --git a/ipc/sem.c b/ipc/sem.c
  14131. index 534caee6bf33..fbfdb0b699e0 100644
  14132. --- a/ipc/sem.c
  14133. +++ b/ipc/sem.c
  14134. @@ -690,6 +690,13 @@ undo:
  14135. static void wake_up_sem_queue_prepare(struct list_head *pt,
  14136. struct sem_queue *q, int error)
  14137. {
  14138. +#ifdef CONFIG_PREEMPT_RT_BASE
  14139. + struct task_struct *p = q->sleeper;
  14140. + get_task_struct(p);
  14141. + q->status = error;
  14142. + wake_up_process(p);
  14143. + put_task_struct(p);
  14144. +#else
  14145. if (list_empty(pt)) {
  14146. /*
  14147. * Hold preempt off so that we don't get preempted and have the
  14148. @@ -701,6 +708,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt,
  14149. q->pid = error;
  14150. list_add_tail(&q->list, pt);
  14151. +#endif
  14152. }
  14153. /**
  14154. @@ -714,6 +722,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt,
  14155. */
  14156. static void wake_up_sem_queue_do(struct list_head *pt)
  14157. {
  14158. +#ifndef CONFIG_PREEMPT_RT_BASE
  14159. struct sem_queue *q, *t;
  14160. int did_something;
  14161. @@ -726,6 +735,7 @@ static void wake_up_sem_queue_do(struct list_head *pt)
  14162. }
  14163. if (did_something)
  14164. preempt_enable();
  14165. +#endif
  14166. }
  14167. static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
  14168. diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
  14169. index 08561f1acd13..c61e9131ecec 100644
  14170. --- a/kernel/Kconfig.locks
  14171. +++ b/kernel/Kconfig.locks
  14172. @@ -225,11 +225,11 @@ config ARCH_SUPPORTS_ATOMIC_RMW
  14173. config MUTEX_SPIN_ON_OWNER
  14174. def_bool y
  14175. - depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
  14176. + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
  14177. config RWSEM_SPIN_ON_OWNER
  14178. def_bool y
  14179. - depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
  14180. + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
  14181. config LOCK_SPIN_ON_OWNER
  14182. def_bool y
  14183. diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
  14184. index 3f9c97419f02..11dbe26a8279 100644
  14185. --- a/kernel/Kconfig.preempt
  14186. +++ b/kernel/Kconfig.preempt
  14187. @@ -1,3 +1,16 @@
  14188. +config PREEMPT
  14189. + bool
  14190. + select PREEMPT_COUNT
  14191. +
  14192. +config PREEMPT_RT_BASE
  14193. + bool
  14194. + select PREEMPT
  14195. +
  14196. +config HAVE_PREEMPT_LAZY
  14197. + bool
  14198. +
  14199. +config PREEMPT_LAZY
  14200. + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL
  14201. choice
  14202. prompt "Preemption Model"
  14203. @@ -33,9 +46,9 @@ config PREEMPT_VOLUNTARY
  14204. Select this if you are building a kernel for a desktop system.
  14205. -config PREEMPT
  14206. +config PREEMPT__LL
  14207. bool "Preemptible Kernel (Low-Latency Desktop)"
  14208. - select PREEMPT_COUNT
  14209. + select PREEMPT
  14210. select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
  14211. help
  14212. This option reduces the latency of the kernel by making
  14213. @@ -52,6 +65,22 @@ config PREEMPT
  14214. embedded system with latency requirements in the milliseconds
  14215. range.
  14216. +config PREEMPT_RTB
  14217. + bool "Preemptible Kernel (Basic RT)"
  14218. + select PREEMPT_RT_BASE
  14219. + help
  14220. + This option is basically the same as (Low-Latency Desktop) but
  14221. + enables changes which are preliminary for the full preemptible
  14222. + RT kernel.
  14223. +
  14224. +config PREEMPT_RT_FULL
  14225. + bool "Fully Preemptible Kernel (RT)"
  14226. + depends on IRQ_FORCED_THREADING
  14227. + select PREEMPT_RT_BASE
  14228. + select PREEMPT_RCU
  14229. + help
14230. + Select this to enable the complete PREEMPT_RT feature set: spinlocks become sleeping locks and interrupt handlers are force-threaded.
  14231. +
  14232. endchoice
  14233. config PREEMPT_COUNT
  14234. diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
  14235. index 83c209d9b17a..972b76bf54b7 100644
  14236. --- a/kernel/bpf/hashtab.c
  14237. +++ b/kernel/bpf/hashtab.c
  14238. @@ -17,7 +17,7 @@
  14239. struct bpf_htab {
  14240. struct bpf_map map;
  14241. struct hlist_head *buckets;
  14242. - spinlock_t lock;
  14243. + raw_spinlock_t lock;
  14244. u32 count; /* number of elements in this hashtable */
  14245. u32 n_buckets; /* number of hash buckets */
  14246. u32 elem_size; /* size of each element in bytes */
  14247. @@ -82,7 +82,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
  14248. for (i = 0; i < htab->n_buckets; i++)
  14249. INIT_HLIST_HEAD(&htab->buckets[i]);
  14250. - spin_lock_init(&htab->lock);
  14251. + raw_spin_lock_init(&htab->lock);
  14252. htab->count = 0;
  14253. htab->elem_size = sizeof(struct htab_elem) +
  14254. @@ -230,7 +230,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
  14255. l_new->hash = htab_map_hash(l_new->key, key_size);
  14256. /* bpf_map_update_elem() can be called in_irq() */
  14257. - spin_lock_irqsave(&htab->lock, flags);
  14258. + raw_spin_lock_irqsave(&htab->lock, flags);
  14259. head = select_bucket(htab, l_new->hash);
  14260. @@ -266,11 +266,11 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
  14261. } else {
  14262. htab->count++;
  14263. }
  14264. - spin_unlock_irqrestore(&htab->lock, flags);
  14265. + raw_spin_unlock_irqrestore(&htab->lock, flags);
  14266. return 0;
  14267. err:
  14268. - spin_unlock_irqrestore(&htab->lock, flags);
  14269. + raw_spin_unlock_irqrestore(&htab->lock, flags);
  14270. kfree(l_new);
  14271. return ret;
  14272. }
  14273. @@ -291,7 +291,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
  14274. hash = htab_map_hash(key, key_size);
  14275. - spin_lock_irqsave(&htab->lock, flags);
  14276. + raw_spin_lock_irqsave(&htab->lock, flags);
  14277. head = select_bucket(htab, hash);
  14278. @@ -304,7 +304,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
  14279. ret = 0;
  14280. }
  14281. - spin_unlock_irqrestore(&htab->lock, flags);
  14282. + raw_spin_unlock_irqrestore(&htab->lock, flags);
  14283. return ret;
  14284. }
  14285. diff --git a/kernel/cgroup.c b/kernel/cgroup.c
  14286. index 3abce1e0f910..d818976f1d62 100644
  14287. --- a/kernel/cgroup.c
  14288. +++ b/kernel/cgroup.c
  14289. @@ -4423,10 +4423,10 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head)
  14290. queue_work(cgroup_destroy_wq, &css->destroy_work);
  14291. }
  14292. -static void css_release_work_fn(struct work_struct *work)
  14293. +static void css_release_work_fn(struct swork_event *sev)
  14294. {
  14295. struct cgroup_subsys_state *css =
  14296. - container_of(work, struct cgroup_subsys_state, destroy_work);
  14297. + container_of(sev, struct cgroup_subsys_state, destroy_swork);
  14298. struct cgroup_subsys *ss = css->ss;
  14299. struct cgroup *cgrp = css->cgroup;
  14300. @@ -4465,8 +4465,8 @@ static void css_release(struct percpu_ref *ref)
  14301. struct cgroup_subsys_state *css =
  14302. container_of(ref, struct cgroup_subsys_state, refcnt);
  14303. - INIT_WORK(&css->destroy_work, css_release_work_fn);
  14304. - queue_work(cgroup_destroy_wq, &css->destroy_work);
  14305. + INIT_SWORK(&css->destroy_swork, css_release_work_fn);
  14306. + swork_queue(&css->destroy_swork);
  14307. }
  14308. static void init_and_link_css(struct cgroup_subsys_state *css,
  14309. @@ -5080,6 +5080,7 @@ static int __init cgroup_wq_init(void)
  14310. */
  14311. cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
  14312. BUG_ON(!cgroup_destroy_wq);
  14313. + BUG_ON(swork_get());
  14314. /*
  14315. * Used to destroy pidlists and separate to serve as flush domain.
  14316. diff --git a/kernel/cpu.c b/kernel/cpu.c
  14317. index 94bbe4695232..0351ac42263e 100644
  14318. --- a/kernel/cpu.c
  14319. +++ b/kernel/cpu.c
  14320. @@ -74,8 +74,8 @@ static struct {
  14321. #endif
  14322. } cpu_hotplug = {
  14323. .active_writer = NULL,
  14324. - .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
  14325. .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
  14326. + .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
  14327. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  14328. .dep_map = {.name = "cpu_hotplug.lock" },
  14329. #endif
  14330. @@ -88,6 +88,289 @@ static struct {
  14331. #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
  14332. #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
  14333. +/**
  14334. + * hotplug_pcp - per cpu hotplug descriptor
  14335. + * @unplug: set when pin_current_cpu() needs to sync tasks
  14336. + * @sync_tsk: the task that waits for tasks to finish pinned sections
  14337. + * @refcount: counter of tasks in pinned sections
  14338. + * @grab_lock: set when the tasks entering pinned sections should wait
  14339. + * @synced: notifier for @sync_tsk to tell cpu_down it's finished
  14340. + * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
  14341. + * @mutex_init: zero if the mutex hasn't been initialized yet.
  14342. + *
  14343. + * Although @unplug and @sync_tsk may point to the same task, the @unplug
  14344. + * is used as a flag and still exists after @sync_tsk has exited and
14345. + * @sync_tsk has been set to NULL.
  14346. + */
  14347. +struct hotplug_pcp {
  14348. + struct task_struct *unplug;
  14349. + struct task_struct *sync_tsk;
  14350. + int refcount;
  14351. + int grab_lock;
  14352. + struct completion synced;
  14353. + struct completion unplug_wait;
  14354. +#ifdef CONFIG_PREEMPT_RT_FULL
  14355. + /*
  14356. + * Note, on PREEMPT_RT, the hotplug lock must save the state of
  14357. + * the task, otherwise the mutex will cause the task to fail
  14358. + * to sleep when required. (Because it's called from migrate_disable())
  14359. + *
  14360. + * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
  14361. + * state.
  14362. + */
  14363. + spinlock_t lock;
  14364. +#else
  14365. + struct mutex mutex;
  14366. +#endif
  14367. + int mutex_init;
  14368. +};
  14369. +
  14370. +#ifdef CONFIG_PREEMPT_RT_FULL
  14371. +# define hotplug_lock(hp) rt_spin_lock(&(hp)->lock)
  14372. +# define hotplug_unlock(hp) rt_spin_unlock(&(hp)->lock)
  14373. +#else
  14374. +# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
  14375. +# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
  14376. +#endif
  14377. +
  14378. +static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
  14379. +
  14380. +/**
  14381. + * pin_current_cpu - Prevent the current cpu from being unplugged
  14382. + *
  14383. + * Lightweight version of get_online_cpus() to prevent cpu from being
  14384. + * unplugged when code runs in a migration disabled region.
  14385. + *
  14386. + * Must be called with preemption disabled (preempt_count = 1)!
  14387. + */
  14388. +void pin_current_cpu(void)
  14389. +{
  14390. + struct hotplug_pcp *hp;
  14391. + int force = 0;
  14392. +
  14393. +retry:
  14394. + hp = this_cpu_ptr(&hotplug_pcp);
  14395. +
  14396. + if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
  14397. + hp->unplug == current) {
  14398. + hp->refcount++;
  14399. + return;
  14400. + }
  14401. + if (hp->grab_lock) {
  14402. + preempt_enable();
  14403. + hotplug_lock(hp);
  14404. + hotplug_unlock(hp);
  14405. + } else {
  14406. + preempt_enable();
  14407. + /*
  14408. + * Try to push this task off of this CPU.
  14409. + */
  14410. + if (!migrate_me()) {
  14411. + preempt_disable();
  14412. + hp = this_cpu_ptr(&hotplug_pcp);
  14413. + if (!hp->grab_lock) {
  14414. + /*
14415. + * Just let it continue; it's already pinned
  14416. + * or about to sleep.
  14417. + */
  14418. + force = 1;
  14419. + goto retry;
  14420. + }
  14421. + preempt_enable();
  14422. + }
  14423. + }
  14424. + preempt_disable();
  14425. + goto retry;
  14426. +}
  14427. +
  14428. +/**
  14429. + * unpin_current_cpu - Allow unplug of current cpu
  14430. + *
  14431. + * Must be called with preemption or interrupts disabled!
  14432. + */
  14433. +void unpin_current_cpu(void)
  14434. +{
  14435. + struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
  14436. +
  14437. + WARN_ON(hp->refcount <= 0);
  14438. +
  14439. + /* This is safe. sync_unplug_thread is pinned to this cpu */
  14440. + if (!--hp->refcount && hp->unplug && hp->unplug != current)
  14441. + wake_up_process(hp->unplug);
  14442. +}
  14443. +
  14444. +static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
  14445. +{
  14446. + set_current_state(TASK_UNINTERRUPTIBLE);
  14447. + while (hp->refcount) {
  14448. + schedule_preempt_disabled();
  14449. + set_current_state(TASK_UNINTERRUPTIBLE);
  14450. + }
  14451. +}
  14452. +
  14453. +static int sync_unplug_thread(void *data)
  14454. +{
  14455. + struct hotplug_pcp *hp = data;
  14456. +
  14457. + wait_for_completion(&hp->unplug_wait);
  14458. + preempt_disable();
  14459. + hp->unplug = current;
  14460. + wait_for_pinned_cpus(hp);
  14461. +
  14462. + /*
  14463. + * This thread will synchronize the cpu_down() with threads
  14464. + * that have pinned the CPU. When the pinned CPU count reaches
  14465. + * zero, we inform the cpu_down code to continue to the next step.
  14466. + */
  14467. + set_current_state(TASK_UNINTERRUPTIBLE);
  14468. + preempt_enable();
  14469. + complete(&hp->synced);
  14470. +
  14471. + /*
  14472. + * If all succeeds, the next step will need tasks to wait till
  14473. + * the CPU is offline before continuing. To do this, the grab_lock
  14474. + * is set and tasks going into pin_current_cpu() will block on the
  14475. + * mutex. But we still need to wait for those that are already in
  14476. + * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
  14477. + * will kick this thread out.
  14478. + */
  14479. + while (!hp->grab_lock && !kthread_should_stop()) {
  14480. + schedule();
  14481. + set_current_state(TASK_UNINTERRUPTIBLE);
  14482. + }
  14483. +
  14484. + /* Make sure grab_lock is seen before we see a stale completion */
  14485. + smp_mb();
  14486. +
  14487. + /*
  14488. + * Now just before cpu_down() enters stop machine, we need to make
  14489. + * sure all tasks that are in pinned CPU sections are out, and new
  14490. + * tasks will now grab the lock, keeping them from entering pinned
  14491. + * CPU sections.
  14492. + */
  14493. + if (!kthread_should_stop()) {
  14494. + preempt_disable();
  14495. + wait_for_pinned_cpus(hp);
  14496. + preempt_enable();
  14497. + complete(&hp->synced);
  14498. + }
  14499. +
  14500. + set_current_state(TASK_UNINTERRUPTIBLE);
  14501. + while (!kthread_should_stop()) {
  14502. + schedule();
  14503. + set_current_state(TASK_UNINTERRUPTIBLE);
  14504. + }
  14505. + set_current_state(TASK_RUNNING);
  14506. +
  14507. + /*
  14508. + * Force this thread off this CPU as it's going down and
  14509. + * we don't want any more work on this CPU.
  14510. + */
  14511. + current->flags &= ~PF_NO_SETAFFINITY;
  14512. + set_cpus_allowed_ptr(current, cpu_present_mask);
  14513. + migrate_me();
  14514. + return 0;
  14515. +}
  14516. +
  14517. +static void __cpu_unplug_sync(struct hotplug_pcp *hp)
  14518. +{
  14519. + wake_up_process(hp->sync_tsk);
  14520. + wait_for_completion(&hp->synced);
  14521. +}
  14522. +
  14523. +static void __cpu_unplug_wait(unsigned int cpu)
  14524. +{
  14525. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  14526. +
  14527. + complete(&hp->unplug_wait);
  14528. + wait_for_completion(&hp->synced);
  14529. +}
  14530. +
  14531. +/*
  14532. + * Start the sync_unplug_thread on the target cpu and wait for it to
  14533. + * complete.
  14534. + */
  14535. +static int cpu_unplug_begin(unsigned int cpu)
  14536. +{
  14537. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  14538. + int err;
  14539. +
  14540. + /* Protected by cpu_hotplug.lock */
  14541. + if (!hp->mutex_init) {
  14542. +#ifdef CONFIG_PREEMPT_RT_FULL
  14543. + spin_lock_init(&hp->lock);
  14544. +#else
  14545. + mutex_init(&hp->mutex);
  14546. +#endif
  14547. + hp->mutex_init = 1;
  14548. + }
  14549. +
  14550. + /* Inform the scheduler to migrate tasks off this CPU */
  14551. + tell_sched_cpu_down_begin(cpu);
  14552. +
  14553. + init_completion(&hp->synced);
  14554. + init_completion(&hp->unplug_wait);
  14555. +
  14556. + hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
  14557. + if (IS_ERR(hp->sync_tsk)) {
  14558. + err = PTR_ERR(hp->sync_tsk);
  14559. + hp->sync_tsk = NULL;
  14560. + return err;
  14561. + }
  14562. + kthread_bind(hp->sync_tsk, cpu);
  14563. +
  14564. + /*
  14565. + * Wait for tasks to get out of the pinned sections,
  14566. + * it's still OK if new tasks enter. Some CPU notifiers will
  14567. + * wait for tasks that are going to enter these sections and
  14568. + * we must not have them block.
  14569. + */
  14570. + wake_up_process(hp->sync_tsk);
  14571. + return 0;
  14572. +}
  14573. +
  14574. +static void cpu_unplug_sync(unsigned int cpu)
  14575. +{
  14576. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  14577. +
  14578. + init_completion(&hp->synced);
14579. + /* The completion needs to be initialized before setting grab_lock */
  14580. + smp_wmb();
  14581. +
  14582. + /* Grab the mutex before setting grab_lock */
  14583. + hotplug_lock(hp);
  14584. + hp->grab_lock = 1;
  14585. +
  14586. + /*
  14587. + * The CPU notifiers have been completed.
  14588. + * Wait for tasks to get out of pinned CPU sections and have new
  14589. + * tasks block until the CPU is completely down.
  14590. + */
  14591. + __cpu_unplug_sync(hp);
  14592. +
  14593. + /* All done with the sync thread */
  14594. + kthread_stop(hp->sync_tsk);
  14595. + hp->sync_tsk = NULL;
  14596. +}
  14597. +
  14598. +static void cpu_unplug_done(unsigned int cpu)
  14599. +{
  14600. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  14601. +
  14602. + hp->unplug = NULL;
  14603. + /* Let all tasks know cpu unplug is finished before cleaning up */
  14604. + smp_wmb();
  14605. +
  14606. + if (hp->sync_tsk)
  14607. + kthread_stop(hp->sync_tsk);
  14608. +
  14609. + if (hp->grab_lock) {
  14610. + hotplug_unlock(hp);
  14611. + /* protected by cpu_hotplug.lock */
  14612. + hp->grab_lock = 0;
  14613. + }
  14614. + tell_sched_cpu_down_done(cpu);
  14615. +}
  14616. void get_online_cpus(void)
  14617. {
  14618. @@ -349,13 +632,15 @@ static int __ref take_cpu_down(void *_param)
  14619. /* Requires cpu_add_remove_lock to be held */
  14620. static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
  14621. {
  14622. - int err, nr_calls = 0;
  14623. + int mycpu, err, nr_calls = 0;
  14624. void *hcpu = (void *)(long)cpu;
  14625. unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
  14626. struct take_cpu_down_param tcd_param = {
  14627. .mod = mod,
  14628. .hcpu = hcpu,
  14629. };
  14630. + cpumask_var_t cpumask;
  14631. + cpumask_var_t cpumask_org;
  14632. if (num_online_cpus() == 1)
  14633. return -EBUSY;
  14634. @@ -363,7 +648,34 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
  14635. if (!cpu_online(cpu))
  14636. return -EINVAL;
  14637. + /* Move the downtaker off the unplug cpu */
  14638. + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
  14639. + return -ENOMEM;
  14640. + if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) {
  14641. + free_cpumask_var(cpumask);
  14642. + return -ENOMEM;
  14643. + }
  14644. +
  14645. + cpumask_copy(cpumask_org, tsk_cpus_allowed(current));
  14646. + cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
  14647. + set_cpus_allowed_ptr(current, cpumask);
  14648. + free_cpumask_var(cpumask);
  14649. + migrate_disable();
  14650. + mycpu = smp_processor_id();
  14651. + if (mycpu == cpu) {
  14652. + printk(KERN_ERR "Yuck! Still on unplug CPU!\n");
  14653. + migrate_enable();
  14654. + err = -EBUSY;
  14655. + goto restore_cpus;
  14656. + }
  14657. + migrate_enable();
  14658. +
  14659. cpu_hotplug_begin();
  14660. + err = cpu_unplug_begin(cpu);
  14661. + if (err) {
  14662. + printk("cpu_unplug_begin(%d) failed\n", cpu);
  14663. + goto out_cancel;
  14664. + }
  14665. err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
  14666. if (err) {
  14667. @@ -389,8 +701,12 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
  14668. #endif
  14669. synchronize_rcu();
  14670. + __cpu_unplug_wait(cpu);
  14671. smpboot_park_threads(cpu);
  14672. + /* Notifiers are done. Don't let any more tasks pin this CPU. */
  14673. + cpu_unplug_sync(cpu);
  14674. +
  14675. /*
  14676. * So now all preempt/rcu users must observe !cpu_active().
  14677. */
  14678. @@ -427,9 +743,14 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
  14679. check_for_tasks(cpu);
  14680. out_release:
  14681. + cpu_unplug_done(cpu);
  14682. +out_cancel:
  14683. cpu_hotplug_done();
  14684. if (!err)
  14685. cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
  14686. +restore_cpus:
  14687. + set_cpus_allowed_ptr(current, cpumask_org);
  14688. + free_cpumask_var(cpumask_org);
  14689. return err;
  14690. }
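The cpu_unplug_begin()/cpu_unplug_sync() machinery above boils down to a reusable idiom: create a kthread, bind it to the target CPU, wake it, rendezvous with it through a completion, and finally kthread_stop() it. Below is a minimal, self-contained sketch of that handshake as a demo module; every name (demo_*, "sync_demo") is illustrative and not part of the patch, and the msleep() merely stands in for waiting out the pinned sections.

#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/err.h>

static struct task_struct *sync_tsk;
static DECLARE_COMPLETION(synced);

static int demo_sync_thread(void *unused)
{
        /* Stand-in for wait_for_pinned_cpus(); then signal the waiter. */
        msleep(10);
        complete(&synced);

        /* Park until the creator tells us to stop, as sync_unplug_thread() does. */
        set_current_state(TASK_UNINTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_UNINTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}

static int __init demo_init(void)
{
        sync_tsk = kthread_create(demo_sync_thread, NULL, "sync_demo/%d", 0);
        if (IS_ERR(sync_tsk))
                return PTR_ERR(sync_tsk);

        kthread_bind(sync_tsk, 0);      /* pin to CPU 0, like "sync_unplug/%d" */
        wake_up_process(sync_tsk);      /* mirrors cpu_unplug_begin() */

        wait_for_completion(&synced);   /* mirrors __cpu_unplug_sync() */
        return 0;
}

static void __exit demo_exit(void)
{
        kthread_stop(sync_tsk);         /* mirrors cpu_unplug_sync() teardown */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");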
  14691. diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
  14692. index fc1ef736253c..83c666537a7a 100644
  14693. --- a/kernel/debug/kdb/kdb_io.c
  14694. +++ b/kernel/debug/kdb/kdb_io.c
  14695. @@ -554,7 +554,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
  14696. int linecount;
  14697. int colcount;
  14698. int logging, saved_loglevel = 0;
  14699. - int saved_trap_printk;
  14700. int got_printf_lock = 0;
  14701. int retlen = 0;
  14702. int fnd, len;
  14703. @@ -565,8 +564,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
  14704. unsigned long uninitialized_var(flags);
  14705. preempt_disable();
  14706. - saved_trap_printk = kdb_trap_printk;
  14707. - kdb_trap_printk = 0;
  14708. /* Serialize kdb_printf if multiple cpus try to write at once.
  14709. * But if any cpu goes recursive in kdb, just print the output,
  14710. @@ -855,7 +852,6 @@ kdb_print_out:
  14711. } else {
  14712. __release(kdb_printf_lock);
  14713. }
  14714. - kdb_trap_printk = saved_trap_printk;
  14715. preempt_enable();
  14716. return retlen;
  14717. }
  14718. @@ -865,9 +861,11 @@ int kdb_printf(const char *fmt, ...)
  14719. va_list ap;
  14720. int r;
  14721. + kdb_trap_printk++;
  14722. va_start(ap, fmt);
  14723. r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap);
  14724. va_end(ap);
  14725. + kdb_trap_printk--;
  14726. return r;
  14727. }
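The kdb_io.c hunks above drop the save/clear/restore of kdb_trap_printk inside vkdb_printf() and instead bump a counter around the call in kdb_printf(), so the guard nests naturally and no longer depends on the preempt_disable() section. A tiny stand-alone C sketch of that depth-counter pattern (all names here are hypothetical):

#include <stdarg.h>
#include <stdio.h>

/* Non-zero while we are inside the wrapper; the lower layer can test it
 * to avoid re-routing its own output through the wrapper again. */
static int trap_depth;

static int raw_vprint(const char *fmt, va_list ap)
{
        return vfprintf(stderr, fmt, ap);
}

static int guarded_printf(const char *fmt, ...)
{
        va_list ap;
        int r;

        trap_depth++;                   /* mirrors kdb_trap_printk++ */
        va_start(ap, fmt);
        r = raw_vprint(fmt, ap);
        va_end(ap);
        trap_depth--;                   /* mirrors kdb_trap_printk-- */
        return r;
}

int main(void)
{
        int r = guarded_printf("guarded output\n");

        /* The counter is balanced again once the call returns. */
        return (r < 0 || trap_depth != 0);
}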
  14728. diff --git a/kernel/events/core.c b/kernel/events/core.c
  14729. index 6da64f0d0630..aa35b5850d36 100644
  14730. --- a/kernel/events/core.c
  14731. +++ b/kernel/events/core.c
  14732. @@ -6925,6 +6925,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
  14733. hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  14734. hwc->hrtimer.function = perf_swevent_hrtimer;
  14735. + hwc->hrtimer.irqsafe = 1;
  14736. /*
  14737. * Since hrtimers have a fixed rate, we can do a static freq->period
  14738. diff --git a/kernel/exit.c b/kernel/exit.c
  14739. index 819f51ec4f55..44eb884773e0 100644
  14740. --- a/kernel/exit.c
  14741. +++ b/kernel/exit.c
  14742. @@ -144,7 +144,7 @@ static void __exit_signal(struct task_struct *tsk)
  14743. * Do this under ->siglock, we can race with another thread
  14744. * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
  14745. */
  14746. - flush_sigqueue(&tsk->pending);
  14747. + flush_task_sigqueue(tsk);
  14748. tsk->sighand = NULL;
  14749. spin_unlock(&sighand->siglock);
  14750. diff --git a/kernel/fork.c b/kernel/fork.c
  14751. index 8209fa2d36ef..8f8a0a13d212 100644
  14752. --- a/kernel/fork.c
  14753. +++ b/kernel/fork.c
  14754. @@ -108,7 +108,7 @@ int max_threads; /* tunable limit on nr_threads */
  14755. DEFINE_PER_CPU(unsigned long, process_counts) = 0;
  14756. -__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
  14757. +DEFINE_RWLOCK(tasklist_lock); /* outer */
  14758. #ifdef CONFIG_PROVE_RCU
  14759. int lockdep_tasklist_lock_is_held(void)
  14760. @@ -244,7 +244,9 @@ static inline void put_signal_struct(struct signal_struct *sig)
  14761. if (atomic_dec_and_test(&sig->sigcnt))
  14762. free_signal_struct(sig);
  14763. }
  14764. -
  14765. +#ifdef CONFIG_PREEMPT_RT_BASE
  14766. +static
  14767. +#endif
  14768. void __put_task_struct(struct task_struct *tsk)
  14769. {
  14770. WARN_ON(!tsk->exit_state);
  14771. @@ -260,7 +262,18 @@ void __put_task_struct(struct task_struct *tsk)
  14772. if (!profile_handoff_task(tsk))
  14773. free_task(tsk);
  14774. }
  14775. +#ifndef CONFIG_PREEMPT_RT_BASE
  14776. EXPORT_SYMBOL_GPL(__put_task_struct);
  14777. +#else
  14778. +void __put_task_struct_cb(struct rcu_head *rhp)
  14779. +{
  14780. + struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu);
  14781. +
  14782. + __put_task_struct(tsk);
  14783. +
  14784. +}
  14785. +EXPORT_SYMBOL_GPL(__put_task_struct_cb);
  14786. +#endif
  14787. void __init __weak arch_task_cache_init(void) { }
  14788. @@ -374,6 +387,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
  14789. #endif
  14790. tsk->splice_pipe = NULL;
  14791. tsk->task_frag.page = NULL;
  14792. + tsk->wake_q.next = NULL;
  14793. account_kernel_stack(ti, 1);
  14794. @@ -680,6 +694,19 @@ void __mmdrop(struct mm_struct *mm)
  14795. }
  14796. EXPORT_SYMBOL_GPL(__mmdrop);
  14797. +#ifdef CONFIG_PREEMPT_RT_BASE
  14798. +/*
  14799. + * RCU callback for delayed mm drop. Not strictly RCU, but we don't
  14800. + * want another facility to make this work.
  14801. + */
  14802. +void __mmdrop_delayed(struct rcu_head *rhp)
  14803. +{
  14804. + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
  14805. +
  14806. + __mmdrop(mm);
  14807. +}
  14808. +#endif
  14809. +
  14810. /*
  14811. * Decrement the use count and release all resources for an mm.
  14812. */
  14813. @@ -1214,6 +1241,9 @@ static void rt_mutex_init_task(struct task_struct *p)
  14814. */
  14815. static void posix_cpu_timers_init(struct task_struct *tsk)
  14816. {
  14817. +#ifdef CONFIG_PREEMPT_RT_BASE
  14818. + tsk->posix_timer_list = NULL;
  14819. +#endif
  14820. tsk->cputime_expires.prof_exp = 0;
  14821. tsk->cputime_expires.virt_exp = 0;
  14822. tsk->cputime_expires.sched_exp = 0;
  14823. @@ -1338,6 +1368,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
  14824. spin_lock_init(&p->alloc_lock);
  14825. init_sigpending(&p->pending);
  14826. + p->sigqueue_cache = NULL;
  14827. p->utime = p->stime = p->gtime = 0;
  14828. p->utimescaled = p->stimescaled = 0;
  14829. @@ -1345,7 +1376,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
  14830. p->prev_cputime.utime = p->prev_cputime.stime = 0;
  14831. #endif
  14832. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  14833. - seqlock_init(&p->vtime_seqlock);
  14834. + raw_spin_lock_init(&p->vtime_lock);
  14835. + seqcount_init(&p->vtime_seq);
  14836. p->vtime_snap = 0;
  14837. p->vtime_snap_whence = VTIME_SLEEPING;
  14838. #endif
  14839. @@ -1396,6 +1428,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
  14840. p->hardirq_context = 0;
  14841. p->softirq_context = 0;
  14842. #endif
  14843. +
  14844. + p->pagefault_disabled = 0;
  14845. +
  14846. #ifdef CONFIG_LOCKDEP
  14847. p->lockdep_depth = 0; /* no locks held yet */
  14848. p->curr_chain_key = 0;
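__put_task_struct_cb() and __mmdrop_delayed() above both use the rcu_head/container_of() idiom: embed a struct rcu_head in the object and let the callback recover the enclosing object, so the final free can happen from a context where it is safe to do so. A minimal sketch of the same idiom paired with call_rcu(), using a hypothetical object type (demo_obj, demo_put, demo_free_rcu are illustrative; the patch wires its callbacks up through its own helpers):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_obj {
        int value;
        struct rcu_head rcu;            /* embedded callback handle */
};

static void demo_free_rcu(struct rcu_head *rhp)
{
        /* Recover the enclosing object from its rcu_head member. */
        struct demo_obj *obj = container_of(rhp, struct demo_obj, rcu);

        kfree(obj);
}

static void demo_put(struct demo_obj *obj)
{
        /*
         * Queue the free to run after a grace period, outside the
         * context that dropped the last reference.
         */
        call_rcu(&obj->rcu, demo_free_rcu);
}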
  14849. diff --git a/kernel/futex.c b/kernel/futex.c
  14850. index 2214b70f1910..70ba363359a5 100644
  14851. --- a/kernel/futex.c
  14852. +++ b/kernel/futex.c
  14853. @@ -738,7 +738,9 @@ void exit_pi_state_list(struct task_struct *curr)
  14854. * task still owns the PI-state:
  14855. */
  14856. if (head->next != next) {
  14857. + raw_spin_unlock_irq(&curr->pi_lock);
  14858. spin_unlock(&hb->lock);
  14859. + raw_spin_lock_irq(&curr->pi_lock);
  14860. continue;
  14861. }
  14862. @@ -1090,9 +1092,11 @@ static void __unqueue_futex(struct futex_q *q)
  14863. /*
  14864. * The hash bucket lock must be held when this is called.
  14865. - * Afterwards, the futex_q must not be accessed.
  14866. + * Afterwards, the futex_q must not be accessed. Callers
  14867. + * must ensure that wake_up_q() is called later for the
  14868. + * actual wakeups to occur.
  14869. */
  14870. -static void wake_futex(struct futex_q *q)
  14871. +static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
  14872. {
  14873. struct task_struct *p = q->task;
  14874. @@ -1100,14 +1104,10 @@ static void wake_futex(struct futex_q *q)
  14875. return;
  14876. /*
  14877. - * We set q->lock_ptr = NULL _before_ we wake up the task. If
  14878. - * a non-futex wake up happens on another CPU then the task
  14879. - * might exit and p would dereference a non-existing task
  14880. - * struct. Prevent this by holding a reference on p across the
  14881. - * wake up.
  14882. + * Queue the task for wakeup after we've released the
  14883. + * hb->lock. wake_q_add() grabs a reference to p.
  14884. */
  14885. - get_task_struct(p);
  14886. -
  14887. + wake_q_add(wake_q, p);
  14888. __unqueue_futex(q);
  14889. /*
  14890. * The waiting task can free the futex_q as soon as
  14891. @@ -1117,16 +1117,15 @@ static void wake_futex(struct futex_q *q)
  14892. */
  14893. smp_wmb();
  14894. q->lock_ptr = NULL;
  14895. -
  14896. - wake_up_state(p, TASK_NORMAL);
  14897. - put_task_struct(p);
  14898. }
  14899. -static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
  14900. +static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
  14901. + struct futex_hash_bucket *hb)
  14902. {
  14903. struct task_struct *new_owner;
  14904. struct futex_pi_state *pi_state = this->pi_state;
  14905. u32 uninitialized_var(curval), newval;
  14906. + bool deboost;
  14907. int ret = 0;
  14908. if (!pi_state)
  14909. @@ -1188,7 +1187,17 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
  14910. raw_spin_unlock_irq(&new_owner->pi_lock);
  14911. raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
  14912. - rt_mutex_unlock(&pi_state->pi_mutex);
  14913. +
  14914. + deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex);
  14915. +
  14916. + /*
  14917. + * We deboost after dropping hb->lock. That prevents a double
  14918. + * wakeup on RT.
  14919. + */
  14920. + spin_unlock(&hb->lock);
  14921. +
  14922. + if (deboost)
  14923. + rt_mutex_adjust_prio(current);
  14924. return 0;
  14925. }
  14926. @@ -1227,6 +1236,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
  14927. struct futex_q *this, *next;
  14928. union futex_key key = FUTEX_KEY_INIT;
  14929. int ret;
  14930. + WAKE_Q(wake_q);
  14931. if (!bitset)
  14932. return -EINVAL;
  14933. @@ -1254,13 +1264,14 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
  14934. if (!(this->bitset & bitset))
  14935. continue;
  14936. - wake_futex(this);
  14937. + mark_wake_futex(&wake_q, this);
  14938. if (++ret >= nr_wake)
  14939. break;
  14940. }
  14941. }
  14942. spin_unlock(&hb->lock);
  14943. + wake_up_q(&wake_q);
  14944. out_put_key:
  14945. put_futex_key(&key);
  14946. out:
  14947. @@ -1279,6 +1290,7 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
  14948. struct futex_hash_bucket *hb1, *hb2;
  14949. struct futex_q *this, *next;
  14950. int ret, op_ret;
  14951. + WAKE_Q(wake_q);
  14952. retry:
  14953. ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
  14954. @@ -1330,7 +1342,7 @@ retry_private:
  14955. ret = -EINVAL;
  14956. goto out_unlock;
  14957. }
  14958. - wake_futex(this);
  14959. + mark_wake_futex(&wake_q, this);
  14960. if (++ret >= nr_wake)
  14961. break;
  14962. }
  14963. @@ -1344,7 +1356,7 @@ retry_private:
  14964. ret = -EINVAL;
  14965. goto out_unlock;
  14966. }
  14967. - wake_futex(this);
  14968. + mark_wake_futex(&wake_q, this);
  14969. if (++op_ret >= nr_wake2)
  14970. break;
  14971. }
  14972. @@ -1354,6 +1366,7 @@ retry_private:
  14973. out_unlock:
  14974. double_unlock_hb(hb1, hb2);
  14975. + wake_up_q(&wake_q);
  14976. out_put_keys:
  14977. put_futex_key(&key2);
  14978. out_put_key1:
  14979. @@ -1513,6 +1526,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
  14980. struct futex_pi_state *pi_state = NULL;
  14981. struct futex_hash_bucket *hb1, *hb2;
  14982. struct futex_q *this, *next;
  14983. + WAKE_Q(wake_q);
  14984. if (requeue_pi) {
  14985. /*
  14986. @@ -1689,7 +1703,7 @@ retry_private:
  14987. * woken by futex_unlock_pi().
  14988. */
  14989. if (++task_count <= nr_wake && !requeue_pi) {
  14990. - wake_futex(this);
  14991. + mark_wake_futex(&wake_q, this);
  14992. continue;
  14993. }
  14994. @@ -1715,6 +1729,16 @@ retry_private:
  14995. requeue_pi_wake_futex(this, &key2, hb2);
  14996. drop_count++;
  14997. continue;
  14998. + } else if (ret == -EAGAIN) {
  14999. + /*
  15000. + * Waiter was woken by timeout or
  15001. + * signal and has set pi_blocked_on to
  15002. + * PI_WAKEUP_INPROGRESS before we
  15003. + * tried to enqueue it on the rtmutex.
  15004. + */
  15005. + this->pi_state = NULL;
  15006. + free_pi_state(pi_state);
  15007. + continue;
  15008. } else if (ret) {
  15009. /* -EDEADLK */
  15010. this->pi_state = NULL;
  15011. @@ -1729,6 +1753,7 @@ retry_private:
  15012. out_unlock:
  15013. free_pi_state(pi_state);
  15014. double_unlock_hb(hb1, hb2);
  15015. + wake_up_q(&wake_q);
  15016. hb_waiters_dec(hb2);
  15017. /*
  15018. @@ -2422,13 +2447,22 @@ retry:
  15019. */
  15020. match = futex_top_waiter(hb, &key);
  15021. if (match) {
  15022. - ret = wake_futex_pi(uaddr, uval, match);
  15023. + ret = wake_futex_pi(uaddr, uval, match, hb);
  15024. +
  15025. + /*
  15026. + * In case of success wake_futex_pi dropped the hash
  15027. + * bucket lock.
  15028. + */
  15029. + if (!ret)
  15030. + goto out_putkey;
  15031. +
  15032. /*
  15033. * The atomic access to the futex value generated a
  15034. * pagefault, so retry the user-access and the wakeup:
  15035. */
  15036. if (ret == -EFAULT)
  15037. goto pi_faulted;
  15038. +
  15039. /*
  15040. * A unconditional UNLOCK_PI op raced against a waiter
  15041. * setting the FUTEX_WAITERS bit. Try again.
  15042. @@ -2438,6 +2472,11 @@ retry:
  15043. put_futex_key(&key);
  15044. goto retry;
  15045. }
  15046. +
  15047. + /*
  15048. + * wake_futex_pi has detected invalid state. Tell user
  15049. + * space.
  15050. + */
  15051. goto out_unlock;
  15052. }
  15053. @@ -2458,6 +2497,7 @@ retry:
  15054. out_unlock:
  15055. spin_unlock(&hb->lock);
  15056. +out_putkey:
  15057. put_futex_key(&key);
  15058. return ret;
  15059. @@ -2568,7 +2608,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  15060. struct hrtimer_sleeper timeout, *to = NULL;
  15061. struct rt_mutex_waiter rt_waiter;
  15062. struct rt_mutex *pi_mutex = NULL;
  15063. - struct futex_hash_bucket *hb;
  15064. + struct futex_hash_bucket *hb, *hb2;
  15065. union futex_key key2 = FUTEX_KEY_INIT;
  15066. struct futex_q q = futex_q_init;
  15067. int res, ret;
  15068. @@ -2593,10 +2633,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  15069. * The waiter is allocated on our stack, manipulated by the requeue
  15070. * code while we sleep on uaddr.
  15071. */
  15072. - debug_rt_mutex_init_waiter(&rt_waiter);
  15073. - RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
  15074. - RB_CLEAR_NODE(&rt_waiter.tree_entry);
  15075. - rt_waiter.task = NULL;
  15076. + rt_mutex_init_waiter(&rt_waiter, false);
  15077. ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
  15078. if (unlikely(ret != 0))
  15079. @@ -2627,20 +2664,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  15080. /* Queue the futex_q, drop the hb lock, wait for wakeup. */
  15081. futex_wait_queue_me(hb, &q, to);
  15082. - spin_lock(&hb->lock);
  15083. - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
  15084. - spin_unlock(&hb->lock);
  15085. - if (ret)
  15086. - goto out_put_keys;
  15087. + /*
  15088. + * On RT we must avoid races with requeue and trying to block
  15089. + * on two mutexes (hb->lock and uaddr2's rtmutex) by
  15090. + * serializing access to pi_blocked_on with pi_lock.
  15091. + */
  15092. + raw_spin_lock_irq(&current->pi_lock);
  15093. + if (current->pi_blocked_on) {
  15094. + /*
  15095. + * We have been requeued or are in the process of
  15096. + * being requeued.
  15097. + */
  15098. + raw_spin_unlock_irq(&current->pi_lock);
  15099. + } else {
  15100. + /*
  15101. + * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS
  15102. + * prevents a concurrent requeue from moving us to the
  15103. + * uaddr2 rtmutex. After that we can safely acquire
  15104. + * (and possibly block on) hb->lock.
  15105. + */
  15106. + current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
  15107. + raw_spin_unlock_irq(&current->pi_lock);
  15108. +
  15109. + spin_lock(&hb->lock);
  15110. +
  15111. + /*
  15112. + * Clean up pi_blocked_on. Otherwise we would leak it
  15113. + * when we succeeded with hb->lock in the fast
  15114. + * path.
  15115. + */
  15116. + raw_spin_lock_irq(&current->pi_lock);
  15117. + current->pi_blocked_on = NULL;
  15118. + raw_spin_unlock_irq(&current->pi_lock);
  15119. +
  15120. + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
  15121. + spin_unlock(&hb->lock);
  15122. + if (ret)
  15123. + goto out_put_keys;
  15124. + }
  15125. /*
  15126. - * In order for us to be here, we know our q.key == key2, and since
  15127. - * we took the hb->lock above, we also know that futex_requeue() has
  15128. - * completed and we no longer have to concern ourselves with a wakeup
  15129. - * race with the atomic proxy lock acquisition by the requeue code. The
  15130. - * futex_requeue dropped our key1 reference and incremented our key2
  15131. - * reference count.
  15132. + * To get here we have either been requeued, we are in
  15133. + * the process of being requeued, or requeue successfully
  15134. + * acquired uaddr2 on our behalf. If pi_blocked_on was
  15135. + * non-null above, we may be racing with a requeue. Do not
  15136. + * rely on q->lock_ptr to be hb2->lock until after blocking on
  15137. + * hb->lock or hb2->lock. The futex_requeue dropped our key1
  15138. + * reference and incremented our key2 reference count.
  15139. */
  15140. + hb2 = hash_futex(&key2);
  15141. /* Check if the requeue code acquired the second futex for us. */
  15142. if (!q.rt_waiter) {
  15143. @@ -2649,14 +2721,15 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  15144. * did a lock-steal - fix up the PI-state in that case.
  15145. */
  15146. if (q.pi_state && (q.pi_state->owner != current)) {
  15147. - spin_lock(q.lock_ptr);
  15148. + spin_lock(&hb2->lock);
  15149. + BUG_ON(&hb2->lock != q.lock_ptr);
  15150. ret = fixup_pi_state_owner(uaddr2, &q, current);
  15151. /*
  15152. * Drop the reference to the pi state which
  15153. * the requeue_pi() code acquired for us.
  15154. */
  15155. free_pi_state(q.pi_state);
  15156. - spin_unlock(q.lock_ptr);
  15157. + spin_unlock(&hb2->lock);
  15158. }
  15159. } else {
  15160. /*
  15161. @@ -2669,7 +2742,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  15162. ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
  15163. debug_rt_mutex_free_waiter(&rt_waiter);
  15164. - spin_lock(q.lock_ptr);
  15165. + spin_lock(&hb2->lock);
  15166. + BUG_ON(&hb2->lock != q.lock_ptr);
  15167. /*
  15168. * Fixup the pi_state owner and possibly acquire the lock if we
  15169. * haven't already.
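A pattern that recurs through the futex changes above is the deferred wakeup: mark_wake_futex() only queues the task on an on-stack wake queue while hb->lock is held, and wake_up_q() performs the real wakeups once the lock has been dropped, so on RT the waker never wakes anybody while it still holds a sleeping lock itself. A minimal sketch of that shape, assuming the WAKE_Q()/wake_q_add()/wake_up_q() helpers this tree provides (struct demo_queue and its fields are hypothetical):

#include <linux/sched.h>
#include <linux/spinlock.h>

/* Hypothetical object with one waiting task recorded by the sleeping side. */
struct demo_queue {
        spinlock_t lock;
        struct task_struct *waiter;
};

static void demo_queue_init(struct demo_queue *q)
{
        spin_lock_init(&q->lock);
        q->waiter = NULL;
}

static void demo_wake_one(struct demo_queue *q)
{
        WAKE_Q(wake_q);                 /* on-stack wake queue */

        spin_lock(&q->lock);
        if (q->waiter) {
                /* Takes a reference on the task; no wakeup happens yet. */
                wake_q_add(&wake_q, q->waiter);
                q->waiter = NULL;
        }
        spin_unlock(&q->lock);

        /* The actual wakeups run only after the lock is dropped. */
        wake_up_q(&wake_q);
}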
  15170. diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
  15171. index 635480270858..26a63672c263 100644
  15172. --- a/kernel/irq/handle.c
  15173. +++ b/kernel/irq/handle.c
  15174. @@ -133,6 +133,8 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
  15175. irqreturn_t
  15176. handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
  15177. {
  15178. + struct pt_regs *regs = get_irq_regs();
  15179. + u64 ip = regs ? instruction_pointer(regs) : 0;
  15180. irqreturn_t retval = IRQ_NONE;
  15181. unsigned int flags = 0, irq = desc->irq_data.irq;
  15182. @@ -173,7 +175,11 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
  15183. action = action->next;
  15184. } while (action);
  15185. - add_interrupt_randomness(irq, flags);
  15186. +#ifndef CONFIG_PREEMPT_RT_FULL
  15187. + add_interrupt_randomness(irq, flags, ip);
  15188. +#else
  15189. + desc->random_ip = ip;
  15190. +#endif
  15191. if (!noirqdebug)
  15192. note_interrupt(irq, desc, retval);
  15193. diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
  15194. index e68932bb308e..79c55c26eaee 100644
  15195. --- a/kernel/irq/manage.c
  15196. +++ b/kernel/irq/manage.c
  15197. @@ -22,6 +22,7 @@
  15198. #include "internals.h"
  15199. #ifdef CONFIG_IRQ_FORCED_THREADING
  15200. +# ifndef CONFIG_PREEMPT_RT_BASE
  15201. __read_mostly bool force_irqthreads;
  15202. static int __init setup_forced_irqthreads(char *arg)
  15203. @@ -30,6 +31,7 @@ static int __init setup_forced_irqthreads(char *arg)
  15204. return 0;
  15205. }
  15206. early_param("threadirqs", setup_forced_irqthreads);
  15207. +# endif
  15208. #endif
  15209. static void __synchronize_hardirq(struct irq_desc *desc)
  15210. @@ -179,6 +181,62 @@ static inline void
  15211. irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
  15212. #endif
  15213. +#ifdef CONFIG_PREEMPT_RT_FULL
  15214. +static void _irq_affinity_notify(struct irq_affinity_notify *notify);
  15215. +static struct task_struct *set_affinity_helper;
  15216. +static LIST_HEAD(affinity_list);
  15217. +static DEFINE_RAW_SPINLOCK(affinity_list_lock);
  15218. +
  15219. +static int set_affinity_thread(void *unused)
  15220. +{
  15221. + while (1) {
  15222. + struct irq_affinity_notify *notify;
  15223. + int empty;
  15224. +
  15225. + set_current_state(TASK_INTERRUPTIBLE);
  15226. +
  15227. + raw_spin_lock_irq(&affinity_list_lock);
  15228. + empty = list_empty(&affinity_list);
  15229. + raw_spin_unlock_irq(&affinity_list_lock);
  15230. +
  15231. + if (empty)
  15232. + schedule();
  15233. + if (kthread_should_stop())
  15234. + break;
  15235. + set_current_state(TASK_RUNNING);
  15236. +try_next:
  15237. + notify = NULL;
  15238. +
  15239. + raw_spin_lock_irq(&affinity_list_lock);
  15240. + if (!list_empty(&affinity_list)) {
  15241. + notify = list_first_entry(&affinity_list,
  15242. + struct irq_affinity_notify, list);
  15243. + list_del_init(&notify->list);
  15244. + }
  15245. + raw_spin_unlock_irq(&affinity_list_lock);
  15246. +
  15247. + if (!notify)
  15248. + continue;
  15249. + _irq_affinity_notify(notify);
  15250. + goto try_next;
  15251. + }
  15252. + return 0;
  15253. +}
  15254. +
  15255. +static void init_helper_thread(void)
  15256. +{
  15257. + if (set_affinity_helper)
  15258. + return;
  15259. + set_affinity_helper = kthread_run(set_affinity_thread, NULL,
  15260. + "affinity-cb");
  15261. + WARN_ON(IS_ERR(set_affinity_helper));
  15262. +}
  15263. +#else
  15264. +
  15265. +static inline void init_helper_thread(void) { }
  15266. +
  15267. +#endif
  15268. +
  15269. int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
  15270. bool force)
  15271. {
  15272. @@ -218,7 +276,17 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
  15273. if (desc->affinity_notify) {
  15274. kref_get(&desc->affinity_notify->kref);
  15275. +
  15276. +#ifdef CONFIG_PREEMPT_RT_FULL
  15277. + raw_spin_lock(&affinity_list_lock);
  15278. + if (list_empty(&desc->affinity_notify->list))
  15279. + list_add_tail(&affinity_list,
  15280. + &desc->affinity_notify->list);
  15281. + raw_spin_unlock(&affinity_list_lock);
  15282. + wake_up_process(set_affinity_helper);
  15283. +#else
  15284. schedule_work(&desc->affinity_notify->work);
  15285. +#endif
  15286. }
  15287. irqd_set(data, IRQD_AFFINITY_SET);
  15288. @@ -256,10 +324,8 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
  15289. }
  15290. EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
  15291. -static void irq_affinity_notify(struct work_struct *work)
  15292. +static void _irq_affinity_notify(struct irq_affinity_notify *notify)
  15293. {
  15294. - struct irq_affinity_notify *notify =
  15295. - container_of(work, struct irq_affinity_notify, work);
  15296. struct irq_desc *desc = irq_to_desc(notify->irq);
  15297. cpumask_var_t cpumask;
  15298. unsigned long flags;
  15299. @@ -281,6 +347,13 @@ out:
  15300. kref_put(&notify->kref, notify->release);
  15301. }
  15302. +static void irq_affinity_notify(struct work_struct *work)
  15303. +{
  15304. + struct irq_affinity_notify *notify =
  15305. + container_of(work, struct irq_affinity_notify, work);
  15306. + _irq_affinity_notify(notify);
  15307. +}
  15308. +
  15309. /**
  15310. * irq_set_affinity_notifier - control notification of IRQ affinity changes
  15311. * @irq: Interrupt for which to enable/disable notification
  15312. @@ -310,6 +383,8 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
  15313. notify->irq = irq;
  15314. kref_init(&notify->kref);
  15315. INIT_WORK(&notify->work, irq_affinity_notify);
  15316. + INIT_LIST_HEAD(&notify->list);
  15317. + init_helper_thread();
  15318. }
  15319. raw_spin_lock_irqsave(&desc->lock, flags);
  15320. @@ -697,6 +772,12 @@ static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id)
  15321. return IRQ_NONE;
  15322. }
  15323. +static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id)
  15324. +{
  15325. + WARN(1, "Secondary action handler called for irq %d\n", irq);
  15326. + return IRQ_NONE;
  15327. +}
  15328. +
  15329. static int irq_wait_for_interrupt(struct irqaction *action)
  15330. {
  15331. set_current_state(TASK_INTERRUPTIBLE);
  15332. @@ -723,7 +804,8 @@ static int irq_wait_for_interrupt(struct irqaction *action)
  15333. static void irq_finalize_oneshot(struct irq_desc *desc,
  15334. struct irqaction *action)
  15335. {
  15336. - if (!(desc->istate & IRQS_ONESHOT))
  15337. + if (!(desc->istate & IRQS_ONESHOT) ||
  15338. + action->handler == irq_forced_secondary_handler)
  15339. return;
  15340. again:
  15341. chip_bus_lock(desc);
  15342. @@ -825,7 +907,15 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
  15343. local_bh_disable();
  15344. ret = action->thread_fn(action->irq, action->dev_id);
  15345. irq_finalize_oneshot(desc, action);
  15346. - local_bh_enable();
  15347. + /*
  15348. + * Interrupts which have real time requirements can be set up
  15349. + * to avoid softirq processing in the thread handler. This is
  15350. + * safe as these interrupts do not raise soft interrupts.
  15351. + */
  15352. + if (irq_settings_no_softirq_call(desc))
  15353. + _local_bh_enable();
  15354. + else
  15355. + local_bh_enable();
  15356. return ret;
  15357. }
  15358. @@ -877,6 +967,18 @@ static void irq_thread_dtor(struct callback_head *unused)
  15359. irq_finalize_oneshot(desc, action);
  15360. }
  15361. +static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action)
  15362. +{
  15363. + struct irqaction *secondary = action->secondary;
  15364. +
  15365. + if (WARN_ON_ONCE(!secondary))
  15366. + return;
  15367. +
  15368. + raw_spin_lock_irq(&desc->lock);
  15369. + __irq_wake_thread(desc, secondary);
  15370. + raw_spin_unlock_irq(&desc->lock);
  15371. +}
  15372. +
  15373. /*
  15374. * Interrupt handler thread
  15375. */
  15376. @@ -907,7 +1009,15 @@ static int irq_thread(void *data)
  15377. action_ret = handler_fn(desc, action);
  15378. if (action_ret == IRQ_HANDLED)
  15379. atomic_inc(&desc->threads_handled);
  15380. -
  15381. + if (action_ret == IRQ_WAKE_THREAD)
  15382. + irq_wake_secondary(desc, action);
  15383. +
  15384. +#ifdef CONFIG_PREEMPT_RT_FULL
  15385. + migrate_disable();
  15386. + add_interrupt_randomness(action->irq, 0,
  15387. + desc->random_ip ^ (unsigned long) action);
  15388. + migrate_enable();
  15389. +#endif
  15390. wake_threads_waitq(desc);
  15391. }
  15392. @@ -951,20 +1061,36 @@ void irq_wake_thread(unsigned int irq, void *dev_id)
  15393. }
  15394. EXPORT_SYMBOL_GPL(irq_wake_thread);
  15395. -static void irq_setup_forced_threading(struct irqaction *new)
  15396. +static int irq_setup_forced_threading(struct irqaction *new)
  15397. {
  15398. if (!force_irqthreads)
  15399. - return;
  15400. + return 0;
  15401. if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
  15402. - return;
  15403. + return 0;
  15404. new->flags |= IRQF_ONESHOT;
  15405. - if (!new->thread_fn) {
  15406. - set_bit(IRQTF_FORCED_THREAD, &new->thread_flags);
  15407. - new->thread_fn = new->handler;
  15408. - new->handler = irq_default_primary_handler;
  15409. + /*
  15410. + * Handle the case where we have a real primary handler and a
  15411. + * thread handler. We force thread them as well by creating a
  15412. + * secondary action.
  15413. + */
  15414. + if (new->handler != irq_default_primary_handler && new->thread_fn) {
  15415. + /* Allocate the secondary action */
  15416. + new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
  15417. + if (!new->secondary)
  15418. + return -ENOMEM;
  15419. + new->secondary->handler = irq_forced_secondary_handler;
  15420. + new->secondary->thread_fn = new->thread_fn;
  15421. + new->secondary->dev_id = new->dev_id;
  15422. + new->secondary->irq = new->irq;
  15423. + new->secondary->name = new->name;
  15424. }
  15425. + /* Deal with the primary handler */
  15426. + set_bit(IRQTF_FORCED_THREAD, &new->thread_flags);
  15427. + new->thread_fn = new->handler;
  15428. + new->handler = irq_default_primary_handler;
  15429. + return 0;
  15430. }
  15431. static int irq_request_resources(struct irq_desc *desc)
  15432. @@ -984,6 +1110,48 @@ static void irq_release_resources(struct irq_desc *desc)
  15433. c->irq_release_resources(d);
  15434. }
  15435. +static int
  15436. +setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
  15437. +{
  15438. + struct task_struct *t;
  15439. + struct sched_param param = {
  15440. + .sched_priority = MAX_USER_RT_PRIO/2,
  15441. + };
  15442. +
  15443. + if (!secondary) {
  15444. + t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
  15445. + new->name);
  15446. + } else {
  15447. + t = kthread_create(irq_thread, new, "irq/%d-s-%s", irq,
  15448. + new->name);
  15449. + param.sched_priority += 1;
  15450. + }
  15451. +
  15452. + if (IS_ERR(t))
  15453. + return PTR_ERR(t);
  15454. +
  15455. + sched_setscheduler_nocheck(t, SCHED_FIFO, &param);
  15456. +
  15457. + /*
  15458. + * We keep the reference to the task struct even if
  15459. + * the thread dies, to prevent the interrupt code from
  15460. + * referencing an already freed task_struct.
  15461. + */
  15462. + get_task_struct(t);
  15463. + new->thread = t;
  15464. + /*
  15465. + * Tell the thread to set its affinity. This is
  15466. + * important for shared interrupt handlers as we do
  15467. + * not invoke setup_affinity() for the secondary
  15468. + * handlers as everything is already set up. Even for
  15469. + * interrupts marked with IRQF_NO_BALANCE this is
  15470. + * correct as we want the thread to move to the cpu(s)
  15471. + * on which the requesting code placed the interrupt.
  15472. + */
  15473. + set_bit(IRQTF_AFFINITY, &new->thread_flags);
  15474. + return 0;
  15475. +}
  15476. +
  15477. /*
  15478. * Internal function to register an irqaction - typically used to
  15479. * allocate special interrupts that are part of the architecture.
  15480. @@ -1004,6 +1172,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
  15481. if (!try_module_get(desc->owner))
  15482. return -ENODEV;
  15483. + new->irq = irq;
  15484. +
  15485. /*
  15486. * Check whether the interrupt nests into another interrupt
  15487. * thread.
  15488. @@ -1021,8 +1191,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
  15489. */
  15490. new->handler = irq_nested_primary_handler;
  15491. } else {
  15492. - if (irq_settings_can_thread(desc))
  15493. - irq_setup_forced_threading(new);
  15494. + if (irq_settings_can_thread(desc)) {
  15495. + ret = irq_setup_forced_threading(new);
  15496. + if (ret)
  15497. + goto out_mput;
  15498. + }
  15499. }
  15500. /*
  15501. @@ -1031,37 +1204,14 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
  15502. * thread.
  15503. */
  15504. if (new->thread_fn && !nested) {
  15505. - struct task_struct *t;
  15506. - static const struct sched_param param = {
  15507. - .sched_priority = MAX_USER_RT_PRIO/2,
  15508. - };
  15509. -
  15510. - t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
  15511. - new->name);
  15512. - if (IS_ERR(t)) {
  15513. - ret = PTR_ERR(t);
  15514. + ret = setup_irq_thread(new, irq, false);
  15515. + if (ret)
  15516. goto out_mput;
  15517. + if (new->secondary) {
  15518. + ret = setup_irq_thread(new->secondary, irq, true);
  15519. + if (ret)
  15520. + goto out_thread;
  15521. }
  15522. -
  15523. - sched_setscheduler_nocheck(t, SCHED_FIFO, &param);
  15524. -
  15525. - /*
  15526. - * We keep the reference to the task struct even if
  15527. - * the thread dies to avoid that the interrupt code
  15528. - * references an already freed task_struct.
  15529. - */
  15530. - get_task_struct(t);
  15531. - new->thread = t;
  15532. - /*
  15533. - * Tell the thread to set its affinity. This is
  15534. - * important for shared interrupt handlers as we do
  15535. - * not invoke setup_affinity() for the secondary
  15536. - * handlers as everything is already set up. Even for
  15537. - * interrupts marked with IRQF_NO_BALANCE this is
  15538. - * correct as we want the thread to move to the cpu(s)
  15539. - * on which the requesting code placed the interrupt.
  15540. - */
  15541. - set_bit(IRQTF_AFFINITY, &new->thread_flags);
  15542. }
  15543. if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
  15544. @@ -1221,6 +1371,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
  15545. irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
  15546. }
  15547. + if (new->flags & IRQF_NO_SOFTIRQ_CALL)
  15548. + irq_settings_set_no_softirq_call(desc);
  15549. +
  15550. /* Set default affinity mask once everything is setup */
  15551. setup_affinity(irq, desc, mask);
  15552. @@ -1234,7 +1387,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
  15553. irq, nmsk, omsk);
  15554. }
  15555. - new->irq = irq;
  15556. *old_ptr = new;
  15557. irq_pm_install_action(desc, new);
  15558. @@ -1260,6 +1412,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
  15559. */
  15560. if (new->thread)
  15561. wake_up_process(new->thread);
  15562. + if (new->secondary)
  15563. + wake_up_process(new->secondary->thread);
  15564. register_irq_proc(irq, desc);
  15565. new->dir = NULL;
  15566. @@ -1290,6 +1444,13 @@ out_thread:
  15567. kthread_stop(t);
  15568. put_task_struct(t);
  15569. }
  15570. + if (new->secondary && new->secondary->thread) {
  15571. + struct task_struct *t = new->secondary->thread;
  15572. +
  15573. + new->secondary->thread = NULL;
  15574. + kthread_stop(t);
  15575. + put_task_struct(t);
  15576. + }
  15577. out_mput:
  15578. module_put(desc->owner);
  15579. return ret;
  15580. @@ -1397,9 +1558,14 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
  15581. if (action->thread) {
  15582. kthread_stop(action->thread);
  15583. put_task_struct(action->thread);
  15584. + if (action->secondary && action->secondary->thread) {
  15585. + kthread_stop(action->secondary->thread);
  15586. + put_task_struct(action->secondary->thread);
  15587. + }
  15588. }
  15589. module_put(desc->owner);
  15590. + kfree(action->secondary);
  15591. return action;
  15592. }
  15593. @@ -1543,8 +1709,10 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
  15594. retval = __setup_irq(irq, desc, action);
  15595. chip_bus_sync_unlock(desc);
  15596. - if (retval)
  15597. + if (retval) {
  15598. + kfree(action->secondary);
  15599. kfree(action);
  15600. + }
  15601. #ifdef CONFIG_DEBUG_SHIRQ_FIXME
  15602. if (!retval && (irqflags & IRQF_SHARED)) {
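The forced-threading rework above turns a request that has both a real primary handler and a thread_fn into a primary irqaction plus a synthetic secondary action, each backed by its own kthread. From a driver's point of view, the usual way to end up with such a handler pair is request_threaded_irq(); a minimal sketch follows (the demo_* names and the flag choice are illustrative, not taken from the patch):

#include <linux/interrupt.h>

static irqreturn_t demo_primary(int irq, void *dev_id)
{
        /* Minimal hard-irq work; ask for the threaded handler to run. */
        return IRQ_WAKE_THREAD;
}

static irqreturn_t demo_thread_fn(int irq, void *dev_id)
{
        /* Runs in the "irq/%d-%s" kthread created by __setup_irq(). */
        return IRQ_HANDLED;
}

static int demo_request(unsigned int irq, void *dev)
{
        return request_threaded_irq(irq, demo_primary, demo_thread_fn,
                                    IRQF_ONESHOT, "demo", dev);
}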
  15603. diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
  15604. index 3320b84cc60f..34b803b89d41 100644
  15605. --- a/kernel/irq/settings.h
  15606. +++ b/kernel/irq/settings.h
  15607. @@ -15,6 +15,7 @@ enum {
  15608. _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD,
  15609. _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID,
  15610. _IRQ_IS_POLLED = IRQ_IS_POLLED,
  15611. + _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL,
  15612. _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
  15613. };
  15614. @@ -28,6 +29,7 @@ enum {
  15615. #define IRQ_NESTED_THREAD GOT_YOU_MORON
  15616. #define IRQ_PER_CPU_DEVID GOT_YOU_MORON
  15617. #define IRQ_IS_POLLED GOT_YOU_MORON
  15618. +#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON
  15619. #undef IRQF_MODIFY_MASK
  15620. #define IRQF_MODIFY_MASK GOT_YOU_MORON
  15621. @@ -38,6 +40,16 @@ irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set)
  15622. desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK);
  15623. }
  15624. +static inline bool irq_settings_no_softirq_call(struct irq_desc *desc)
  15625. +{
  15626. + return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL;
  15627. +}
  15628. +
  15629. +static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc)
  15630. +{
  15631. + desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL;
  15632. +}
  15633. +
  15634. static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
  15635. {
  15636. return desc->status_use_accessors & _IRQ_PER_CPU;
  15637. diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
  15638. index e2514b0e439e..903a69c45689 100644
  15639. --- a/kernel/irq/spurious.c
  15640. +++ b/kernel/irq/spurious.c
  15641. @@ -444,6 +444,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
  15642. static int __init irqfixup_setup(char *str)
  15643. {
  15644. +#ifdef CONFIG_PREEMPT_RT_BASE
  15645. + pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
  15646. + return 1;
  15647. +#endif
  15648. irqfixup = 1;
  15649. printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
  15650. printk(KERN_WARNING "This may impact system performance.\n");
  15651. @@ -456,6 +460,10 @@ module_param(irqfixup, int, 0644);
  15652. static int __init irqpoll_setup(char *str)
  15653. {
  15654. +#ifdef CONFIG_PREEMPT_RT_BASE
  15655. + pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
  15656. + return 1;
  15657. +#endif
  15658. irqfixup = 2;
  15659. printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
  15660. "enabled\n");
  15661. diff --git a/kernel/irq_work.c b/kernel/irq_work.c
  15662. index cbf9fb899d92..58cf46638ca0 100644
  15663. --- a/kernel/irq_work.c
  15664. +++ b/kernel/irq_work.c
  15665. @@ -17,6 +17,7 @@
  15666. #include <linux/cpu.h>
  15667. #include <linux/notifier.h>
  15668. #include <linux/smp.h>
  15669. +#include <linux/interrupt.h>
  15670. #include <asm/processor.h>
  15671. @@ -65,6 +66,8 @@ void __weak arch_irq_work_raise(void)
  15672. */
  15673. bool irq_work_queue_on(struct irq_work *work, int cpu)
  15674. {
  15675. + struct llist_head *list;
  15676. +
  15677. /* All work should have been flushed before going offline */
  15678. WARN_ON_ONCE(cpu_is_offline(cpu));
  15679. @@ -75,7 +78,12 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
  15680. if (!irq_work_claim(work))
  15681. return false;
  15682. - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
  15683. + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ))
  15684. + list = &per_cpu(lazy_list, cpu);
  15685. + else
  15686. + list = &per_cpu(raised_list, cpu);
  15687. +
  15688. + if (llist_add(&work->llnode, list))
  15689. arch_send_call_function_single_ipi(cpu);
  15690. return true;
  15691. @@ -86,6 +94,9 @@ EXPORT_SYMBOL_GPL(irq_work_queue_on);
  15692. /* Enqueue the irq work @work on the current CPU */
  15693. bool irq_work_queue(struct irq_work *work)
  15694. {
  15695. + struct llist_head *list;
  15696. + bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
  15697. +
  15698. /* Only queue if not already pending */
  15699. if (!irq_work_claim(work))
  15700. return false;
  15701. @@ -93,13 +104,15 @@ bool irq_work_queue(struct irq_work *work)
  15702. /* Queue the entry and raise the IPI if needed. */
  15703. preempt_disable();
  15704. - /* If the work is "lazy", handle it from next tick if any */
  15705. - if (work->flags & IRQ_WORK_LAZY) {
  15706. - if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
  15707. - tick_nohz_tick_stopped())
  15708. - arch_irq_work_raise();
  15709. - } else {
  15710. - if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
  15711. + lazy_work = work->flags & IRQ_WORK_LAZY;
  15712. +
  15713. + if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ)))
  15714. + list = this_cpu_ptr(&lazy_list);
  15715. + else
  15716. + list = this_cpu_ptr(&raised_list);
  15717. +
  15718. + if (llist_add(&work->llnode, list)) {
  15719. + if (!lazy_work || tick_nohz_tick_stopped())
  15720. arch_irq_work_raise();
  15721. }
  15722. @@ -116,9 +129,8 @@ bool irq_work_needs_cpu(void)
  15723. raised = this_cpu_ptr(&raised_list);
  15724. lazy = this_cpu_ptr(&lazy_list);
  15725. - if (llist_empty(raised) || arch_irq_work_has_interrupt())
  15726. - if (llist_empty(lazy))
  15727. - return false;
  15728. + if (llist_empty(raised) && llist_empty(lazy))
  15729. + return false;
  15730. /* All work should have been flushed before going offline */
  15731. WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
  15732. @@ -132,7 +144,7 @@ static void irq_work_run_list(struct llist_head *list)
  15733. struct irq_work *work;
  15734. struct llist_node *llnode;
  15735. - BUG_ON(!irqs_disabled());
  15736. + BUG_ON_NONRT(!irqs_disabled());
  15737. if (llist_empty(list))
  15738. return;
  15739. @@ -169,7 +181,16 @@ static void irq_work_run_list(struct llist_head *list)
  15740. void irq_work_run(void)
  15741. {
  15742. irq_work_run_list(this_cpu_ptr(&raised_list));
  15743. - irq_work_run_list(this_cpu_ptr(&lazy_list));
  15744. + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) {
  15745. + /*
  15746. + * NOTE: we raise softirq via IPI for safety,
  15747. + * and execute in irq_work_tick() to move the
  15748. + * overhead from hard to soft irq context.
  15749. + */
  15750. + if (!llist_empty(this_cpu_ptr(&lazy_list)))
  15751. + raise_softirq(TIMER_SOFTIRQ);
  15752. + } else
  15753. + irq_work_run_list(this_cpu_ptr(&lazy_list));
  15754. }
  15755. EXPORT_SYMBOL_GPL(irq_work_run);
  15756. @@ -179,8 +200,17 @@ void irq_work_tick(void)
  15757. if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
  15758. irq_work_run_list(raised);
  15759. +
  15760. + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
  15761. + irq_work_run_list(this_cpu_ptr(&lazy_list));
  15762. +}
  15763. +
  15764. +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
  15765. +void irq_work_tick_soft(void)
  15766. +{
  15767. irq_work_run_list(this_cpu_ptr(&lazy_list));
  15768. }
  15769. +#endif
  15770. /*
  15771. * Synchronize against the irq_work @entry, ensures the entry is not
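raised_list and lazy_list above are per-CPU llists: producers push with llist_add() from any context, and the consumer detaches the whole list in one go with llist_del_all() before walking it. A minimal sketch of that lock-free producer/consumer shape with a hypothetical work item (demo_work, demo_queue, demo_run are illustrative; irq_work runs each entry's callback where this sketch just frees it):

#include <linux/llist.h>
#include <linux/slab.h>

struct demo_work {
        int payload;
        struct llist_node llnode;
};

static LLIST_HEAD(demo_list);

/* Producer side: lock-free, usable from any context. */
static bool demo_queue(struct demo_work *w)
{
        /*
         * Returns true if the list was empty before, i.e. the caller
         * should kick the consumer (irq_work raises an IPI/softirq here).
         */
        return llist_add(&w->llnode, &demo_list);
}

/* Consumer side: detach the whole list at once, then walk it. */
static void demo_run(void)
{
        struct llist_node *llnode = llist_del_all(&demo_list);
        struct demo_work *w, *tmp;

        llist_for_each_entry_safe(w, tmp, llnode, llnode)
                kfree(w);
}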
  15772. diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
  15773. index 6683ccef9fff..d6fc8eeaab8f 100644
  15774. --- a/kernel/ksysfs.c
  15775. +++ b/kernel/ksysfs.c
  15776. @@ -136,6 +136,15 @@ KERNEL_ATTR_RO(vmcoreinfo);
  15777. #endif /* CONFIG_KEXEC */
  15778. +#if defined(CONFIG_PREEMPT_RT_FULL)
  15779. +static ssize_t realtime_show(struct kobject *kobj,
  15780. + struct kobj_attribute *attr, char *buf)
  15781. +{
  15782. + return sprintf(buf, "%d\n", 1);
  15783. +}
  15784. +KERNEL_ATTR_RO(realtime);
  15785. +#endif
  15786. +
  15787. /* whether file capabilities are enabled */
  15788. static ssize_t fscaps_show(struct kobject *kobj,
  15789. struct kobj_attribute *attr, char *buf)
  15790. @@ -203,6 +212,9 @@ static struct attribute * kernel_attrs[] = {
  15791. &vmcoreinfo_attr.attr,
  15792. #endif
  15793. &rcu_expedited_attr.attr,
  15794. +#ifdef CONFIG_PREEMPT_RT_FULL
  15795. + &realtime_attr.attr,
  15796. +#endif
  15797. NULL
  15798. };
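The /sys/kernel/realtime file added above is an ordinary read-only kobj_attribute whose show() callback prints a constant. A minimal sketch of registering a similar attribute from a module with the stock sysfs helpers (the demo name and file are hypothetical):

#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/module.h>
#include <linux/sysfs.h>

static ssize_t demo_show(struct kobject *kobj,
                         struct kobj_attribute *attr, char *buf)
{
        return sprintf(buf, "%d\n", 1);
}

static struct kobj_attribute demo_attr = __ATTR_RO(demo);

static int __init demo_sysfs_init(void)
{
        /* Creates /sys/kernel/demo next to the attributes listed above. */
        return sysfs_create_file(kernel_kobj, &demo_attr.attr);
}

static void __exit demo_sysfs_exit(void)
{
        sysfs_remove_file(kernel_kobj, &demo_attr.attr);
}

module_init(demo_sysfs_init);
module_exit(demo_sysfs_exit);
MODULE_LICENSE("GPL");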
  15799. diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
  15800. index de7a416cca2a..ab269cf0475a 100644
  15801. --- a/kernel/locking/Makefile
  15802. +++ b/kernel/locking/Makefile
  15803. @@ -1,5 +1,5 @@
  15804. -obj-y += mutex.o semaphore.o rwsem.o
  15805. +obj-y += semaphore.o
  15806. ifdef CONFIG_FUNCTION_TRACER
  15807. CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
  15808. @@ -8,7 +8,11 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE)
  15809. CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE)
  15810. endif
  15811. +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
  15812. +obj-y += mutex.o
  15813. obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
  15814. +obj-y += rwsem.o
  15815. +endif
  15816. obj-$(CONFIG_LOCKDEP) += lockdep.o
  15817. ifeq ($(CONFIG_PROC_FS),y)
  15818. obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
  15819. @@ -22,8 +26,11 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
  15820. obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
  15821. obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
  15822. obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
  15823. +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
  15824. obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
  15825. obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
  15826. +endif
  15827. obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
  15828. +obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o
  15829. obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o
  15830. obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
  15831. diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c
  15832. index 86ae2aebf004..9397974b142f 100644
  15833. --- a/kernel/locking/lglock.c
  15834. +++ b/kernel/locking/lglock.c
  15835. @@ -4,6 +4,15 @@
  15836. #include <linux/cpu.h>
  15837. #include <linux/string.h>
  15838. +#ifndef CONFIG_PREEMPT_RT_FULL
  15839. +# define lg_lock_ptr arch_spinlock_t
  15840. +# define lg_do_lock(l) arch_spin_lock(l)
  15841. +# define lg_do_unlock(l) arch_spin_unlock(l)
  15842. +#else
  15843. +# define lg_lock_ptr struct rt_mutex
  15844. +# define lg_do_lock(l) __rt_spin_lock(l)
  15845. +# define lg_do_unlock(l) __rt_spin_unlock(l)
  15846. +#endif
  15847. /*
  15848. * Note there is no uninit, so lglocks cannot be defined in
  15849. * modules (but it's fine to use them from there)
  15850. @@ -12,51 +21,60 @@
  15851. void lg_lock_init(struct lglock *lg, char *name)
  15852. {
  15853. +#ifdef CONFIG_PREEMPT_RT_FULL
  15854. + int i;
  15855. +
  15856. + for_each_possible_cpu(i) {
  15857. + struct rt_mutex *lock = per_cpu_ptr(lg->lock, i);
  15858. +
  15859. + rt_mutex_init(lock);
  15860. + }
  15861. +#endif
  15862. LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
  15863. }
  15864. EXPORT_SYMBOL(lg_lock_init);
  15865. void lg_local_lock(struct lglock *lg)
  15866. {
  15867. - arch_spinlock_t *lock;
  15868. + lg_lock_ptr *lock;
  15869. - preempt_disable();
  15870. + migrate_disable();
  15871. lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  15872. lock = this_cpu_ptr(lg->lock);
  15873. - arch_spin_lock(lock);
  15874. + lg_do_lock(lock);
  15875. }
  15876. EXPORT_SYMBOL(lg_local_lock);
  15877. void lg_local_unlock(struct lglock *lg)
  15878. {
  15879. - arch_spinlock_t *lock;
  15880. + lg_lock_ptr *lock;
  15881. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  15882. lock = this_cpu_ptr(lg->lock);
  15883. - arch_spin_unlock(lock);
  15884. - preempt_enable();
  15885. + lg_do_unlock(lock);
  15886. + migrate_enable();
  15887. }
  15888. EXPORT_SYMBOL(lg_local_unlock);
  15889. void lg_local_lock_cpu(struct lglock *lg, int cpu)
  15890. {
  15891. - arch_spinlock_t *lock;
  15892. + lg_lock_ptr *lock;
  15893. - preempt_disable();
  15894. + preempt_disable_nort();
  15895. lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  15896. lock = per_cpu_ptr(lg->lock, cpu);
  15897. - arch_spin_lock(lock);
  15898. + lg_do_lock(lock);
  15899. }
  15900. EXPORT_SYMBOL(lg_local_lock_cpu);
  15901. void lg_local_unlock_cpu(struct lglock *lg, int cpu)
  15902. {
  15903. - arch_spinlock_t *lock;
  15904. + lg_lock_ptr *lock;
  15905. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  15906. lock = per_cpu_ptr(lg->lock, cpu);
  15907. - arch_spin_unlock(lock);
  15908. - preempt_enable();
  15909. + lg_do_unlock(lock);
  15910. + preempt_enable_nort();
  15911. }
  15912. EXPORT_SYMBOL(lg_local_unlock_cpu);
  15913. @@ -64,12 +82,12 @@ void lg_global_lock(struct lglock *lg)
  15914. {
  15915. int i;
  15916. - preempt_disable();
  15917. + preempt_disable_nort();
  15918. lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  15919. for_each_possible_cpu(i) {
  15920. - arch_spinlock_t *lock;
  15921. + lg_lock_ptr *lock;
  15922. lock = per_cpu_ptr(lg->lock, i);
  15923. - arch_spin_lock(lock);
  15924. + lg_do_lock(lock);
  15925. }
  15926. }
  15927. EXPORT_SYMBOL(lg_global_lock);
  15928. @@ -80,10 +98,35 @@ void lg_global_unlock(struct lglock *lg)
  15929. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  15930. for_each_possible_cpu(i) {
  15931. - arch_spinlock_t *lock;
  15932. + lg_lock_ptr *lock;
  15933. lock = per_cpu_ptr(lg->lock, i);
  15934. - arch_spin_unlock(lock);
  15935. + lg_do_unlock(lock);
  15936. }
  15937. - preempt_enable();
  15938. + preempt_enable_nort();
  15939. }
  15940. EXPORT_SYMBOL(lg_global_unlock);
  15941. +
  15942. +#ifdef CONFIG_PREEMPT_RT_FULL
  15943. +/*
  15944. + * HACK: If you use this, you get to keep the pieces.
  15945. + * Used in queue_stop_cpus_work() when stop machinery
  15946. + * is called from an inactive CPU, so we can't schedule.
  15947. + */
  15948. +# define lg_do_trylock_relax(l) \
  15949. + do { \
  15950. + while (!__rt_spin_trylock(l)) \
  15951. + cpu_relax(); \
  15952. + } while (0)
  15953. +
  15954. +void lg_global_trylock_relax(struct lglock *lg)
  15955. +{
  15956. + int i;
  15957. +
  15958. + lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  15959. + for_each_possible_cpu(i) {
  15960. + lg_lock_ptr *lock;
  15961. + lock = per_cpu_ptr(lg->lock, i);
  15962. + lg_do_trylock_relax(lock);
  15963. + }
  15964. +}
  15965. +#endif
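lglock keeps one lock per CPU: lg_local_lock() takes only the current CPU's lock, while lg_global_lock() sweeps every possible CPU. A compact sketch of that shape with plain per-CPU spinlocks, roughly the !PREEMPT_RT_FULL flavour (demo_lock and the helpers are hypothetical; the real lglock uses arch_spinlock_t plus a single lockdep map so that taking all of the same-class locks does not upset lockdep):

#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/spinlock.h>

static DEFINE_PER_CPU(spinlock_t, demo_lock);

static void demo_locks_init(void)
{
        int cpu;

        for_each_possible_cpu(cpu)
                spin_lock_init(per_cpu_ptr(&demo_lock, cpu));
}

/* Fast path: pin to this CPU, then take only its lock (lg_local_lock()). */
static void demo_local_lock(void)
{
        preempt_disable();
        spin_lock(this_cpu_ptr(&demo_lock));
}

static void demo_local_unlock(void)
{
        spin_unlock(this_cpu_ptr(&demo_lock));
        preempt_enable();
}

/* Slow path: take every CPU's lock in order (lg_global_lock()). */
static void demo_global_lock(void)
{
        int cpu;

        preempt_disable();
        for_each_possible_cpu(cpu)
                spin_lock(per_cpu_ptr(&demo_lock, cpu));
}

static void demo_global_unlock(void)
{
        int cpu;

        for_each_possible_cpu(cpu)
                spin_unlock(per_cpu_ptr(&demo_lock, cpu));
        preempt_enable();
}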
  15966. diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
  15967. index aaeae885d9af..577f02617c63 100644
  15968. --- a/kernel/locking/lockdep.c
  15969. +++ b/kernel/locking/lockdep.c
  15970. @@ -3563,6 +3563,7 @@ static void check_flags(unsigned long flags)
  15971. }
  15972. }
  15973. +#ifndef CONFIG_PREEMPT_RT_FULL
  15974. /*
  15975. * We dont accurately track softirq state in e.g.
  15976. * hardirq contexts (such as on 4KSTACKS), so only
  15977. @@ -3577,6 +3578,7 @@ static void check_flags(unsigned long flags)
  15978. DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
  15979. }
  15980. }
  15981. +#endif
  15982. if (!debug_locks)
  15983. print_irqtrace_events(current);
  15984. diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
  15985. index ec8cce259779..aa60d919e336 100644
  15986. --- a/kernel/locking/locktorture.c
  15987. +++ b/kernel/locking/locktorture.c
  15988. @@ -24,7 +24,6 @@
  15989. #include <linux/module.h>
  15990. #include <linux/kthread.h>
  15991. #include <linux/spinlock.h>
  15992. -#include <linux/rwlock.h>
  15993. #include <linux/mutex.h>
  15994. #include <linux/rwsem.h>
  15995. #include <linux/smp.h>
  15996. diff --git a/kernel/locking/rt.c b/kernel/locking/rt.c
  15997. new file mode 100644
  15998. index 000000000000..c236efa4834c
  15999. --- /dev/null
  16000. +++ b/kernel/locking/rt.c
  16001. @@ -0,0 +1,461 @@
  16002. +/*
  16003. + * kernel/rt.c
  16004. + *
  16005. + * Real-Time Preemption Support
  16006. + *
  16007. + * started by Ingo Molnar:
  16008. + *
  16009. + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  16010. + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  16011. + *
  16012. + * historic credit for proving that Linux spinlocks can be implemented via
  16013. + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow
  16014. + * and others) who prototyped it on 2.4 and did lots of comparative
  16015. + * research and analysis; TimeSys, for proving that you can implement a
  16016. + * fully preemptible kernel via the use of IRQ threading and mutexes;
  16017. + * Bill Huey for persuasively arguing on lkml that the mutex model is the
  16018. + * right one; and to MontaVista, who ported pmutexes to 2.6.
  16019. + *
  16020. + * This code is a from-scratch implementation and is not based on pmutexes,
  16021. + * but the idea of converting spinlocks to mutexes is used here too.
  16022. + *
  16023. + * lock debugging, locking tree, deadlock detection:
  16024. + *
  16025. + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
  16026. + * Released under the General Public License (GPL).
  16027. + *
  16028. + * Includes portions of the generic R/W semaphore implementation from:
  16029. + *
  16030. + * Copyright (c) 2001 David Howells (dhowells@redhat.com).
  16031. + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
  16032. + * - Derived also from comments by Linus
  16033. + *
  16034. + * Pending ownership of locks and ownership stealing:
  16035. + *
  16036. + * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt
  16037. + *
  16038. + * (also by Steven Rostedt)
  16039. + * - Converted single pi_lock to individual task locks.
  16040. + *
  16041. + * By Esben Nielsen:
  16042. + * Doing priority inheritance with help of the scheduler.
  16043. + *
  16044. + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  16045. + * - major rework based on Esben Nielsens initial patch
  16046. + * - replaced thread_info references by task_struct refs
  16047. + * - removed task->pending_owner dependency
  16048. + * - BKL drop/reacquire for semaphore style locks to avoid deadlocks
  16049. + * in the scheduler return path as discussed with Steven Rostedt
  16050. + *
  16051. + * Copyright (C) 2006, Kihon Technologies Inc.
  16052. + * Steven Rostedt <rostedt@goodmis.org>
  16053. + * - debugged and patched Thomas Gleixner's rework.
  16054. + * - added back the cmpxchg to the rework.
  16055. + * - turned atomic require back on for SMP.
  16056. + */
  16057. +
  16058. +#include <linux/spinlock.h>
  16059. +#include <linux/rtmutex.h>
  16060. +#include <linux/sched.h>
  16061. +#include <linux/delay.h>
  16062. +#include <linux/module.h>
  16063. +#include <linux/kallsyms.h>
  16064. +#include <linux/syscalls.h>
  16065. +#include <linux/interrupt.h>
  16066. +#include <linux/plist.h>
  16067. +#include <linux/fs.h>
  16068. +#include <linux/futex.h>
  16069. +#include <linux/hrtimer.h>
  16070. +
  16071. +#include "rtmutex_common.h"
  16072. +
  16073. +/*
  16074. + * struct mutex functions
  16075. + */
  16076. +void __mutex_do_init(struct mutex *mutex, const char *name,
  16077. + struct lock_class_key *key)
  16078. +{
  16079. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  16080. + /*
  16081. + * Make sure we are not reinitializing a held lock:
  16082. + */
  16083. + debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
  16084. + lockdep_init_map(&mutex->dep_map, name, key, 0);
  16085. +#endif
  16086. + mutex->lock.save_state = 0;
  16087. +}
  16088. +EXPORT_SYMBOL(__mutex_do_init);
  16089. +
  16090. +void __lockfunc _mutex_lock(struct mutex *lock)
  16091. +{
  16092. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  16093. + rt_mutex_lock(&lock->lock);
  16094. +}
  16095. +EXPORT_SYMBOL(_mutex_lock);
  16096. +
  16097. +int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
  16098. +{
  16099. + int ret;
  16100. +
  16101. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  16102. + ret = rt_mutex_lock_interruptible(&lock->lock);
  16103. + if (ret)
  16104. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  16105. + return ret;
  16106. +}
  16107. +EXPORT_SYMBOL(_mutex_lock_interruptible);
  16108. +
  16109. +int __lockfunc _mutex_lock_killable(struct mutex *lock)
  16110. +{
  16111. + int ret;
  16112. +
  16113. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  16114. + ret = rt_mutex_lock_killable(&lock->lock);
  16115. + if (ret)
  16116. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  16117. + return ret;
  16118. +}
  16119. +EXPORT_SYMBOL(_mutex_lock_killable);
  16120. +
  16121. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  16122. +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
  16123. +{
  16124. + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
  16125. + rt_mutex_lock(&lock->lock);
  16126. +}
  16127. +EXPORT_SYMBOL(_mutex_lock_nested);
  16128. +
  16129. +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
  16130. +{
  16131. + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_);
  16132. + rt_mutex_lock(&lock->lock);
  16133. +}
  16134. +EXPORT_SYMBOL(_mutex_lock_nest_lock);
  16135. +
  16136. +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass)
  16137. +{
  16138. + int ret;
  16139. +
  16140. + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
  16141. + ret = rt_mutex_lock_interruptible(&lock->lock);
  16142. + if (ret)
  16143. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  16144. + return ret;
  16145. +}
  16146. +EXPORT_SYMBOL(_mutex_lock_interruptible_nested);
  16147. +
  16148. +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass)
  16149. +{
  16150. + int ret;
  16151. +
  16152. + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
  16153. + ret = rt_mutex_lock_killable(&lock->lock);
  16154. + if (ret)
  16155. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  16156. + return ret;
  16157. +}
  16158. +EXPORT_SYMBOL(_mutex_lock_killable_nested);
  16159. +#endif
  16160. +
  16161. +int __lockfunc _mutex_trylock(struct mutex *lock)
  16162. +{
  16163. + int ret = rt_mutex_trylock(&lock->lock);
  16164. +
  16165. + if (ret)
  16166. + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  16167. +
  16168. + return ret;
  16169. +}
  16170. +EXPORT_SYMBOL(_mutex_trylock);
  16171. +
  16172. +void __lockfunc _mutex_unlock(struct mutex *lock)
  16173. +{
  16174. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  16175. + rt_mutex_unlock(&lock->lock);
  16176. +}
  16177. +EXPORT_SYMBOL(_mutex_unlock);
  16178. +
  16179. +/*
  16180. + * rwlock_t functions
  16181. + */
  16182. +int __lockfunc rt_write_trylock(rwlock_t *rwlock)
  16183. +{
  16184. + int ret;
  16185. +
  16186. + migrate_disable();
  16187. + ret = rt_mutex_trylock(&rwlock->lock);
  16188. + if (ret)
  16189. + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
  16190. + else
  16191. + migrate_enable();
  16192. +
  16193. + return ret;
  16194. +}
  16195. +EXPORT_SYMBOL(rt_write_trylock);
  16196. +
  16197. +int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags)
  16198. +{
  16199. + int ret;
  16200. +
  16201. + *flags = 0;
  16202. + ret = rt_write_trylock(rwlock);
  16203. + return ret;
  16204. +}
  16205. +EXPORT_SYMBOL(rt_write_trylock_irqsave);
  16206. +
  16207. +int __lockfunc rt_read_trylock(rwlock_t *rwlock)
  16208. +{
  16209. + struct rt_mutex *lock = &rwlock->lock;
  16210. + int ret = 1;
  16211. +
  16212. + /*
  16213. + * recursive read locks succeed when current owns the lock,
  16214. + * but not when read_depth == 0 which means that the lock is
  16215. + * write locked.
  16216. + */
  16217. + if (rt_mutex_owner(lock) != current) {
  16218. + migrate_disable();
  16219. + ret = rt_mutex_trylock(lock);
  16220. + if (ret)
  16221. + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
  16222. + else
  16223. + migrate_enable();
  16224. +
  16225. + } else if (!rwlock->read_depth) {
  16226. + ret = 0;
  16227. + }
  16228. +
  16229. + if (ret)
  16230. + rwlock->read_depth++;
  16231. +
  16232. + return ret;
  16233. +}
  16234. +EXPORT_SYMBOL(rt_read_trylock);
  16235. +
  16236. +void __lockfunc rt_write_lock(rwlock_t *rwlock)
  16237. +{
  16238. + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
  16239. + migrate_disable();
  16240. + __rt_spin_lock(&rwlock->lock);
  16241. +}
  16242. +EXPORT_SYMBOL(rt_write_lock);
  16243. +
  16244. +void __lockfunc rt_read_lock(rwlock_t *rwlock)
  16245. +{
  16246. + struct rt_mutex *lock = &rwlock->lock;
  16247. +
  16248. +
  16249. + /*
  16250. + * recursive read locks succeed when current owns the lock
  16251. + */
  16252. + if (rt_mutex_owner(lock) != current) {
  16253. + migrate_disable();
  16254. + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
  16255. + __rt_spin_lock(lock);
  16256. + }
  16257. + rwlock->read_depth++;
  16258. +}
  16259. +
  16260. +EXPORT_SYMBOL(rt_read_lock);
  16261. +
  16262. +void __lockfunc rt_write_unlock(rwlock_t *rwlock)
  16263. +{
  16264. + /* NOTE: we always pass in '1' for nested, for simplicity */
  16265. + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
  16266. + __rt_spin_unlock(&rwlock->lock);
  16267. + migrate_enable();
  16268. +}
  16269. +EXPORT_SYMBOL(rt_write_unlock);
  16270. +
  16271. +void __lockfunc rt_read_unlock(rwlock_t *rwlock)
  16272. +{
  16273. + /* Release the lock only when read_depth is down to 0 */
  16274. + if (--rwlock->read_depth == 0) {
  16275. + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
  16276. + __rt_spin_unlock(&rwlock->lock);
  16277. + migrate_enable();
  16278. + }
  16279. +}
  16280. +EXPORT_SYMBOL(rt_read_unlock);
  16281. +
  16282. +unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock)
  16283. +{
  16284. + rt_write_lock(rwlock);
  16285. +
  16286. + return 0;
  16287. +}
  16288. +EXPORT_SYMBOL(rt_write_lock_irqsave);
  16289. +
  16290. +unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock)
  16291. +{
  16292. + rt_read_lock(rwlock);
  16293. +
  16294. + return 0;
  16295. +}
  16296. +EXPORT_SYMBOL(rt_read_lock_irqsave);
  16297. +
  16298. +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
  16299. +{
  16300. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  16301. + /*
  16302. + * Make sure we are not reinitializing a held lock:
  16303. + */
  16304. + debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
  16305. + lockdep_init_map(&rwlock->dep_map, name, key, 0);
  16306. +#endif
  16307. + rwlock->lock.save_state = 1;
  16308. + rwlock->read_depth = 0;
  16309. +}
  16310. +EXPORT_SYMBOL(__rt_rwlock_init);
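
A minimal userspace sketch of the read-side recursion used above: on RT both readers and writers take the same sleeping lock, and a per-lock read_depth that only the owner touches counts re-entries, which is why read_depth needs no atomic updates. The model_* names are invented for this illustration, a pthread mutex stands in for the rt_mutex, and the migrate_disable()/lockdep bookkeeping is left out; build with -pthread.

#include <pthread.h>
#include <stdio.h>

struct model_rwlock {
    pthread_mutex_t lock;   /* stands in for the rwlock's rt_mutex */
    pthread_t owner;        /* valid only while read_depth > 0 */
    int read_depth;         /* recursion count, touched only by the owner */
};

static void model_read_lock(struct model_rwlock *rw)
{
    /*
     * Recurse only when we already own the lock; as in rt_read_lock(),
     * the only answer that matters ("we own it") cannot change under us.
     */
    if (!(rw->read_depth > 0 && pthread_equal(rw->owner, pthread_self()))) {
        pthread_mutex_lock(&rw->lock);
        rw->owner = pthread_self();
    }
    rw->read_depth++;
}

static void model_read_unlock(struct model_rwlock *rw)
{
    /* drop the underlying lock only when the outermost read section ends */
    if (--rw->read_depth == 0)
        pthread_mutex_unlock(&rw->lock);
}

int main(void)
{
    struct model_rwlock rw = { .lock = PTHREAD_MUTEX_INITIALIZER };

    model_read_lock(&rw);
    model_read_lock(&rw);                           /* recursive read by the owner */
    printf("read_depth = %d\n", rw.read_depth);     /* prints 2 */
    model_read_unlock(&rw);
    model_read_unlock(&rw);
    return 0;
}
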
  16311. +
  16312. +/*
  16313. + * rw_semaphores
  16314. + */
  16315. +
  16316. +void rt_up_write(struct rw_semaphore *rwsem)
  16317. +{
  16318. + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
  16319. + rt_mutex_unlock(&rwsem->lock);
  16320. +}
  16321. +EXPORT_SYMBOL(rt_up_write);
  16322. +
  16323. +void __rt_up_read(struct rw_semaphore *rwsem)
  16324. +{
  16325. + if (--rwsem->read_depth == 0)
  16326. + rt_mutex_unlock(&rwsem->lock);
  16327. +}
  16328. +
  16329. +void rt_up_read(struct rw_semaphore *rwsem)
  16330. +{
  16331. + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
  16332. + __rt_up_read(rwsem);
  16333. +}
  16334. +EXPORT_SYMBOL(rt_up_read);
  16335. +
  16336. +/*
  16337. + * downgrade a write lock into a read lock
  16338. + * - just wake up any readers at the front of the queue
  16339. + */
  16340. +void rt_downgrade_write(struct rw_semaphore *rwsem)
  16341. +{
  16342. + BUG_ON(rt_mutex_owner(&rwsem->lock) != current);
  16343. + rwsem->read_depth = 1;
  16344. +}
  16345. +EXPORT_SYMBOL(rt_downgrade_write);
  16346. +
  16347. +int rt_down_write_trylock(struct rw_semaphore *rwsem)
  16348. +{
  16349. + int ret = rt_mutex_trylock(&rwsem->lock);
  16350. +
  16351. + if (ret)
  16352. + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
  16353. + return ret;
  16354. +}
  16355. +EXPORT_SYMBOL(rt_down_write_trylock);
  16356. +
  16357. +void rt_down_write(struct rw_semaphore *rwsem)
  16358. +{
  16359. + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_);
  16360. + rt_mutex_lock(&rwsem->lock);
  16361. +}
  16362. +EXPORT_SYMBOL(rt_down_write);
  16363. +
  16364. +void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass)
  16365. +{
  16366. + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
  16367. + rt_mutex_lock(&rwsem->lock);
  16368. +}
  16369. +EXPORT_SYMBOL(rt_down_write_nested);
  16370. +
  16371. +void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
  16372. + struct lockdep_map *nest)
  16373. +{
  16374. + rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_);
  16375. + rt_mutex_lock(&rwsem->lock);
  16376. +}
  16377. +EXPORT_SYMBOL(rt_down_write_nested_lock);
  16378. +
  16379. +int rt_down_read_trylock(struct rw_semaphore *rwsem)
  16380. +{
  16381. + struct rt_mutex *lock = &rwsem->lock;
  16382. + int ret = 1;
  16383. +
  16384. + /*
  16385. + * recursive read locks succeed when current owns the rwsem,
  16386. + * but not when read_depth == 0 which means that the rwsem is
  16387. + * write locked.
  16388. + */
  16389. + if (rt_mutex_owner(lock) != current)
  16390. + ret = rt_mutex_trylock(&rwsem->lock);
  16391. + else if (!rwsem->read_depth)
  16392. + ret = 0;
  16393. +
  16394. + if (ret) {
  16395. + rwsem->read_depth++;
  16396. + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
  16397. + }
  16398. + return ret;
  16399. +}
  16400. +EXPORT_SYMBOL(rt_down_read_trylock);
  16401. +
  16402. +static void __rt_down_read(struct rw_semaphore *rwsem, int subclass)
  16403. +{
  16404. + struct rt_mutex *lock = &rwsem->lock;
  16405. +
  16406. + rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_);
  16407. +
  16408. + if (rt_mutex_owner(lock) != current)
  16409. + rt_mutex_lock(&rwsem->lock);
  16410. + rwsem->read_depth++;
  16411. +}
  16412. +
  16413. +void rt_down_read(struct rw_semaphore *rwsem)
  16414. +{
  16415. + __rt_down_read(rwsem, 0);
  16416. +}
  16417. +EXPORT_SYMBOL(rt_down_read);
  16418. +
  16419. +void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass)
  16420. +{
  16421. + __rt_down_read(rwsem, subclass);
  16422. +}
  16423. +EXPORT_SYMBOL(rt_down_read_nested);
  16424. +
  16425. +void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
  16426. + struct lock_class_key *key)
  16427. +{
  16428. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  16429. + /*
  16430. + * Make sure we are not reinitializing a held lock:
  16431. + */
  16432. + debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem));
  16433. + lockdep_init_map(&rwsem->dep_map, name, key, 0);
  16434. +#endif
  16435. + rwsem->read_depth = 0;
  16436. + rwsem->lock.save_state = 0;
  16437. +}
  16438. +EXPORT_SYMBOL(__rt_rwsem_init);
  16439. +
  16440. +/**
  16441. + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
  16442. + * @cnt: the atomic which we are to dec
  16443. + * @lock: the mutex to return holding if we dec to 0
  16444. + *
  16445. + * return true and hold lock if we dec to 0, return false otherwise
  16446. + */
  16447. +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
  16448. +{
  16449. + /* dec if we can't possibly hit 0 */
  16450. + if (atomic_add_unless(cnt, -1, 1))
  16451. + return 0;
  16452. + /* we might hit 0, so take the lock */
  16453. + mutex_lock(lock);
  16454. + if (!atomic_dec_and_test(cnt)) {
  16455. + /* when we actually did the dec, we didn't hit 0 */
  16456. + mutex_unlock(lock);
  16457. + return 0;
  16458. + }
  16459. + /* we hit 0, and we hold the lock */
  16460. + return 1;
  16461. +}
  16462. +EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
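
atomic_dec_and_mutex_lock() above is the classic dec-and-lock pattern: decrement locklessly while the counter cannot possibly reach zero, and only take the lock to arbitrate the final decrement. A rough userspace equivalent with C11 atomics and a pthread mutex (toy names, same return convention: true means the caller hit zero and still holds the lock) could look like the sketch below; build with -pthread.

#include <stdatomic.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* add delta unless the counter currently equals "unless"; true if added */
static bool add_unless(atomic_int *v, int delta, int unless)
{
    int old = atomic_load(v);

    while (old != unless) {
        if (atomic_compare_exchange_weak(v, &old, old + delta))
            return true;
        /* a failed CAS reloaded "old"; retry with the fresh value */
    }
    return false;
}

/* returns true (and holds *lock) only for the thread that drops v to 0 */
static bool dec_and_lock(atomic_int *v, pthread_mutex_t *lock)
{
    /* fast path: the counter cannot reach 0 here, decrement lock-free */
    if (add_unless(v, -1, 1))
        return false;

    /* we might be the last reference: resolve it under the lock */
    pthread_mutex_lock(lock);
    if (atomic_fetch_sub(v, 1) == 1)
        return true;                /* hit 0, keep the lock held */
    pthread_mutex_unlock(lock);
    return false;
}

int main(void)
{
    atomic_int refs = 2;
    pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    printf("%d\n", dec_and_lock(&refs, &lock));     /* 0: 2 -> 1, lock-free */
    if (dec_and_lock(&refs, &lock)) {               /* 1 -> 0, lock held */
        printf("1\n");
        pthread_mutex_unlock(&lock);
    }
    return 0;
}
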
  16463. diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
  16464. index b025295f4966..e0b0d9b419b5 100644
  16465. --- a/kernel/locking/rtmutex.c
  16466. +++ b/kernel/locking/rtmutex.c
  16467. @@ -7,6 +7,11 @@
  16468. * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  16469. * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
  16470. * Copyright (C) 2006 Esben Nielsen
  16471. + * Adaptive Spinlocks:
  16472. + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
  16473. + * and Peter Morreale,
  16474. + * Adaptive Spinlocks simplification:
  16475. + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
  16476. *
  16477. * See Documentation/locking/rt-mutex-design.txt for details.
  16478. */
  16479. @@ -16,6 +21,7 @@
  16480. #include <linux/sched/rt.h>
  16481. #include <linux/sched/deadline.h>
  16482. #include <linux/timer.h>
  16483. +#include <linux/ww_mutex.h>
  16484. #include "rtmutex_common.h"
  16485. @@ -69,6 +75,12 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
  16486. clear_rt_mutex_waiters(lock);
  16487. }
  16488. +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
  16489. +{
  16490. + return waiter && waiter != PI_WAKEUP_INPROGRESS &&
  16491. + waiter != PI_REQUEUE_INPROGRESS;
  16492. +}
  16493. +
  16494. /*
  16495. * We can speed up the acquire/release, if the architecture
  16496. * supports cmpxchg and if there's no debugging state to be set up
  16497. @@ -300,7 +312,7 @@ static void __rt_mutex_adjust_prio(struct task_struct *task)
  16498. * of task. We do not use the spin_xx_mutex() variants here as we are
  16499. * outside of the debug path.)
  16500. */
  16501. -static void rt_mutex_adjust_prio(struct task_struct *task)
  16502. +void rt_mutex_adjust_prio(struct task_struct *task)
  16503. {
  16504. unsigned long flags;
  16505. @@ -335,6 +347,14 @@ static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
  16506. return debug_rt_mutex_detect_deadlock(waiter, chwalk);
  16507. }
  16508. +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
  16509. +{
  16510. + if (waiter->savestate)
  16511. + wake_up_lock_sleeper(waiter->task);
  16512. + else
  16513. + wake_up_process(waiter->task);
  16514. +}
  16515. +
  16516. /*
  16517. * Max number of times we'll walk the boosting chain:
  16518. */
  16519. @@ -342,7 +362,8 @@ int max_lock_depth = 1024;
  16520. static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
  16521. {
  16522. - return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
  16523. + return rt_mutex_real_waiter(p->pi_blocked_on) ?
  16524. + p->pi_blocked_on->lock : NULL;
  16525. }
  16526. /*
  16527. @@ -479,7 +500,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  16528. * reached or the state of the chain has changed while we
  16529. * dropped the locks.
  16530. */
  16531. - if (!waiter)
  16532. + if (!rt_mutex_real_waiter(waiter))
  16533. goto out_unlock_pi;
  16534. /*
  16535. @@ -641,13 +662,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  16536. * follow here. This is the end of the chain we are walking.
  16537. */
  16538. if (!rt_mutex_owner(lock)) {
  16539. + struct rt_mutex_waiter *lock_top_waiter;
  16540. +
  16541. /*
  16542. * If the requeue [7] above changed the top waiter,
  16543. * then we need to wake the new top waiter up to try
  16544. * to get the lock.
  16545. */
  16546. - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
  16547. - wake_up_process(rt_mutex_top_waiter(lock)->task);
  16548. + lock_top_waiter = rt_mutex_top_waiter(lock);
  16549. + if (prerequeue_top_waiter != lock_top_waiter)
  16550. + rt_mutex_wake_waiter(lock_top_waiter);
  16551. raw_spin_unlock(&lock->wait_lock);
  16552. return 0;
  16553. }
  16554. @@ -740,6 +764,25 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  16555. return ret;
  16556. }
  16557. +
  16558. +#define STEAL_NORMAL 0
  16559. +#define STEAL_LATERAL 1
  16560. +
  16561. +/*
  16562. + * Note that RT tasks are excluded from lateral-steals to prevent the
  16563. + * introduction of an unbounded latency
  16564. + */
  16565. +static inline int lock_is_stealable(struct task_struct *task,
  16566. + struct task_struct *pendowner, int mode)
  16567. +{
  16568. + if (mode == STEAL_NORMAL || rt_task(task)) {
  16569. + if (task->prio >= pendowner->prio)
  16570. + return 0;
  16571. + } else if (task->prio > pendowner->prio)
  16572. + return 0;
  16573. + return 1;
  16574. +}
  16575. +
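
The stealing rule above can be read as: a strict priority win is always required, except that in lateral mode a non-RT task may also take the lock at equal priority; RT tasks never steal laterally, so an equal-priority RT waiter that is already pending cannot be pushed back indefinitely. A small standalone restatement of that decision follows (toy_* names are invented; prio follows the kernel convention of lower number = more important).

#include <stdbool.h>
#include <stdio.h>

enum steal_mode { MODE_NORMAL, MODE_LATERAL };

struct toy_task {
    int prio;       /* kernel convention: lower number = higher priority */
    bool rt;        /* realtime scheduling class */
};

/* mirror of the decision in lock_is_stealable() */
static bool toy_stealable(const struct toy_task *task,
                          const struct toy_task *pendowner, enum steal_mode mode)
{
    if (mode == MODE_NORMAL || task->rt)
        return task->prio < pendowner->prio;    /* strict win only */
    return task->prio <= pendowner->prio;       /* lateral steal allowed */
}

int main(void)
{
    struct toy_task waiter  = { .prio = 120, .rt = false };
    struct toy_task pending = { .prio = 120, .rt = false };

    printf("normal,  equal prio: %d\n", toy_stealable(&waiter, &pending, MODE_NORMAL));  /* 0 */
    printf("lateral, equal prio: %d\n", toy_stealable(&waiter, &pending, MODE_LATERAL)); /* 1 */
    waiter.rt = true;
    printf("lateral, RT stealer: %d\n", toy_stealable(&waiter, &pending, MODE_LATERAL)); /* 0 */
    return 0;
}
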
  16576. /*
  16577. * Try to take an rt-mutex
  16578. *
  16579. @@ -750,8 +793,9 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  16580. * @waiter: The waiter that is queued to the lock's wait list if the
  16581. * callsite called task_blocked_on_lock(), otherwise NULL
  16582. */
  16583. -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  16584. - struct rt_mutex_waiter *waiter)
  16585. +static int __try_to_take_rt_mutex(struct rt_mutex *lock,
  16586. + struct task_struct *task,
  16587. + struct rt_mutex_waiter *waiter, int mode)
  16588. {
  16589. unsigned long flags;
  16590. @@ -790,8 +834,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  16591. * If waiter is not the highest priority waiter of
  16592. * @lock, give up.
  16593. */
  16594. - if (waiter != rt_mutex_top_waiter(lock))
  16595. + if (waiter != rt_mutex_top_waiter(lock)) {
  16596. + /* XXX lock_is_stealable() ? */
  16597. return 0;
  16598. + }
  16599. /*
  16600. * We can acquire the lock. Remove the waiter from the
  16601. @@ -809,14 +855,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  16602. * not need to be dequeued.
  16603. */
  16604. if (rt_mutex_has_waiters(lock)) {
  16605. - /*
  16606. - * If @task->prio is greater than or equal to
  16607. - * the top waiter priority (kernel view),
  16608. - * @task lost.
  16609. - */
  16610. - if (task->prio >= rt_mutex_top_waiter(lock)->prio)
  16611. - return 0;
  16612. + struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
  16613. + if (task != pown && !lock_is_stealable(task, pown, mode))
  16614. + return 0;
  16615. /*
  16616. * The current top waiter stays enqueued. We
  16617. * don't have to change anything in the lock
  16618. @@ -865,6 +907,347 @@ takeit:
  16619. return 1;
  16620. }
  16621. +#ifdef CONFIG_PREEMPT_RT_FULL
  16622. +/*
  16623. + * preemptible spin_lock functions:
  16624. + */
  16625. +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
  16626. + void (*slowfn)(struct rt_mutex *lock))
  16627. +{
  16628. + might_sleep_no_state_check();
  16629. +
  16630. + if (likely(rt_mutex_cmpxchg(lock, NULL, current)))
  16631. + rt_mutex_deadlock_account_lock(lock, current);
  16632. + else
  16633. + slowfn(lock);
  16634. +}
  16635. +
  16636. +static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
  16637. + void (*slowfn)(struct rt_mutex *lock))
  16638. +{
  16639. + if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
  16640. + rt_mutex_deadlock_account_unlock(current);
  16641. + else
  16642. + slowfn(lock);
  16643. +}
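
Both fast paths above are a single cmpxchg on the owner field: NULL -> current to lock, current -> NULL to unlock, falling back to the slow path when the lock is contended or has waiter state encoded in the owner word. A userspace sketch of that shape with C11 atomics (toy_* names are invented; the real slow paths enqueue the waiter and do priority inheritance instead of spinning):

#include <stdatomic.h>
#include <stdio.h>

struct toy_lock {
    _Atomic(void *) owner;  /* NULL when free, else the owning "task" */
};

/* placeholder for the contended path (enqueue, PI boost, sleep, ...) */
static void toy_slowlock(struct toy_lock *l, void *self)
{
    void *expected = NULL;

    while (!atomic_compare_exchange_weak(&l->owner, &expected, self))
        expected = NULL;    /* the sketch just retries; the real code sleeps */
}

static void toy_lock(struct toy_lock *l, void *self)
{
    void *expected = NULL;

    /* fast path: an uncontended acquire is one cmpxchg, no queueing */
    if (!atomic_compare_exchange_strong(&l->owner, &expected, self))
        toy_slowlock(l, self);
}

static void toy_unlock(struct toy_lock *l, void *self)
{
    void *expected = self;

    /* fast path: still owned by us with nothing recorded in the owner word */
    if (atomic_compare_exchange_strong(&l->owner, &expected, NULL))
        return;
    /* a slow path would wake the next waiter and possibly deboost here */
}

int main(void)
{
    struct toy_lock l = { .owner = NULL };
    int me;

    toy_lock(&l, &me);
    printf("locked by us: %d\n", atomic_load(&l.owner) == (void *)&me);
    toy_unlock(&l, &me);
    printf("free again:   %d\n", atomic_load(&l.owner) == NULL);
    return 0;
}
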
  16644. +#ifdef CONFIG_SMP
  16645. +/*
  16646. + * Note that owner is a speculative pointer and dereferencing relies
  16647. + * on rcu_read_lock() and the check against the lock owner.
  16648. + */
  16649. +static int adaptive_wait(struct rt_mutex *lock,
  16650. + struct task_struct *owner)
  16651. +{
  16652. + int res = 0;
  16653. +
  16654. + rcu_read_lock();
  16655. + for (;;) {
  16656. + if (owner != rt_mutex_owner(lock))
  16657. + break;
  16658. + /*
  16659. + * Ensure that owner->on_cpu is dereferenced _after_
  16660. + * checking the above to be valid.
  16661. + */
  16662. + barrier();
  16663. + if (!owner->on_cpu) {
  16664. + res = 1;
  16665. + break;
  16666. + }
  16667. + cpu_relax();
  16668. + }
  16669. + rcu_read_unlock();
  16670. + return res;
  16671. +}
  16672. +#else
  16673. +static int adaptive_wait(struct rt_mutex *lock,
  16674. + struct task_struct *orig_owner)
  16675. +{
  16676. + return 1;
  16677. +}
  16678. +#endif
  16679. +
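
adaptive_wait() above spins only while the current owner is still running on a CPU, on the theory that a running owner will release the lock soon; once the owner itself goes to sleep, or the lock changes hands, the spinner stops. A single-threaded userspace sketch of that decision (toy_* names are invented; the rcu_read_lock() that keeps the owner dereference safe in the real code is not modelled):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_owner {
    atomic_bool on_cpu;         /* is the owner currently running? */
};

struct toy_lock {
    _Atomic(struct toy_owner *) owner;
};

/*
 * True means "stop spinning and really sleep": the owner we were watching
 * went off CPU, so the lock will not be released soon.  False means the
 * lock changed hands, so the caller should retry its trylock instead.
 */
static bool toy_adaptive_wait(struct toy_lock *lock, struct toy_owner *owner)
{
    for (;;) {
        if (atomic_load(&lock->owner) != owner)
            return false;
        if (!atomic_load(&owner->on_cpu))
            return true;
        /* a cpu_relax() style pause would go here */
    }
}

int main(void)
{
    struct toy_owner owner = { .on_cpu = false };
    struct toy_lock lock = { .owner = NULL };

    atomic_store(&lock.owner, &owner);
    printf("sleep? %d\n", toy_adaptive_wait(&lock, &owner));    /* 1: owner off CPU */

    atomic_store(&owner.on_cpu, true);
    atomic_store(&lock.owner, NULL);    /* lock was released / handed over */
    printf("sleep? %d\n", toy_adaptive_wait(&lock, &owner));    /* 0: retry trylock */
    return 0;
}
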
  16680. +# define pi_lock(lock) raw_spin_lock_irq(lock)
  16681. +# define pi_unlock(lock) raw_spin_unlock_irq(lock)
  16682. +
  16683. +static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  16684. + struct rt_mutex_waiter *waiter,
  16685. + struct task_struct *task,
  16686. + enum rtmutex_chainwalk chwalk);
  16687. +/*
  16688. + * Slow path lock function spin_lock style: this variant is very
  16689. + * careful not to miss any non-lock wakeups.
  16690. + *
  16691. + * We store the current state under p->pi_lock in p->saved_state and
  16692. + * the try_to_wake_up() code handles this accordingly.
  16693. + */
  16694. +static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock)
  16695. +{
  16696. + struct task_struct *lock_owner, *self = current;
  16697. + struct rt_mutex_waiter waiter, *top_waiter;
  16698. + int ret;
  16699. +
  16700. + rt_mutex_init_waiter(&waiter, true);
  16701. +
  16702. + raw_spin_lock(&lock->wait_lock);
  16703. +
  16704. + if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
  16705. + raw_spin_unlock(&lock->wait_lock);
  16706. + return;
  16707. + }
  16708. +
  16709. + BUG_ON(rt_mutex_owner(lock) == self);
  16710. +
  16711. + /*
  16712. + * We save whatever state the task is in and we'll restore it
  16713. + * after acquiring the lock taking real wakeups into account
  16714. + * as well. We are serialized via pi_lock against wakeups. See
  16715. + * try_to_wake_up().
  16716. + */
  16717. + pi_lock(&self->pi_lock);
  16718. + self->saved_state = self->state;
  16719. + __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
  16720. + pi_unlock(&self->pi_lock);
  16721. +
  16722. + ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK);
  16723. + BUG_ON(ret);
  16724. +
  16725. + for (;;) {
  16726. + /* Try to acquire the lock again. */
  16727. + if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
  16728. + break;
  16729. +
  16730. + top_waiter = rt_mutex_top_waiter(lock);
  16731. + lock_owner = rt_mutex_owner(lock);
  16732. +
  16733. + raw_spin_unlock(&lock->wait_lock);
  16734. +
  16735. + debug_rt_mutex_print_deadlock(&waiter);
  16736. +
  16737. + if (top_waiter != &waiter || adaptive_wait(lock, lock_owner))
  16738. + schedule_rt_mutex(lock);
  16739. +
  16740. + raw_spin_lock(&lock->wait_lock);
  16741. +
  16742. + pi_lock(&self->pi_lock);
  16743. + __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
  16744. + pi_unlock(&self->pi_lock);
  16745. + }
  16746. +
  16747. + /*
  16748. + * Restore the task state to current->saved_state. We set it
  16749. + * to the original state above and the try_to_wake_up() code
  16750. + * has possibly updated it when a real (non-rtmutex) wakeup
  16751. + * happened while we were blocked. Clear saved_state so
  16752. + * try_to_wake_up() does not get confused.
  16753. + */
  16754. + pi_lock(&self->pi_lock);
  16755. + __set_current_state_no_track(self->saved_state);
  16756. + self->saved_state = TASK_RUNNING;
  16757. + pi_unlock(&self->pi_lock);
  16758. +
  16759. + /*
  16760. + * try_to_take_rt_mutex() sets the waiter bit
  16761. + * unconditionally. We might have to fix that up:
  16762. + */
  16763. + fixup_rt_mutex_waiters(lock);
  16764. +
  16765. + BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
  16766. + BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry));
  16767. +
  16768. + raw_spin_unlock(&lock->wait_lock);
  16769. +
  16770. + debug_rt_mutex_free_waiter(&waiter);
  16771. +}
  16772. +
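
The slow path above parks the task's real sleep state in saved_state while it blocks UNINTERRUPTIBLE on the lock, lets try_to_wake_up() record any real wakeup into saved_state, and restores it afterwards. The toy model below only illustrates that bookkeeping; it is single threaded, the toy_* names are invented, and the real code does all of this under ->pi_lock inside the scheduler.

#include <stdio.h>

enum toy_state { TOY_RUNNING, TOY_INTERRUPTIBLE, TOY_UNINTERRUPTIBLE };

struct toy_task {
    enum toy_state state;           /* what the "scheduler" acts on */
    enum toy_state saved_state;     /* parked state while blocked on the lock */
    int in_lock_sleep;              /* inside the spinlock slow path? */
};

/* entering the slow path: park whatever state the task was in */
static void toy_lock_sleep_begin(struct toy_task *t)
{
    t->saved_state = t->state;
    t->state = TOY_UNINTERRUPTIBLE;
    t->in_lock_sleep = 1;
}

/* a real (non-lock) wakeup delivered while the task sleeps on the lock */
static void toy_real_wakeup(struct toy_task *t)
{
    if (t->in_lock_sleep)
        t->saved_state = TOY_RUNNING;   /* remember it instead of losing it */
    else
        t->state = TOY_RUNNING;
}

/* lock acquired: restore the parked state, including any recorded wakeup */
static void toy_lock_sleep_end(struct toy_task *t)
{
    t->state = t->saved_state;
    t->saved_state = TOY_RUNNING;
    t->in_lock_sleep = 0;
}

int main(void)
{
    struct toy_task t = { .state = TOY_INTERRUPTIBLE };

    toy_lock_sleep_begin(&t);
    toy_real_wakeup(&t);            /* e.g. a signal wakeup arrives meanwhile */
    toy_lock_sleep_end(&t);
    printf("state = %d (TOY_RUNNING = %d)\n", t.state, TOY_RUNNING);
    return 0;
}
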
  16773. +static void wakeup_next_waiter(struct rt_mutex *lock);
  16774. +/*
  16775. + * Slow path to release a rt_mutex spin_lock style
  16776. + */
  16777. +static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
  16778. +{
  16779. + raw_spin_lock(&lock->wait_lock);
  16780. +
  16781. + debug_rt_mutex_unlock(lock);
  16782. +
  16783. + rt_mutex_deadlock_account_unlock(current);
  16784. +
  16785. + if (!rt_mutex_has_waiters(lock)) {
  16786. + lock->owner = NULL;
  16787. + raw_spin_unlock(&lock->wait_lock);
  16788. + return;
  16789. + }
  16790. +
  16791. + wakeup_next_waiter(lock);
  16792. +
  16793. + raw_spin_unlock(&lock->wait_lock);
  16794. +
  16795. + /* Undo pi boosting when necessary */
  16796. + rt_mutex_adjust_prio(current);
  16797. +}
  16798. +
  16799. +void __lockfunc rt_spin_lock(spinlock_t *lock)
  16800. +{
  16801. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
  16802. + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  16803. +}
  16804. +EXPORT_SYMBOL(rt_spin_lock);
  16805. +
  16806. +void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
  16807. +{
  16808. + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
  16809. +}
  16810. +EXPORT_SYMBOL(__rt_spin_lock);
  16811. +
  16812. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  16813. +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
  16814. +{
  16815. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
  16816. + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
  16817. +}
  16818. +EXPORT_SYMBOL(rt_spin_lock_nested);
  16819. +#endif
  16820. +
  16821. +void __lockfunc rt_spin_unlock(spinlock_t *lock)
  16822. +{
  16823. + /* NOTE: we always pass in '1' for nested, for simplicity */
  16824. + spin_release(&lock->dep_map, 1, _RET_IP_);
  16825. + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
  16826. +}
  16827. +EXPORT_SYMBOL(rt_spin_unlock);
  16828. +
  16829. +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
  16830. +{
  16831. + rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
  16832. +}
  16833. +EXPORT_SYMBOL(__rt_spin_unlock);
  16834. +
  16835. +/*
  16836. + * Wait for the lock to get unlocked: instead of polling for an unlock
  16837. + * (like raw spinlocks do), we lock and unlock, to force the kernel to
  16838. + * schedule if there's contention:
  16839. + */
  16840. +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
  16841. +{
  16842. + spin_lock(lock);
  16843. + spin_unlock(lock);
  16844. +}
  16845. +EXPORT_SYMBOL(rt_spin_unlock_wait);
  16846. +
  16847. +int __lockfunc __rt_spin_trylock(struct rt_mutex *lock)
  16848. +{
  16849. + return rt_mutex_trylock(lock);
  16850. +}
  16851. +
  16852. +int __lockfunc rt_spin_trylock(spinlock_t *lock)
  16853. +{
  16854. + int ret = rt_mutex_trylock(&lock->lock);
  16855. +
  16856. + if (ret)
  16857. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  16858. + return ret;
  16859. +}
  16860. +EXPORT_SYMBOL(rt_spin_trylock);
  16861. +
  16862. +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
  16863. +{
  16864. + int ret;
  16865. +
  16866. + local_bh_disable();
  16867. + ret = rt_mutex_trylock(&lock->lock);
  16868. + if (ret) {
  16869. + migrate_disable();
  16870. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  16871. + } else
  16872. + local_bh_enable();
  16873. + return ret;
  16874. +}
  16875. +EXPORT_SYMBOL(rt_spin_trylock_bh);
  16876. +
  16877. +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
  16878. +{
  16879. + int ret;
  16880. +
  16881. + *flags = 0;
  16882. + ret = rt_mutex_trylock(&lock->lock);
  16883. + if (ret) {
  16884. + migrate_disable();
  16885. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  16886. + }
  16887. + return ret;
  16888. +}
  16889. +EXPORT_SYMBOL(rt_spin_trylock_irqsave);
  16890. +
  16891. +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
  16892. +{
  16893. + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
  16894. + if (atomic_add_unless(atomic, -1, 1))
  16895. + return 0;
  16896. + migrate_disable();
  16897. + rt_spin_lock(lock);
  16898. + if (atomic_dec_and_test(atomic))
  16899. + return 1;
  16900. + rt_spin_unlock(lock);
  16901. + migrate_enable();
  16902. + return 0;
  16903. +}
  16904. +EXPORT_SYMBOL(atomic_dec_and_spin_lock);
  16905. +
  16906. + void
  16907. +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
  16908. +{
  16909. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  16910. + /*
  16911. + * Make sure we are not reinitializing a held lock:
  16912. + */
  16913. + debug_check_no_locks_freed((void *)lock, sizeof(*lock));
  16914. + lockdep_init_map(&lock->dep_map, name, key, 0);
  16915. +#endif
  16916. +}
  16917. +EXPORT_SYMBOL(__rt_spin_lock_init);
  16918. +
  16919. +#endif /* PREEMPT_RT_FULL */
  16920. +
  16921. +#ifdef CONFIG_PREEMPT_RT_FULL
  16922. + static inline int __sched
  16923. +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
  16924. +{
  16925. + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
  16926. + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
  16927. +
  16928. + if (!hold_ctx)
  16929. + return 0;
  16930. +
  16931. + if (unlikely(ctx == hold_ctx))
  16932. + return -EALREADY;
  16933. +
  16934. + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
  16935. + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
  16936. +#ifdef CONFIG_DEBUG_MUTEXES
  16937. + DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
  16938. + ctx->contending_lock = ww;
  16939. +#endif
  16940. + return -EDEADLK;
  16941. + }
  16942. +
  16943. + return 0;
  16944. +}
  16945. +#else
  16946. + static inline int __sched
  16947. +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
  16948. +{
  16949. + BUG();
  16950. + return 0;
  16951. +}
  16952. +
  16953. +#endif
  16954. +
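
__mutex_lock_check_stamp() implements the ww_mutex back-off rule: acquire contexts carry a monotonically increasing stamp, and a requester younger than the current holder gets -EDEADLK (or -EALREADY when it meets its own context) and must drop its locks. A standalone sketch of that comparison, using the same wrap-safe "not older" test; the pointer tie-break for equal stamps in the real code is omitted here.

#include <errno.h>
#include <limits.h>
#include <stdio.h>

/* toy acquire context: just the wait/wound ticket ("stamp") */
struct toy_ctx {
    unsigned long stamp;
};

/* 0: keep waiting; -EALREADY: our own lock; -EDEADLK: we are younger, back off */
static int toy_check_stamp(const struct toy_ctx *req, const struct toy_ctx *holder)
{
    if (!holder)
        return 0;                   /* lock held without a context */
    if (req == holder)
        return -EALREADY;           /* re-acquiring our own lock */
    if (req->stamp - holder->stamp <= LONG_MAX && req->stamp != holder->stamp)
        return -EDEADLK;            /* younger than the holder: back off */
    return 0;                       /* older than the holder: may wait */
}

int main(void)
{
    struct toy_ctx older = { .stamp = 1 }, younger = { .stamp = 2 };

    printf("%d\n", toy_check_stamp(&younger, &older));  /* -EDEADLK */
    printf("%d\n", toy_check_stamp(&older, &younger));  /* 0: may wait */
    printf("%d\n", toy_check_stamp(&older, &older));    /* -EALREADY */
    return 0;
}
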
  16955. +static inline int
  16956. +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  16957. + struct rt_mutex_waiter *waiter)
  16958. +{
  16959. + return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
  16960. +}
  16961. +
  16962. /*
  16963. * Task blocks on lock.
  16964. *
  16965. @@ -896,6 +1279,23 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  16966. return -EDEADLK;
  16967. raw_spin_lock_irqsave(&task->pi_lock, flags);
  16968. +
  16969. + /*
  16970. + * In the case of futex requeue PI, this will be a proxy
  16971. + * lock. The task will wake unaware that it is enqueueed on
  16972. + * this lock. Avoid blocking on two locks and corrupting
  16973. + * pi_blocked_on via the PI_WAKEUP_INPROGRESS
  16974. + * flag. futex_wait_requeue_pi() sets this when it wakes up
  16975. + * before requeue (due to a signal or timeout). Do not enqueue
  16976. + * the task if PI_WAKEUP_INPROGRESS is set.
  16977. + */
  16978. + if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
  16979. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  16980. + return -EAGAIN;
  16981. + }
  16982. +
  16983. + BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
  16984. +
  16985. __rt_mutex_adjust_prio(task);
  16986. waiter->task = task;
  16987. waiter->lock = lock;
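
PI_WAKEUP_INPROGRESS and PI_REQUEUE_INPROGRESS (defined in rtmutex_common.h further down) are small non-pointer values parked in ->pi_blocked_on, and rt_mutex_real_waiter() is the filter that keeps the PI chain walk and the check above from treating them as waiters. A compact restatement of that sentinel-pointer pattern (toy_* names are invented):

#include <stdbool.h>
#include <stdio.h>

/* sentinels stored in ->pi_blocked_on, mirroring rtmutex_common.h */
#define TOY_WAKEUP_INPROGRESS  ((struct toy_waiter *)1)
#define TOY_REQUEUE_INPROGRESS ((struct toy_waiter *)2)

struct toy_waiter {
    int dummy;
};

/* only a real waiter may be followed into the PI chain; sentinels are skipped */
static bool toy_real_waiter(struct toy_waiter *w)
{
    return w && w != TOY_WAKEUP_INPROGRESS && w != TOY_REQUEUE_INPROGRESS;
}

int main(void)
{
    struct toy_waiter w = { 0 };

    printf("%d %d %d %d\n",
           toy_real_waiter(NULL),                       /* 0 */
           toy_real_waiter(TOY_WAKEUP_INPROGRESS),      /* 0 */
           toy_real_waiter(TOY_REQUEUE_INPROGRESS),     /* 0 */
           toy_real_waiter(&w));                        /* 1 */
    return 0;
}
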
  16988. @@ -919,7 +1319,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  16989. rt_mutex_enqueue_pi(owner, waiter);
  16990. __rt_mutex_adjust_prio(owner);
  16991. - if (owner->pi_blocked_on)
  16992. + if (rt_mutex_real_waiter(owner->pi_blocked_on))
  16993. chain_walk = 1;
  16994. } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
  16995. chain_walk = 1;
  16996. @@ -957,8 +1357,9 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  16997. /*
  16998. * Wake up the next waiter on the lock.
  16999. *
  17000. - * Remove the top waiter from the current tasks pi waiter list and
  17001. - * wake it up.
  17002. + * Remove the top waiter from the current tasks pi waiter list,
  17003. + * wake it up and return whether the current task needs to undo
  17004. + * a potential priority boosting.
  17005. *
  17006. * Called with lock->wait_lock held.
  17007. */
  17008. @@ -996,7 +1397,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
  17009. * long as we hold lock->wait_lock. The waiter task needs to
  17010. * acquire it in order to dequeue the waiter.
  17011. */
  17012. - wake_up_process(waiter->task);
  17013. + rt_mutex_wake_waiter(waiter);
  17014. }
  17015. /*
  17016. @@ -1010,7 +1411,7 @@ static void remove_waiter(struct rt_mutex *lock,
  17017. {
  17018. bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
  17019. struct task_struct *owner = rt_mutex_owner(lock);
  17020. - struct rt_mutex *next_lock;
  17021. + struct rt_mutex *next_lock = NULL;
  17022. unsigned long flags;
  17023. raw_spin_lock_irqsave(&current->pi_lock, flags);
  17024. @@ -1035,7 +1436,8 @@ static void remove_waiter(struct rt_mutex *lock,
  17025. __rt_mutex_adjust_prio(owner);
  17026. /* Store the lock on which owner is blocked or NULL */
  17027. - next_lock = task_blocked_on_lock(owner);
  17028. + if (rt_mutex_real_waiter(owner->pi_blocked_on))
  17029. + next_lock = task_blocked_on_lock(owner);
  17030. raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
  17031. @@ -1071,17 +1473,17 @@ void rt_mutex_adjust_pi(struct task_struct *task)
  17032. raw_spin_lock_irqsave(&task->pi_lock, flags);
  17033. waiter = task->pi_blocked_on;
  17034. - if (!waiter || (waiter->prio == task->prio &&
  17035. + if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio &&
  17036. !dl_prio(task->prio))) {
  17037. raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  17038. return;
  17039. }
  17040. next_lock = waiter->lock;
  17041. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  17042. /* gets dropped in rt_mutex_adjust_prio_chain()! */
  17043. get_task_struct(task);
  17044. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  17045. rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
  17046. next_lock, NULL, task);
  17047. }
  17048. @@ -1099,7 +1501,8 @@ void rt_mutex_adjust_pi(struct task_struct *task)
  17049. static int __sched
  17050. __rt_mutex_slowlock(struct rt_mutex *lock, int state,
  17051. struct hrtimer_sleeper *timeout,
  17052. - struct rt_mutex_waiter *waiter)
  17053. + struct rt_mutex_waiter *waiter,
  17054. + struct ww_acquire_ctx *ww_ctx)
  17055. {
  17056. int ret = 0;
  17057. @@ -1122,6 +1525,12 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
  17058. break;
  17059. }
  17060. + if (ww_ctx && ww_ctx->acquired > 0) {
  17061. + ret = __mutex_lock_check_stamp(lock, ww_ctx);
  17062. + if (ret)
  17063. + break;
  17064. + }
  17065. +
  17066. raw_spin_unlock(&lock->wait_lock);
  17067. debug_rt_mutex_print_deadlock(waiter);
  17068. @@ -1156,25 +1565,102 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
  17069. }
  17070. }
  17071. +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
  17072. + struct ww_acquire_ctx *ww_ctx)
  17073. +{
  17074. +#ifdef CONFIG_DEBUG_MUTEXES
  17075. + /*
  17076. + * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
  17077. + * but released with a normal mutex_unlock in this call.
  17078. + *
  17079. + * This should never happen, always use ww_mutex_unlock.
  17080. + */
  17081. + DEBUG_LOCKS_WARN_ON(ww->ctx);
  17082. +
  17083. + /*
  17084. + * Not quite done after calling ww_acquire_done() ?
  17085. + */
  17086. + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
  17087. +
  17088. + if (ww_ctx->contending_lock) {
  17089. + /*
  17090. + * After -EDEADLK you tried to
  17091. + * acquire a different ww_mutex? Bad!
  17092. + */
  17093. + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
  17094. +
  17095. + /*
  17096. + * You called ww_mutex_lock after receiving -EDEADLK,
  17097. + * but 'forgot' to unlock everything else first?
  17098. + */
  17099. + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
  17100. + ww_ctx->contending_lock = NULL;
  17101. + }
  17102. +
  17103. + /*
  17104. + * Naughty, using a different class will lead to undefined behavior!
  17105. + */
  17106. + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
  17107. +#endif
  17108. + ww_ctx->acquired++;
  17109. +}
  17110. +
  17111. +#ifdef CONFIG_PREEMPT_RT_FULL
  17112. +static void ww_mutex_account_lock(struct rt_mutex *lock,
  17113. + struct ww_acquire_ctx *ww_ctx)
  17114. +{
  17115. + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
  17116. + struct rt_mutex_waiter *waiter, *n;
  17117. +
  17118. + /*
  17119. + * This branch gets optimized out for the common case,
  17120. + * and is only important for ww_mutex_lock.
  17121. + */
  17122. + ww_mutex_lock_acquired(ww, ww_ctx);
  17123. + ww->ctx = ww_ctx;
  17124. +
  17125. + /*
  17126. + * Give any possible sleeping processes the chance to wake up,
  17127. + * so they can recheck if they have to back off.
  17128. + */
  17129. + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters,
  17130. + tree_entry) {
  17131. + /* XXX debug rt mutex waiter wakeup */
  17132. +
  17133. + BUG_ON(waiter->lock != lock);
  17134. + rt_mutex_wake_waiter(waiter);
  17135. + }
  17136. +}
  17137. +
  17138. +#else
  17139. +
  17140. +static void ww_mutex_account_lock(struct rt_mutex *lock,
  17141. + struct ww_acquire_ctx *ww_ctx)
  17142. +{
  17143. + BUG();
  17144. +}
  17145. +#endif
  17146. +
  17147. /*
  17148. * Slow path lock function:
  17149. */
  17150. static int __sched
  17151. rt_mutex_slowlock(struct rt_mutex *lock, int state,
  17152. struct hrtimer_sleeper *timeout,
  17153. - enum rtmutex_chainwalk chwalk)
  17154. + enum rtmutex_chainwalk chwalk,
  17155. + struct ww_acquire_ctx *ww_ctx)
  17156. {
  17157. struct rt_mutex_waiter waiter;
  17158. int ret = 0;
  17159. - debug_rt_mutex_init_waiter(&waiter);
  17160. - RB_CLEAR_NODE(&waiter.pi_tree_entry);
  17161. - RB_CLEAR_NODE(&waiter.tree_entry);
  17162. + rt_mutex_init_waiter(&waiter, false);
  17163. raw_spin_lock(&lock->wait_lock);
  17164. /* Try to acquire the lock again: */
  17165. if (try_to_take_rt_mutex(lock, current, NULL)) {
  17166. + if (ww_ctx)
  17167. + ww_mutex_account_lock(lock, ww_ctx);
  17168. raw_spin_unlock(&lock->wait_lock);
  17169. return 0;
  17170. }
  17171. @@ -1192,13 +1678,23 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
  17172. if (likely(!ret))
  17173. /* sleep on the mutex */
  17174. - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
  17175. + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
  17176. + ww_ctx);
  17177. + else if (ww_ctx) {
  17178. + /* ww_mutex received EDEADLK, let it become EALREADY */
  17179. + ret = __mutex_lock_check_stamp(lock, ww_ctx);
  17180. + BUG_ON(!ret);
  17181. + }
  17182. if (unlikely(ret)) {
  17183. __set_current_state(TASK_RUNNING);
  17184. if (rt_mutex_has_waiters(lock))
  17185. remove_waiter(lock, &waiter);
  17186. - rt_mutex_handle_deadlock(ret, chwalk, &waiter);
  17187. + /* ww_mutex want to report EDEADLK/EALREADY, let them */
  17188. + if (!ww_ctx)
  17189. + rt_mutex_handle_deadlock(ret, chwalk, &waiter);
  17190. + } else if (ww_ctx) {
  17191. + ww_mutex_account_lock(lock, ww_ctx);
  17192. }
  17193. /*
  17194. @@ -1255,7 +1751,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
  17195. /*
  17196. * Slow path to release a rt-mutex:
  17197. */
  17198. -static void __sched
  17199. +static bool __sched
  17200. rt_mutex_slowunlock(struct rt_mutex *lock)
  17201. {
  17202. raw_spin_lock(&lock->wait_lock);
  17203. @@ -1298,7 +1794,7 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
  17204. while (!rt_mutex_has_waiters(lock)) {
  17205. /* Drops lock->wait_lock ! */
  17206. if (unlock_rt_mutex_safe(lock) == true)
  17207. - return;
  17208. + return false;
  17209. /* Relock the rtmutex and try again */
  17210. raw_spin_lock(&lock->wait_lock);
  17211. }
  17212. @@ -1311,8 +1807,7 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
  17213. raw_spin_unlock(&lock->wait_lock);
  17214. - /* Undo pi boosting if necessary: */
  17215. - rt_mutex_adjust_prio(current);
  17216. + return true;
  17217. }
  17218. /*
  17219. @@ -1323,31 +1818,36 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
  17220. */
  17221. static inline int
  17222. rt_mutex_fastlock(struct rt_mutex *lock, int state,
  17223. + struct ww_acquire_ctx *ww_ctx,
  17224. int (*slowfn)(struct rt_mutex *lock, int state,
  17225. struct hrtimer_sleeper *timeout,
  17226. - enum rtmutex_chainwalk chwalk))
  17227. + enum rtmutex_chainwalk chwalk,
  17228. + struct ww_acquire_ctx *ww_ctx))
  17229. {
  17230. if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
  17231. rt_mutex_deadlock_account_lock(lock, current);
  17232. return 0;
  17233. } else
  17234. - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
  17235. + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK,
  17236. + ww_ctx);
  17237. }
  17238. static inline int
  17239. rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
  17240. struct hrtimer_sleeper *timeout,
  17241. enum rtmutex_chainwalk chwalk,
  17242. + struct ww_acquire_ctx *ww_ctx,
  17243. int (*slowfn)(struct rt_mutex *lock, int state,
  17244. struct hrtimer_sleeper *timeout,
  17245. - enum rtmutex_chainwalk chwalk))
  17246. + enum rtmutex_chainwalk chwalk,
  17247. + struct ww_acquire_ctx *ww_ctx))
  17248. {
  17249. if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
  17250. likely(rt_mutex_cmpxchg(lock, NULL, current))) {
  17251. rt_mutex_deadlock_account_lock(lock, current);
  17252. return 0;
  17253. } else
  17254. - return slowfn(lock, state, timeout, chwalk);
  17255. + return slowfn(lock, state, timeout, chwalk, ww_ctx);
  17256. }
  17257. static inline int
  17258. @@ -1363,12 +1863,14 @@ rt_mutex_fasttrylock(struct rt_mutex *lock,
  17259. static inline void
  17260. rt_mutex_fastunlock(struct rt_mutex *lock,
  17261. - void (*slowfn)(struct rt_mutex *lock))
  17262. + bool (*slowfn)(struct rt_mutex *lock))
  17263. {
  17264. - if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
  17265. + if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
  17266. rt_mutex_deadlock_account_unlock(current);
  17267. - else
  17268. - slowfn(lock);
  17269. + } else if (slowfn(lock)) {
  17270. + /* Undo pi boosting if necessary: */
  17271. + rt_mutex_adjust_prio(current);
  17272. + }
  17273. }
  17274. /**
  17275. @@ -1380,7 +1882,7 @@ void __sched rt_mutex_lock(struct rt_mutex *lock)
  17276. {
  17277. might_sleep();
  17278. - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
  17279. + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock);
  17280. }
  17281. EXPORT_SYMBOL_GPL(rt_mutex_lock);
  17282. @@ -1397,7 +1899,7 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
  17283. {
  17284. might_sleep();
  17285. - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
  17286. + return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock);
  17287. }
  17288. EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
  17289. @@ -1410,11 +1912,30 @@ int rt_mutex_timed_futex_lock(struct rt_mutex *lock,
  17290. might_sleep();
  17291. return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
  17292. - RT_MUTEX_FULL_CHAINWALK,
  17293. + RT_MUTEX_FULL_CHAINWALK, NULL,
  17294. rt_mutex_slowlock);
  17295. }
  17296. /**
  17297. + * rt_mutex_lock_killable - lock a rt_mutex killable
  17298. + *
  17299. + * @lock: the rt_mutex to be locked
  17300. + * @detect_deadlock: deadlock detection on/off
  17301. + *
  17302. + * Returns:
  17303. + * 0 on success
  17304. + * -EINTR when interrupted by a signal
  17305. + * -EDEADLK when the lock would deadlock (when deadlock detection is on)
  17306. + */
  17307. +int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
  17308. +{
  17309. + might_sleep();
  17310. +
  17311. + return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock);
  17312. +}
  17313. +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
  17314. +
  17315. +/**
  17316. * rt_mutex_timed_lock - lock a rt_mutex interruptible
  17317. * the timeout structure is provided
  17318. * by the caller
  17319. @@ -1434,6 +1955,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
  17320. return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
  17321. RT_MUTEX_MIN_CHAINWALK,
  17322. + NULL,
  17323. rt_mutex_slowlock);
  17324. }
  17325. EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
  17326. @@ -1463,6 +1985,22 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock)
  17327. EXPORT_SYMBOL_GPL(rt_mutex_unlock);
  17328. /**
  17329. + * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock
  17330. + * @lock: the rt_mutex to be unlocked
  17331. + *
  17332. + * Returns: true/false indicating whether priority adjustment is
  17333. + * required or not.
  17334. + */
  17335. +bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
  17336. +{
  17337. + if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
  17338. + rt_mutex_deadlock_account_unlock(current);
  17339. + return false;
  17340. + }
  17341. + return rt_mutex_slowunlock(lock);
  17342. +}
  17343. +
  17344. +/**
  17345. * rt_mutex_destroy - mark a mutex unusable
  17346. * @lock: the mutex to be destroyed
  17347. *
  17348. @@ -1492,13 +2030,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
  17349. void __rt_mutex_init(struct rt_mutex *lock, const char *name)
  17350. {
  17351. lock->owner = NULL;
  17352. - raw_spin_lock_init(&lock->wait_lock);
  17353. lock->waiters = RB_ROOT;
  17354. lock->waiters_leftmost = NULL;
  17355. debug_rt_mutex_init(lock, name);
  17356. }
  17357. -EXPORT_SYMBOL_GPL(__rt_mutex_init);
  17358. +EXPORT_SYMBOL(__rt_mutex_init);
  17359. /**
  17360. * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
  17361. @@ -1513,7 +2050,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
  17362. void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  17363. struct task_struct *proxy_owner)
  17364. {
  17365. - __rt_mutex_init(lock, NULL);
  17366. + rt_mutex_init(lock);
  17367. debug_rt_mutex_proxy_lock(lock, proxy_owner);
  17368. rt_mutex_set_owner(lock, proxy_owner);
  17369. rt_mutex_deadlock_account_lock(lock, proxy_owner);
  17370. @@ -1561,6 +2098,35 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
  17371. return 1;
  17372. }
  17373. +#ifdef CONFIG_PREEMPT_RT_FULL
  17374. + /*
  17375. + * In PREEMPT_RT there's an added race.
  17376. + * If the task, that we are about to requeue, times out,
  17377. + * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
  17378. + * to skip this task. But right after the task sets
  17379. + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
  17380. + * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
  17381. + * This will replace the PI_WAKEUP_INPROGRESS with the actual
  17382. + * lock that it blocks on. We *must not* place this task
  17383. + * on this proxy lock in that case.
  17384. + *
  17385. + * To prevent this race, we first take the task's pi_lock
  17386. + * and check if it has updated its pi_blocked_on. If it has,
  17387. + * we assume that it woke up and we return -EAGAIN.
  17388. + * Otherwise, we set the task's pi_blocked_on to
  17389. + * PI_REQUEUE_INPROGRESS, so that if the task is waking up
  17390. + * it will know that we are in the process of requeuing it.
  17391. + */
  17392. + raw_spin_lock_irq(&task->pi_lock);
  17393. + if (task->pi_blocked_on) {
  17394. + raw_spin_unlock_irq(&task->pi_lock);
  17395. + raw_spin_unlock(&lock->wait_lock);
  17396. + return -EAGAIN;
  17397. + }
  17398. + task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
  17399. + raw_spin_unlock_irq(&task->pi_lock);
  17400. +#endif
  17401. +
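
The sequence above is a claim-or-bail step: take the task's pi_lock, bail out with -EAGAIN if the task already blocked (or marked itself) elsewhere, otherwise park PI_REQUEUE_INPROGRESS in pi_blocked_on so a concurrent wakeup can tell that a requeue is underway. A userspace sketch of just that step (toy_* names are invented, a pthread mutex stands in for pi_lock, -1 stands in for -EAGAIN); build with -pthread.

#include <pthread.h>
#include <stdio.h>

#define REQUEUE_IN_PROGRESS ((void *)2)     /* sentinel, mirroring the patch */

struct toy_task {
    pthread_mutex_t pi_lock;
    void *pi_blocked_on;    /* NULL, a real waiter, or a sentinel value */
};

/* 0: we claimed the task for requeue; -1: it already blocked or woke elsewhere */
static int toy_claim_for_requeue(struct toy_task *t)
{
    int ret = 0;

    pthread_mutex_lock(&t->pi_lock);
    if (t->pi_blocked_on)
        ret = -1;
    else
        t->pi_blocked_on = REQUEUE_IN_PROGRESS;
    pthread_mutex_unlock(&t->pi_lock);
    return ret;
}

int main(void)
{
    struct toy_task t = { .pi_lock = PTHREAD_MUTEX_INITIALIZER };

    printf("first claim:  %d\n", toy_claim_for_requeue(&t));    /* 0 */
    printf("second claim: %d\n", toy_claim_for_requeue(&t));    /* -1: already marked */
    return 0;
}
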
  17402. /* We enforce deadlock detection for futexes */
  17403. ret = task_blocks_on_rt_mutex(lock, waiter, task,
  17404. RT_MUTEX_FULL_CHAINWALK);
  17405. @@ -1575,7 +2141,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
  17406. ret = 0;
  17407. }
  17408. - if (unlikely(ret))
  17409. + if (ret && rt_mutex_has_waiters(lock))
  17410. remove_waiter(lock, waiter);
  17411. raw_spin_unlock(&lock->wait_lock);
  17412. @@ -1631,7 +2197,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
  17413. set_current_state(TASK_INTERRUPTIBLE);
  17414. /* sleep on the mutex */
  17415. - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
  17416. + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
  17417. if (unlikely(ret))
  17418. remove_waiter(lock, waiter);
  17419. @@ -1646,3 +2212,89 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
  17420. return ret;
  17421. }
  17422. +
  17423. +static inline int
  17424. +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
  17425. +{
  17426. +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
  17427. + unsigned tmp;
  17428. +
  17429. + if (ctx->deadlock_inject_countdown-- == 0) {
  17430. + tmp = ctx->deadlock_inject_interval;
  17431. + if (tmp > UINT_MAX/4)
  17432. + tmp = UINT_MAX;
  17433. + else
  17434. + tmp = tmp*2 + tmp + tmp/2;
  17435. +
  17436. + ctx->deadlock_inject_interval = tmp;
  17437. + ctx->deadlock_inject_countdown = tmp;
  17438. + ctx->contending_lock = lock;
  17439. +
  17440. + ww_mutex_unlock(lock);
  17441. +
  17442. + return -EDEADLK;
  17443. + }
  17444. +#endif
  17445. +
  17446. + return 0;
  17447. +}
  17448. +
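
With CONFIG_DEBUG_WW_MUTEX_SLOWPATH, ww_mutex_deadlock_injection() forces an -EDEADLK every deadlock_inject_interval acquisitions and then grows the interval by the integer expression tmp*2 + tmp + tmp/2, i.e. roughly 3.5x, saturating at UINT_MAX. A standalone sketch of that schedule (the starting interval of 1 is only illustrative):

#include <limits.h>
#include <stdio.h>

/* same growth rule as ww_mutex_deadlock_injection(): interval *= ~3.5, saturating */
static unsigned int toy_next_interval(unsigned int tmp)
{
    if (tmp > UINT_MAX / 4)
        return UINT_MAX;
    return tmp * 2 + tmp + tmp / 2;
}

int main(void)
{
    unsigned int interval = 1;      /* illustrative starting value */

    for (int i = 0; i < 8; i++) {
        printf("injection #%d after %u acquisitions\n", i, interval);
        interval = toy_next_interval(interval);
    }
    return 0;
}
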
  17449. +#ifdef CONFIG_PREEMPT_RT_FULL
  17450. +int __sched
  17451. +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
  17452. +{
  17453. + int ret;
  17454. +
  17455. + might_sleep();
  17456. +
  17457. + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
  17458. + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx);
  17459. + if (ret)
  17460. + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
  17461. + else if (!ret && ww_ctx->acquired > 1)
  17462. + return ww_mutex_deadlock_injection(lock, ww_ctx);
  17463. +
  17464. + return ret;
  17465. +}
  17466. +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
  17467. +
  17468. +int __sched
  17469. +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
  17470. +{
  17471. + int ret;
  17472. +
  17473. + might_sleep();
  17474. +
  17475. + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
  17476. + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx);
  17477. + if (ret)
  17478. + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
  17479. + else if (!ret && ww_ctx->acquired > 1)
  17480. + return ww_mutex_deadlock_injection(lock, ww_ctx);
  17481. +
  17482. + return ret;
  17483. +}
  17484. +EXPORT_SYMBOL_GPL(__ww_mutex_lock);
  17485. +
  17486. +void __sched ww_mutex_unlock(struct ww_mutex *lock)
  17487. +{
  17488. + int nest = !!lock->ctx;
  17489. +
  17490. + /*
  17491. + * The unlocking fastpath is the 0->1 transition from 'locked'
  17492. + * into 'unlocked' state:
  17493. + */
  17494. + if (nest) {
  17495. +#ifdef CONFIG_DEBUG_MUTEXES
  17496. + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
  17497. +#endif
  17498. + if (lock->ctx->acquired > 0)
  17499. + lock->ctx->acquired--;
  17500. + lock->ctx = NULL;
  17501. + }
  17502. +
  17503. + mutex_release(&lock->base.dep_map, nest, _RET_IP_);
  17504. + rt_mutex_unlock(&lock->base.lock);
  17505. +}
  17506. +EXPORT_SYMBOL(ww_mutex_unlock);
  17507. +#endif
  17508. diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
  17509. index 855212501407..4d317e9a5d0f 100644
  17510. --- a/kernel/locking/rtmutex_common.h
  17511. +++ b/kernel/locking/rtmutex_common.h
  17512. @@ -49,6 +49,7 @@ struct rt_mutex_waiter {
  17513. struct rb_node pi_tree_entry;
  17514. struct task_struct *task;
  17515. struct rt_mutex *lock;
  17516. + bool savestate;
  17517. #ifdef CONFIG_DEBUG_RT_MUTEXES
  17518. unsigned long ip;
  17519. struct pid *deadlock_task_pid;
  17520. @@ -119,6 +120,9 @@ enum rtmutex_chainwalk {
  17521. /*
  17522. * PI-futex support (proxy locking functions, etc.):
  17523. */
  17524. +#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
  17525. +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)
  17526. +
  17527. extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
  17528. extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  17529. struct task_struct *proxy_owner);
  17530. @@ -132,10 +136,24 @@ extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
  17531. struct rt_mutex_waiter *waiter);
  17532. extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
  17533. +extern bool rt_mutex_futex_unlock(struct rt_mutex *lock);
  17534. +
  17535. +extern void rt_mutex_adjust_prio(struct task_struct *task);
  17536. +
  17537. #ifdef CONFIG_DEBUG_RT_MUTEXES
  17538. # include "rtmutex-debug.h"
  17539. #else
  17540. # include "rtmutex.h"
  17541. #endif
  17542. +static inline void
  17543. +rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
  17544. +{
  17545. + debug_rt_mutex_init_waiter(waiter);
  17546. + waiter->task = NULL;
  17547. + waiter->savestate = savestate;
  17548. + RB_CLEAR_NODE(&waiter->pi_tree_entry);
  17549. + RB_CLEAR_NODE(&waiter->tree_entry);
  17550. +}
  17551. +
  17552. #endif
  17553. diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
  17554. index db3ccb1dd614..909779647bd1 100644
  17555. --- a/kernel/locking/spinlock.c
  17556. +++ b/kernel/locking/spinlock.c
  17557. @@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
  17558. * __[spin|read|write]_lock_bh()
  17559. */
  17560. BUILD_LOCK_OPS(spin, raw_spinlock);
  17561. +
  17562. +#ifndef CONFIG_PREEMPT_RT_FULL
  17563. BUILD_LOCK_OPS(read, rwlock);
  17564. BUILD_LOCK_OPS(write, rwlock);
  17565. +#endif
  17566. #endif
  17567. @@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
  17568. EXPORT_SYMBOL(_raw_spin_unlock_bh);
  17569. #endif
  17570. +#ifndef CONFIG_PREEMPT_RT_FULL
  17571. +
  17572. #ifndef CONFIG_INLINE_READ_TRYLOCK
  17573. int __lockfunc _raw_read_trylock(rwlock_t *lock)
  17574. {
  17575. @@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
  17576. EXPORT_SYMBOL(_raw_write_unlock_bh);
  17577. #endif
  17578. +#endif /* !PREEMPT_RT_FULL */
  17579. +
  17580. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  17581. void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
  17582. diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
  17583. index 0374a596cffa..94970338d518 100644
  17584. --- a/kernel/locking/spinlock_debug.c
  17585. +++ b/kernel/locking/spinlock_debug.c
  17586. @@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
  17587. EXPORT_SYMBOL(__raw_spin_lock_init);
  17588. +#ifndef CONFIG_PREEMPT_RT_FULL
  17589. void __rwlock_init(rwlock_t *lock, const char *name,
  17590. struct lock_class_key *key)
  17591. {
  17592. @@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name,
  17593. }
  17594. EXPORT_SYMBOL(__rwlock_init);
  17595. +#endif
  17596. static void spin_dump(raw_spinlock_t *lock, const char *msg)
  17597. {
  17598. @@ -159,6 +161,7 @@ void do_raw_spin_unlock(raw_spinlock_t *lock)
  17599. arch_spin_unlock(&lock->raw_lock);
  17600. }
  17601. +#ifndef CONFIG_PREEMPT_RT_FULL
  17602. static void rwlock_bug(rwlock_t *lock, const char *msg)
  17603. {
  17604. if (!debug_locks_off())
  17605. @@ -300,3 +303,5 @@ void do_raw_write_unlock(rwlock_t *lock)
  17606. debug_write_unlock(lock);
  17607. arch_write_unlock(&lock->raw_lock);
  17608. }
  17609. +
  17610. +#endif
  17611. diff --git a/kernel/panic.c b/kernel/panic.c
  17612. index a4f7820f5930..cd91cec1f29a 100644
  17613. --- a/kernel/panic.c
  17614. +++ b/kernel/panic.c
  17615. @@ -399,9 +399,11 @@ static u64 oops_id;
  17616. static int init_oops_id(void)
  17617. {
  17618. +#ifndef CONFIG_PREEMPT_RT_FULL
  17619. if (!oops_id)
  17620. get_random_bytes(&oops_id, sizeof(oops_id));
  17621. else
  17622. +#endif
  17623. oops_id++;
  17624. return 0;
  17625. diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
  17626. index 2329daae5255..b8f41a3635fd 100644
  17627. --- a/kernel/power/hibernate.c
  17628. +++ b/kernel/power/hibernate.c
  17629. @@ -285,6 +285,8 @@ static int create_image(int platform_mode)
  17630. local_irq_disable();
  17631. + system_state = SYSTEM_SUSPEND;
  17632. +
  17633. error = syscore_suspend();
  17634. if (error) {
  17635. printk(KERN_ERR "PM: Some system devices failed to power down, "
  17636. @@ -314,6 +316,7 @@ static int create_image(int platform_mode)
  17637. syscore_resume();
  17638. Enable_irqs:
  17639. + system_state = SYSTEM_RUNNING;
  17640. local_irq_enable();
  17641. Enable_cpus:
  17642. @@ -437,6 +440,7 @@ static int resume_target_kernel(bool platform_mode)
  17643. goto Enable_cpus;
  17644. local_irq_disable();
  17645. + system_state = SYSTEM_SUSPEND;
  17646. error = syscore_suspend();
  17647. if (error)
  17648. @@ -470,6 +474,7 @@ static int resume_target_kernel(bool platform_mode)
  17649. syscore_resume();
  17650. Enable_irqs:
  17651. + system_state = SYSTEM_RUNNING;
  17652. local_irq_enable();
  17653. Enable_cpus:
  17654. @@ -555,6 +560,7 @@ int hibernation_platform_enter(void)
  17655. goto Platform_finish;
  17656. local_irq_disable();
  17657. + system_state = SYSTEM_SUSPEND;
  17658. syscore_suspend();
  17659. if (pm_wakeup_pending()) {
  17660. error = -EAGAIN;
  17661. @@ -567,6 +573,7 @@ int hibernation_platform_enter(void)
  17662. Power_up:
  17663. syscore_resume();
  17664. + system_state = SYSTEM_RUNNING;
  17665. local_irq_enable();
  17666. enable_nonboot_cpus();
  17667. diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
  17668. index 8d7a1ef72758..db920b1704b1 100644
  17669. --- a/kernel/power/suspend.c
  17670. +++ b/kernel/power/suspend.c
  17671. @@ -356,6 +356,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
  17672. arch_suspend_disable_irqs();
  17673. BUG_ON(!irqs_disabled());
  17674. + system_state = SYSTEM_SUSPEND;
  17675. +
  17676. error = syscore_suspend();
  17677. if (!error) {
  17678. *wakeup = pm_wakeup_pending();
  17679. @@ -370,6 +372,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
  17680. syscore_resume();
  17681. }
  17682. + system_state = SYSTEM_RUNNING;
  17683. +
  17684. arch_suspend_enable_irqs();
  17685. BUG_ON(irqs_disabled());
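
Setting system_state to SYSTEM_SUSPEND around the syscore phase gives other code a cheap way to detect the window in which interrupts are off and nothing may sleep. A hedged sketch of a consumer; the helper name is invented:

/* Sketch: skip optional work that could sleep while the syscore
 * suspend/resume window is active (irqs off, scheduling forbidden). */
static bool demo_may_do_sleeping_work(void)
{
        return system_state != SYSTEM_SUSPEND;
}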
  17686. diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
  17687. index 3c1aca0c3543..398bf2bbd3bc 100644
  17688. --- a/kernel/printk/printk.c
  17689. +++ b/kernel/printk/printk.c
  17690. @@ -1163,6 +1163,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
  17691. {
  17692. char *text;
  17693. int len = 0;
  17694. + int attempts = 0;
  17695. text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
  17696. if (!text)
  17697. @@ -1174,7 +1175,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
  17698. u64 seq;
  17699. u32 idx;
  17700. enum log_flags prev;
  17701. -
  17702. + int num_msg;
  17703. +try_again:
  17704. + attempts++;
  17705. + if (attempts > 10) {
  17706. + len = -EBUSY;
  17707. + goto out;
  17708. + }
  17709. + num_msg = 0;
  17710. if (clear_seq < log_first_seq) {
  17711. /* messages are gone, move to first available one */
  17712. clear_seq = log_first_seq;
  17713. @@ -1195,6 +1203,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
  17714. prev = msg->flags;
  17715. idx = log_next(idx);
  17716. seq++;
  17717. + num_msg++;
  17718. + if (num_msg > 5) {
  17719. + num_msg = 0;
  17720. + raw_spin_unlock_irq(&logbuf_lock);
  17721. + raw_spin_lock_irq(&logbuf_lock);
  17722. + if (clear_seq < log_first_seq)
  17723. + goto try_again;
  17724. + }
  17725. }
  17726. /* move first record forward until length fits into the buffer */
  17727. @@ -1208,6 +1224,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
  17728. prev = msg->flags;
  17729. idx = log_next(idx);
  17730. seq++;
  17731. + num_msg++;
  17732. + if (num_msg > 5) {
  17733. + num_msg = 0;
  17734. + raw_spin_unlock_irq(&logbuf_lock);
  17735. + raw_spin_lock_irq(&logbuf_lock);
  17736. + if (clear_seq < log_first_seq)
  17737. + goto try_again;
  17738. + }
  17739. }
  17740. /* last message fitting into this dump */
  17741. @@ -1248,6 +1272,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
  17742. clear_seq = log_next_seq;
  17743. clear_idx = log_next_idx;
  17744. }
  17745. +out:
  17746. raw_spin_unlock_irq(&logbuf_lock);
  17747. kfree(text);
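
The changes above turn syslog_print_all() into a bounded lock-breaking loop: after every five records logbuf_lock is dropped and immediately re-taken to open a preemption window, the walk restarts from try_again if the ring buffer moved underneath, and after ten restarts it gives up with -EBUSY. The idiom in isolation, as a generic sketch with hypothetical demo_* helpers:

/* Hypothetical helpers, assumed to exist for the sketch: */
static bool demo_have_more_records(void);
static void demo_copy_one_record(void);
static bool demo_records_were_overwritten(void);

static int demo_drain_ringbuffer(raw_spinlock_t *lock)
{
        int attempts = 0;
        int in_this_batch;

restart:
        if (++attempts > 10)
                return -EBUSY;          /* writers keep outrunning us */

        in_this_batch = 0;
        raw_spin_lock_irq(lock);
        while (demo_have_more_records()) {
                demo_copy_one_record();
                if (++in_this_batch > 5) {
                        in_this_batch = 0;
                        /* Open a preemption window for RT tasks. */
                        raw_spin_unlock_irq(lock);
                        raw_spin_lock_irq(lock);
                        if (demo_records_were_overwritten()) {
                                raw_spin_unlock_irq(lock);
                                goto restart;
                        }
                }
        }
        raw_spin_unlock_irq(lock);
        return 0;
}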
  17748. @@ -1401,6 +1426,12 @@ static void call_console_drivers(int level, const char *text, size_t len)
  17749. if (!console_drivers)
  17750. return;
  17751. + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
  17752. + if (in_irq() || in_nmi())
  17753. + return;
  17754. + }
  17755. +
  17756. + migrate_disable();
  17757. for_each_console(con) {
  17758. if (exclusive_console && con != exclusive_console)
  17759. continue;
  17760. @@ -1413,6 +1444,7 @@ static void call_console_drivers(int level, const char *text, size_t len)
  17761. continue;
  17762. con->write(con, text, len);
  17763. }
  17764. + migrate_enable();
  17765. }
  17766. /*
  17767. @@ -1473,6 +1505,15 @@ static inline int can_use_console(unsigned int cpu)
  17768. static int console_trylock_for_printk(void)
  17769. {
  17770. unsigned int cpu = smp_processor_id();
  17771. +#ifdef CONFIG_PREEMPT_RT_FULL
  17772. + int lock = !early_boot_irqs_disabled && (preempt_count() == 0) &&
  17773. + !irqs_disabled();
  17774. +#else
  17775. + int lock = 1;
  17776. +#endif
  17777. +
  17778. + if (!lock)
  17779. + return 0;
  17780. if (!console_trylock())
  17781. return 0;
  17782. @@ -1607,6 +1648,62 @@ static size_t cont_print_text(char *text, size_t size)
  17783. return textlen;
  17784. }
  17785. +#ifdef CONFIG_EARLY_PRINTK
  17786. +struct console *early_console;
  17787. +
  17788. +static void early_vprintk(const char *fmt, va_list ap)
  17789. +{
  17790. + if (early_console) {
  17791. + char buf[512];
  17792. + int n = vscnprintf(buf, sizeof(buf), fmt, ap);
  17793. +
  17794. + early_console->write(early_console, buf, n);
  17795. + }
  17796. +}
  17797. +
  17798. +asmlinkage void early_printk(const char *fmt, ...)
  17799. +{
  17800. + va_list ap;
  17801. +
  17802. + va_start(ap, fmt);
  17803. + early_vprintk(fmt, ap);
  17804. + va_end(ap);
  17805. +}
  17806. +
  17807. +/*
  17808. + * This is independent of any log levels - a global
  17809. + * kill switch that turns off all of printk.
  17810. + *
  17811. + * Used by the NMI watchdog if early-printk is enabled.
  17812. + */
  17813. +static bool __read_mostly printk_killswitch;
  17814. +
  17815. +static int __init force_early_printk_setup(char *str)
  17816. +{
  17817. + printk_killswitch = true;
  17818. + return 0;
  17819. +}
  17820. +early_param("force_early_printk", force_early_printk_setup);
  17821. +
  17822. +void printk_kill(void)
  17823. +{
  17824. + printk_killswitch = true;
  17825. +}
  17826. +
  17827. +static int forced_early_printk(const char *fmt, va_list ap)
  17828. +{
  17829. + if (!printk_killswitch)
  17830. + return 0;
  17831. + early_vprintk(fmt, ap);
  17832. + return 1;
  17833. +}
  17834. +#else
  17835. +static inline int forced_early_printk(const char *fmt, va_list ap)
  17836. +{
  17837. + return 0;
  17838. +}
  17839. +#endif
  17840. +
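
printk_kill() gives debugging code such as an NMI watchdog a way to route all further output through the atomic early console instead of the normal, lock-taking printk path (vprintk_emit() checks forced_early_printk() first). Output only appears if an early_console is registered. A short sketch of a caller; the lockup-handler function is hypothetical:

/* Sketch: a hard-lockup handler that wants output even if the regular
 * printk path is wedged on logbuf_lock or console_sem. */
static void demo_hard_lockup_report(int cpu)
{
        printk_kill();          /* from here on, vprintk_emit() falls back */
        early_printk("CPU%d appears to be stuck\n", cpu);
}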
  17841. asmlinkage int vprintk_emit(int facility, int level,
  17842. const char *dict, size_t dictlen,
  17843. const char *fmt, va_list args)
  17844. @@ -1623,6 +1720,13 @@ asmlinkage int vprintk_emit(int facility, int level,
  17845. /* cpu currently holding logbuf_lock in this function */
  17846. static unsigned int logbuf_cpu = UINT_MAX;
  17847. + /*
  17848. + * Fall back to early_printk if a debugging subsystem has
  17849. + * killed printk output
  17850. + */
  17851. + if (unlikely(forced_early_printk(fmt, args)))
  17852. + return 1;
  17853. +
  17854. if (level == LOGLEVEL_SCHED) {
  17855. level = LOGLEVEL_DEFAULT;
  17856. in_sched = true;
  17857. @@ -1764,8 +1868,7 @@ asmlinkage int vprintk_emit(int facility, int level,
  17858. * console_sem which would prevent anyone from printing to
  17859. * console
  17860. */
  17861. - preempt_disable();
  17862. -
  17863. + migrate_disable();
  17864. /*
  17865. * Try to acquire and then immediately release the console
  17866. * semaphore. The release will print out buffers and wake up
  17867. @@ -1773,7 +1876,7 @@ asmlinkage int vprintk_emit(int facility, int level,
  17868. */
  17869. if (console_trylock_for_printk())
  17870. console_unlock();
  17871. - preempt_enable();
  17872. + migrate_enable();
  17873. lockdep_on();
  17874. }
  17875. @@ -1902,26 +2005,6 @@ DEFINE_PER_CPU(printk_func_t, printk_func);
  17876. #endif /* CONFIG_PRINTK */
  17877. -#ifdef CONFIG_EARLY_PRINTK
  17878. -struct console *early_console;
  17879. -
  17880. -asmlinkage __visible void early_printk(const char *fmt, ...)
  17881. -{
  17882. - va_list ap;
  17883. - char buf[512];
  17884. - int n;
  17885. -
  17886. - if (!early_console)
  17887. - return;
  17888. -
  17889. - va_start(ap, fmt);
  17890. - n = vscnprintf(buf, sizeof(buf), fmt, ap);
  17891. - va_end(ap);
  17892. -
  17893. - early_console->write(early_console, buf, n);
  17894. -}
  17895. -#endif
  17896. -
  17897. static int __add_preferred_console(char *name, int idx, char *options,
  17898. char *brl_options)
  17899. {
  17900. @@ -2143,11 +2226,16 @@ static void console_cont_flush(char *text, size_t size)
  17901. goto out;
  17902. len = cont_print_text(text, size);
  17903. +#ifndef CONFIG_PREEMPT_RT_FULL
  17904. raw_spin_unlock(&logbuf_lock);
  17905. stop_critical_timings();
  17906. call_console_drivers(cont.level, text, len);
  17907. start_critical_timings();
  17908. local_irq_restore(flags);
  17909. +#else
  17910. + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  17911. + call_console_drivers(cont.level, text, len);
  17912. +#endif
  17913. return;
  17914. out:
  17915. raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  17916. @@ -2246,12 +2334,17 @@ skip:
  17917. console_idx = log_next(console_idx);
  17918. console_seq++;
  17919. console_prev = msg->flags;
  17920. +#ifdef CONFIG_PREEMPT_RT_FULL
  17921. + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  17922. + call_console_drivers(level, text, len);
  17923. +#else
  17924. raw_spin_unlock(&logbuf_lock);
  17925. stop_critical_timings(); /* don't trace print latency */
  17926. call_console_drivers(level, text, len);
  17927. start_critical_timings();
  17928. local_irq_restore(flags);
  17929. +#endif
  17930. if (do_cond_resched)
  17931. cond_resched();
  17932. @@ -2304,6 +2397,11 @@ void console_unblank(void)
  17933. {
  17934. struct console *c;
  17935. + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
  17936. + if (in_irq() || in_nmi())
  17937. + return;
  17938. + }
  17939. +
  17940. /*
  17941. * console_unblank can no longer be called in interrupt context unless
  17942. * oops_in_progress is set to 1..
  17943. diff --git a/kernel/ptrace.c b/kernel/ptrace.c
  17944. index 261ee21e62db..e27549ebd299 100644
  17945. --- a/kernel/ptrace.c
  17946. +++ b/kernel/ptrace.c
  17947. @@ -129,7 +129,14 @@ static bool ptrace_freeze_traced(struct task_struct *task)
  17948. spin_lock_irq(&task->sighand->siglock);
  17949. if (task_is_traced(task) && !__fatal_signal_pending(task)) {
  17950. - task->state = __TASK_TRACED;
  17951. + unsigned long flags;
  17952. +
  17953. + raw_spin_lock_irqsave(&task->pi_lock, flags);
  17954. + if (task->state & __TASK_TRACED)
  17955. + task->state = __TASK_TRACED;
  17956. + else
  17957. + task->saved_state = __TASK_TRACED;
  17958. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  17959. ret = true;
  17960. }
  17961. spin_unlock_irq(&task->sighand->siglock);
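
On RT, a task sleeping on a converted spinlock keeps its externally visible state in ->saved_state (while ->state holds the lock sleep, typically TASK_UNINTERRUPTIBLE), so checks like the one above have to look at both fields under pi_lock. An illustrative helper, a sketch rather than part of the patch:

/* Sketch: report whether a task is traced, seen through the RT
 * state/saved_state split. Both fields are sampled under pi_lock. */
static bool demo_task_traced(struct task_struct *task)
{
        unsigned long flags;
        bool traced;

        raw_spin_lock_irqsave(&task->pi_lock, flags);
        traced = (task->state | task->saved_state) & __TASK_TRACED;
        raw_spin_unlock_irqrestore(&task->pi_lock, flags);

        return traced;
}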
  17962. diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
  17963. index 8dbe27611ec3..7b6170a46409 100644
  17964. --- a/kernel/rcu/rcutorture.c
  17965. +++ b/kernel/rcu/rcutorture.c
  17966. @@ -389,6 +389,7 @@ static struct rcu_torture_ops rcu_ops = {
  17967. .name = "rcu"
  17968. };
  17969. +#ifndef CONFIG_PREEMPT_RT_FULL
  17970. /*
  17971. * Definitions for rcu_bh torture testing.
  17972. */
  17973. @@ -428,6 +429,12 @@ static struct rcu_torture_ops rcu_bh_ops = {
  17974. .name = "rcu_bh"
  17975. };
  17976. +#else
  17977. +static struct rcu_torture_ops rcu_bh_ops = {
  17978. + .ttype = INVALID_RCU_FLAVOR,
  17979. +};
  17980. +#endif
  17981. +
  17982. /*
  17983. * Don't even think about trying any of these in real life!!!
  17984. * The names include "busted", and they really mean it!
  17985. diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
  17986. index 8cf7304b2867..965df22d96ad 100644
  17987. --- a/kernel/rcu/tree.c
  17988. +++ b/kernel/rcu/tree.c
  17989. @@ -56,6 +56,11 @@
  17990. #include <linux/random.h>
  17991. #include <linux/ftrace_event.h>
  17992. #include <linux/suspend.h>
  17993. +#include <linux/delay.h>
  17994. +#include <linux/gfp.h>
  17995. +#include <linux/oom.h>
  17996. +#include <linux/smpboot.h>
  17997. +#include "../time/tick-internal.h"
  17998. #include "tree.h"
  17999. #include "rcu.h"
  18000. @@ -220,6 +225,19 @@ void rcu_sched_qs(void)
  18001. }
  18002. }
  18003. +#ifdef CONFIG_PREEMPT_RT_FULL
  18004. +static void rcu_preempt_qs(void);
  18005. +
  18006. +void rcu_bh_qs(void)
  18007. +{
  18008. + unsigned long flags;
  18009. +
  18010. + /* Callers to this function, rcu_preempt_qs(), must disable irqs. */
  18011. + local_irq_save(flags);
  18012. + rcu_preempt_qs();
  18013. + local_irq_restore(flags);
  18014. +}
  18015. +#else
  18016. void rcu_bh_qs(void)
  18017. {
  18018. if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) {
  18019. @@ -229,6 +247,7 @@ void rcu_bh_qs(void)
  18020. __this_cpu_write(rcu_bh_data.passed_quiesce, 1);
  18021. }
  18022. }
  18023. +#endif
  18024. static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
  18025. @@ -404,6 +423,7 @@ unsigned long rcu_batches_completed_sched(void)
  18026. }
  18027. EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
  18028. +#ifndef CONFIG_PREEMPT_RT_FULL
  18029. /*
  18030. * Return the number of RCU BH batches completed thus far for debug & stats.
  18031. */
  18032. @@ -431,6 +451,13 @@ void rcu_bh_force_quiescent_state(void)
  18033. }
  18034. EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
  18035. +#else
  18036. +void rcu_force_quiescent_state(void)
  18037. +{
  18038. +}
  18039. +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  18040. +#endif
  18041. +
  18042. /*
  18043. * Force a quiescent state for RCU-sched.
  18044. */
  18045. @@ -1545,7 +1572,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
  18046. !ACCESS_ONCE(rsp->gp_flags) ||
  18047. !rsp->gp_kthread)
  18048. return;
  18049. - wake_up(&rsp->gp_wq);
  18050. + swait_wake(&rsp->gp_wq);
  18051. }
  18052. /*
  18053. @@ -1986,7 +2013,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
  18054. ACCESS_ONCE(rsp->gpnum),
  18055. TPS("reqwait"));
  18056. rsp->gp_state = RCU_GP_WAIT_GPS;
  18057. - wait_event_interruptible(rsp->gp_wq,
  18058. + swait_event_interruptible(rsp->gp_wq,
  18059. ACCESS_ONCE(rsp->gp_flags) &
  18060. RCU_GP_FLAG_INIT);
  18061. /* Locking provides needed memory barrier. */
  18062. @@ -2015,7 +2042,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
  18063. ACCESS_ONCE(rsp->gpnum),
  18064. TPS("fqswait"));
  18065. rsp->gp_state = RCU_GP_WAIT_FQS;
  18066. - ret = wait_event_interruptible_timeout(rsp->gp_wq,
  18067. + ret = swait_event_interruptible_timeout(rsp->gp_wq,
  18068. ((gf = ACCESS_ONCE(rsp->gp_flags)) &
  18069. RCU_GP_FLAG_FQS) ||
  18070. (!ACCESS_ONCE(rnp->qsmask) &&
  18071. @@ -2860,18 +2887,17 @@ __rcu_process_callbacks(struct rcu_state *rsp)
  18072. /*
  18073. * Do RCU core processing for the current CPU.
  18074. */
  18075. -static void rcu_process_callbacks(struct softirq_action *unused)
  18076. +static void rcu_process_callbacks(void)
  18077. {
  18078. struct rcu_state *rsp;
  18079. if (cpu_is_offline(smp_processor_id()))
  18080. return;
  18081. - trace_rcu_utilization(TPS("Start RCU core"));
  18082. for_each_rcu_flavor(rsp)
  18083. __rcu_process_callbacks(rsp);
  18084. - trace_rcu_utilization(TPS("End RCU core"));
  18085. }
  18086. +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
  18087. /*
  18088. * Schedule RCU callback invocation. If the specified type of RCU
  18089. * does not support RCU priority boosting, just do a direct call,
  18090. @@ -2883,18 +2909,105 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
  18091. {
  18092. if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
  18093. return;
  18094. - if (likely(!rsp->boost)) {
  18095. - rcu_do_batch(rsp, rdp);
  18096. + rcu_do_batch(rsp, rdp);
  18097. +}
  18098. +
  18099. +static void rcu_wake_cond(struct task_struct *t, int status)
  18100. +{
  18101. + /*
  18102. + * If the thread is yielding, only wake it when this
  18103. + * is invoked from idle
  18104. + */
  18105. + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
  18106. + wake_up_process(t);
  18107. +}
  18108. +
  18109. +/*
  18110. + * Wake up this CPU's rcuc kthread to do RCU core processing.
  18111. + */
  18112. +static void invoke_rcu_core(void)
  18113. +{
  18114. + unsigned long flags;
  18115. + struct task_struct *t;
  18116. +
  18117. + if (!cpu_online(smp_processor_id()))
  18118. return;
  18119. + local_irq_save(flags);
  18120. + __this_cpu_write(rcu_cpu_has_work, 1);
  18121. + t = __this_cpu_read(rcu_cpu_kthread_task);
  18122. + if (t != NULL && current != t)
  18123. + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
  18124. + local_irq_restore(flags);
  18125. +}
  18126. +
  18127. +static void rcu_cpu_kthread_park(unsigned int cpu)
  18128. +{
  18129. + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
  18130. +}
  18131. +
  18132. +static int rcu_cpu_kthread_should_run(unsigned int cpu)
  18133. +{
  18134. + return __this_cpu_read(rcu_cpu_has_work);
  18135. +}
  18136. +
  18137. +/*
  18138. + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
  18139. + * RCU softirq used in flavors and configurations of RCU that do not
  18140. + * support RCU priority boosting.
  18141. + */
  18142. +static void rcu_cpu_kthread(unsigned int cpu)
  18143. +{
  18144. + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
  18145. + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
  18146. + int spincnt;
  18147. +
  18148. + for (spincnt = 0; spincnt < 10; spincnt++) {
  18149. + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
  18150. + local_bh_disable();
  18151. + *statusp = RCU_KTHREAD_RUNNING;
  18152. + this_cpu_inc(rcu_cpu_kthread_loops);
  18153. + local_irq_disable();
  18154. + work = *workp;
  18155. + *workp = 0;
  18156. + local_irq_enable();
  18157. + if (work)
  18158. + rcu_process_callbacks();
  18159. + local_bh_enable();
  18160. + if (*workp == 0) {
  18161. + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
  18162. + *statusp = RCU_KTHREAD_WAITING;
  18163. + return;
  18164. + }
  18165. }
  18166. - invoke_rcu_callbacks_kthread();
  18167. + *statusp = RCU_KTHREAD_YIELDING;
  18168. + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
  18169. + schedule_timeout_interruptible(2);
  18170. + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
  18171. + *statusp = RCU_KTHREAD_WAITING;
  18172. }
  18173. -static void invoke_rcu_core(void)
  18174. +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
  18175. + .store = &rcu_cpu_kthread_task,
  18176. + .thread_should_run = rcu_cpu_kthread_should_run,
  18177. + .thread_fn = rcu_cpu_kthread,
  18178. + .thread_comm = "rcuc/%u",
  18179. + .setup = rcu_cpu_kthread_setup,
  18180. + .park = rcu_cpu_kthread_park,
  18181. +};
  18182. +
  18183. +/*
  18184. + * Spawn per-CPU RCU core processing kthreads.
  18185. + */
  18186. +static int __init rcu_spawn_core_kthreads(void)
  18187. {
  18188. - if (cpu_online(smp_processor_id()))
  18189. - raise_softirq(RCU_SOFTIRQ);
  18190. + int cpu;
  18191. +
  18192. + for_each_possible_cpu(cpu)
  18193. + per_cpu(rcu_cpu_has_work, cpu) = 0;
  18194. + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
  18195. + return 0;
  18196. }
  18197. +early_initcall(rcu_spawn_core_kthreads);
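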
  18198. /*
  18199. * Handle any core-RCU processing required by a call_rcu() invocation.
  18200. @@ -3040,6 +3153,7 @@ void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
  18201. }
  18202. EXPORT_SYMBOL_GPL(call_rcu_sched);
  18203. +#ifndef CONFIG_PREEMPT_RT_FULL
  18204. /*
  18205. * Queue an RCU callback for invocation after a quicker grace period.
  18206. */
  18207. @@ -3048,6 +3162,7 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
  18208. __call_rcu(head, func, &rcu_bh_state, -1, 0);
  18209. }
  18210. EXPORT_SYMBOL_GPL(call_rcu_bh);
  18211. +#endif
  18212. /*
  18213. * Queue an RCU callback for lazy invocation after a grace period.
  18214. @@ -3139,6 +3254,7 @@ void synchronize_sched(void)
  18215. }
  18216. EXPORT_SYMBOL_GPL(synchronize_sched);
  18217. +#ifndef CONFIG_PREEMPT_RT_FULL
  18218. /**
  18219. * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
  18220. *
  18221. @@ -3165,6 +3281,7 @@ void synchronize_rcu_bh(void)
  18222. wait_rcu_gp(call_rcu_bh);
  18223. }
  18224. EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
  18225. +#endif
  18226. /**
  18227. * get_state_synchronize_rcu - Snapshot current RCU state
  18228. @@ -3677,6 +3794,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
  18229. mutex_unlock(&rsp->barrier_mutex);
  18230. }
  18231. +#ifndef CONFIG_PREEMPT_RT_FULL
  18232. /**
  18233. * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
  18234. */
  18235. @@ -3685,6 +3803,7 @@ void rcu_barrier_bh(void)
  18236. _rcu_barrier(&rcu_bh_state);
  18237. }
  18238. EXPORT_SYMBOL_GPL(rcu_barrier_bh);
  18239. +#endif
  18240. /**
  18241. * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
  18242. @@ -4021,7 +4140,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
  18243. }
  18244. }
  18245. - init_waitqueue_head(&rsp->gp_wq);
  18246. + init_swait_head(&rsp->gp_wq);
  18247. rnp = rsp->level[rcu_num_lvls - 1];
  18248. for_each_possible_cpu(i) {
  18249. while (i > rnp->grphi)
  18250. @@ -4120,7 +4239,6 @@ void __init rcu_init(void)
  18251. rcu_init_one(&rcu_bh_state, &rcu_bh_data);
  18252. rcu_init_one(&rcu_sched_state, &rcu_sched_data);
  18253. __rcu_init_preempt();
  18254. - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
  18255. /*
  18256. * We don't need protection against CPU-hotplug here because
  18257. diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
  18258. index a69d3dab2ec4..8a9f0d3640de 100644
  18259. --- a/kernel/rcu/tree.h
  18260. +++ b/kernel/rcu/tree.h
  18261. @@ -27,6 +27,7 @@
  18262. #include <linux/threads.h>
  18263. #include <linux/cpumask.h>
  18264. #include <linux/seqlock.h>
  18265. +#include <linux/wait-simple.h>
  18266. /*
  18267. * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
  18268. @@ -210,7 +211,7 @@ struct rcu_node {
  18269. /* This can happen due to race conditions. */
  18270. #endif /* #ifdef CONFIG_RCU_BOOST */
  18271. #ifdef CONFIG_RCU_NOCB_CPU
  18272. - wait_queue_head_t nocb_gp_wq[2];
  18273. + struct swait_head nocb_gp_wq[2];
  18274. /* Place for rcu_nocb_kthread() to wait GP. */
  18275. #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
  18276. int need_future_gp[2];
  18277. @@ -349,7 +350,7 @@ struct rcu_data {
  18278. atomic_long_t nocb_q_count_lazy; /* invocation (all stages). */
  18279. struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */
  18280. struct rcu_head **nocb_follower_tail;
  18281. - wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
  18282. + struct swait_head nocb_wq; /* For nocb kthreads to sleep on. */
  18283. struct task_struct *nocb_kthread;
  18284. int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
  18285. @@ -438,7 +439,7 @@ struct rcu_state {
  18286. unsigned long gpnum; /* Current gp number. */
  18287. unsigned long completed; /* # of last completed gp. */
  18288. struct task_struct *gp_kthread; /* Task for grace periods. */
  18289. - wait_queue_head_t gp_wq; /* Where GP task waits. */
  18290. + struct swait_head gp_wq; /* Where GP task waits. */
  18291. short gp_flags; /* Commands for GP task. */
  18292. short gp_state; /* GP kthread sleep state. */
  18293. @@ -529,12 +530,10 @@ extern struct rcu_state rcu_preempt_state;
  18294. DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
  18295. #endif /* #ifdef CONFIG_PREEMPT_RCU */
  18296. -#ifdef CONFIG_RCU_BOOST
  18297. DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  18298. DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
  18299. DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  18300. DECLARE_PER_CPU(char, rcu_cpu_has_work);
  18301. -#endif /* #ifdef CONFIG_RCU_BOOST */
  18302. #ifndef RCU_TREE_NONCORE
  18303. @@ -553,10 +552,9 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
  18304. static void __init __rcu_init_preempt(void);
  18305. static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
  18306. static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
  18307. -static void invoke_rcu_callbacks_kthread(void);
  18308. static bool rcu_is_callbacks_kthread(void);
  18309. +static void rcu_cpu_kthread_setup(unsigned int cpu);
  18310. #ifdef CONFIG_RCU_BOOST
  18311. -static void rcu_preempt_do_callbacks(void);
  18312. static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
  18313. struct rcu_node *rnp);
  18314. #endif /* #ifdef CONFIG_RCU_BOOST */
  18315. diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
  18316. index 8c0ec0f5a027..54da8f44d586 100644
  18317. --- a/kernel/rcu/tree_plugin.h
  18318. +++ b/kernel/rcu/tree_plugin.h
  18319. @@ -24,27 +24,20 @@
  18320. * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  18321. */
  18322. -#include <linux/delay.h>
  18323. -#include <linux/gfp.h>
  18324. -#include <linux/oom.h>
  18325. -#include <linux/smpboot.h>
  18326. -#include "../time/tick-internal.h"
  18327. -
  18328. #ifdef CONFIG_RCU_BOOST
  18329. #include "../locking/rtmutex_common.h"
  18330. +#endif /* #ifdef CONFIG_RCU_BOOST */
  18331. +
  18332. /*
  18333. * Control variables for per-CPU and per-rcu_node kthreads. These
  18334. * handle all flavors of RCU.
  18335. */
  18336. -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
  18337. DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  18338. DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  18339. DEFINE_PER_CPU(char, rcu_cpu_has_work);
  18340. -#endif /* #ifdef CONFIG_RCU_BOOST */
  18341. -
  18342. #ifdef CONFIG_RCU_NOCB_CPU
  18343. static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
  18344. static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */
  18345. @@ -291,7 +284,7 @@ void rcu_read_unlock_special(struct task_struct *t)
  18346. }
  18347. /* Hardware IRQ handlers cannot block, complain if they get here. */
  18348. - if (in_irq() || in_serving_softirq()) {
  18349. + if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
  18350. lockdep_rcu_suspicious(__FILE__, __LINE__,
  18351. "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
  18352. pr_alert("->rcu_read_unlock_special: %#x (b: %d, nq: %d)\n",
  18353. @@ -496,15 +489,6 @@ static void rcu_preempt_check_callbacks(void)
  18354. t->rcu_read_unlock_special.b.need_qs = true;
  18355. }
  18356. -#ifdef CONFIG_RCU_BOOST
  18357. -
  18358. -static void rcu_preempt_do_callbacks(void)
  18359. -{
  18360. - rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
  18361. -}
  18362. -
  18363. -#endif /* #ifdef CONFIG_RCU_BOOST */
  18364. -
  18365. /*
  18366. * Queue a preemptible-RCU callback for invocation after a grace period.
  18367. */
  18368. @@ -939,6 +923,19 @@ void exit_rcu(void)
  18369. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  18370. +/*
  18371. + * If boosting, set rcuc kthreads to realtime priority.
  18372. + */
  18373. +static void rcu_cpu_kthread_setup(unsigned int cpu)
  18374. +{
  18375. +#ifdef CONFIG_RCU_BOOST
  18376. + struct sched_param sp;
  18377. +
  18378. + sp.sched_priority = kthread_prio;
  18379. + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
  18380. +#endif /* #ifdef CONFIG_RCU_BOOST */
  18381. +}
  18382. +
  18383. #ifdef CONFIG_RCU_BOOST
  18384. #include "../locking/rtmutex_common.h"
  18385. @@ -970,16 +967,6 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp)
  18386. #endif /* #else #ifdef CONFIG_RCU_TRACE */
  18387. -static void rcu_wake_cond(struct task_struct *t, int status)
  18388. -{
  18389. - /*
  18390. - * If the thread is yielding, only wake it when this
  18391. - * is invoked from idle
  18392. - */
  18393. - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
  18394. - wake_up_process(t);
  18395. -}
  18396. -
  18397. /*
  18398. * Carry out RCU priority boosting on the task indicated by ->exp_tasks
  18399. * or ->boost_tasks, advancing the pointer to the next task in the
  18400. @@ -1125,23 +1112,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
  18401. }
  18402. /*
  18403. - * Wake up the per-CPU kthread to invoke RCU callbacks.
  18404. - */
  18405. -static void invoke_rcu_callbacks_kthread(void)
  18406. -{
  18407. - unsigned long flags;
  18408. -
  18409. - local_irq_save(flags);
  18410. - __this_cpu_write(rcu_cpu_has_work, 1);
  18411. - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
  18412. - current != __this_cpu_read(rcu_cpu_kthread_task)) {
  18413. - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
  18414. - __this_cpu_read(rcu_cpu_kthread_status));
  18415. - }
  18416. - local_irq_restore(flags);
  18417. -}
  18418. -
  18419. -/*
  18420. * Is the current CPU running the RCU-callbacks kthread?
  18421. * Caller must have preemption disabled.
  18422. */
  18423. @@ -1196,67 +1166,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
  18424. return 0;
  18425. }
  18426. -static void rcu_kthread_do_work(void)
  18427. -{
  18428. - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
  18429. - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
  18430. - rcu_preempt_do_callbacks();
  18431. -}
  18432. -
  18433. -static void rcu_cpu_kthread_setup(unsigned int cpu)
  18434. -{
  18435. - struct sched_param sp;
  18436. -
  18437. - sp.sched_priority = kthread_prio;
  18438. - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
  18439. -}
  18440. -
  18441. -static void rcu_cpu_kthread_park(unsigned int cpu)
  18442. -{
  18443. - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
  18444. -}
  18445. -
  18446. -static int rcu_cpu_kthread_should_run(unsigned int cpu)
  18447. -{
  18448. - return __this_cpu_read(rcu_cpu_has_work);
  18449. -}
  18450. -
  18451. -/*
  18452. - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
  18453. - * RCU softirq used in flavors and configurations of RCU that do not
  18454. - * support RCU priority boosting.
  18455. - */
  18456. -static void rcu_cpu_kthread(unsigned int cpu)
  18457. -{
  18458. - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
  18459. - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
  18460. - int spincnt;
  18461. -
  18462. - for (spincnt = 0; spincnt < 10; spincnt++) {
  18463. - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
  18464. - local_bh_disable();
  18465. - *statusp = RCU_KTHREAD_RUNNING;
  18466. - this_cpu_inc(rcu_cpu_kthread_loops);
  18467. - local_irq_disable();
  18468. - work = *workp;
  18469. - *workp = 0;
  18470. - local_irq_enable();
  18471. - if (work)
  18472. - rcu_kthread_do_work();
  18473. - local_bh_enable();
  18474. - if (*workp == 0) {
  18475. - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
  18476. - *statusp = RCU_KTHREAD_WAITING;
  18477. - return;
  18478. - }
  18479. - }
  18480. - *statusp = RCU_KTHREAD_YIELDING;
  18481. - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
  18482. - schedule_timeout_interruptible(2);
  18483. - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
  18484. - *statusp = RCU_KTHREAD_WAITING;
  18485. -}
  18486. -
  18487. /*
  18488. * Set the per-rcu_node kthread's affinity to cover all CPUs that are
  18489. * served by the rcu_node in question. The CPU hotplug lock is still
  18490. @@ -1286,26 +1195,12 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
  18491. free_cpumask_var(cm);
  18492. }
  18493. -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
  18494. - .store = &rcu_cpu_kthread_task,
  18495. - .thread_should_run = rcu_cpu_kthread_should_run,
  18496. - .thread_fn = rcu_cpu_kthread,
  18497. - .thread_comm = "rcuc/%u",
  18498. - .setup = rcu_cpu_kthread_setup,
  18499. - .park = rcu_cpu_kthread_park,
  18500. -};
  18501. -
  18502. /*
  18503. * Spawn boost kthreads -- called as soon as the scheduler is running.
  18504. */
  18505. static void __init rcu_spawn_boost_kthreads(void)
  18506. {
  18507. struct rcu_node *rnp;
  18508. - int cpu;
  18509. -
  18510. - for_each_possible_cpu(cpu)
  18511. - per_cpu(rcu_cpu_has_work, cpu) = 0;
  18512. - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
  18513. rcu_for_each_leaf_node(rcu_state_p, rnp)
  18514. (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
  18515. }
  18516. @@ -1328,11 +1223,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
  18517. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  18518. }
  18519. -static void invoke_rcu_callbacks_kthread(void)
  18520. -{
  18521. - WARN_ON_ONCE(1);
  18522. -}
  18523. -
  18524. static bool rcu_is_callbacks_kthread(void)
  18525. {
  18526. return false;
  18527. @@ -1356,7 +1246,7 @@ static void rcu_prepare_kthreads(int cpu)
  18528. #endif /* #else #ifdef CONFIG_RCU_BOOST */
  18529. -#if !defined(CONFIG_RCU_FAST_NO_HZ)
  18530. +#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL)
  18531. /*
  18532. * Check to see if any future RCU-related work will need to be done
  18533. @@ -1374,7 +1264,9 @@ int rcu_needs_cpu(unsigned long *delta_jiffies)
  18534. return rcu_cpu_has_callbacks(NULL);
  18535. }
  18536. #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
  18537. +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */
  18538. +#if !defined(CONFIG_RCU_FAST_NO_HZ)
  18539. /*
  18540. * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
  18541. * after it.
  18542. @@ -1472,6 +1364,8 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
  18543. return cbs_ready;
  18544. }
  18545. +#ifndef CONFIG_PREEMPT_RT_FULL
  18546. +
  18547. /*
  18548. * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
  18549. * to invoke. If the CPU has callbacks, try to advance them. Tell the
  18550. @@ -1512,7 +1406,7 @@ int rcu_needs_cpu(unsigned long *dj)
  18551. return 0;
  18552. }
  18553. #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
  18554. -
  18555. +#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */
  18556. /*
  18557. * Prepare a CPU for idle from an RCU perspective. The first major task
  18558. * is to sense whether nohz mode has been enabled or disabled via sysfs.
  18559. @@ -1859,7 +1753,7 @@ early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
  18560. */
  18561. static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
  18562. {
  18563. - wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
  18564. + swait_wake_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
  18565. }
  18566. /*
  18567. @@ -1877,8 +1771,8 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
  18568. static void rcu_init_one_nocb(struct rcu_node *rnp)
  18569. {
  18570. - init_waitqueue_head(&rnp->nocb_gp_wq[0]);
  18571. - init_waitqueue_head(&rnp->nocb_gp_wq[1]);
  18572. + init_swait_head(&rnp->nocb_gp_wq[0]);
  18573. + init_swait_head(&rnp->nocb_gp_wq[1]);
  18574. }
  18575. #ifndef CONFIG_RCU_NOCB_CPU_ALL
  18576. @@ -1903,7 +1797,7 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
  18577. if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) {
  18578. /* Prior smp_mb__after_atomic() orders against prior enqueue. */
  18579. ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false;
  18580. - wake_up(&rdp_leader->nocb_wq);
  18581. + swait_wake(&rdp_leader->nocb_wq);
  18582. }
  18583. }
  18584. @@ -2116,7 +2010,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
  18585. */
  18586. trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
  18587. for (;;) {
  18588. - wait_event_interruptible(
  18589. + swait_event_interruptible(
  18590. rnp->nocb_gp_wq[c & 0x1],
  18591. (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
  18592. if (likely(d))
  18593. @@ -2144,7 +2038,7 @@ wait_again:
  18594. /* Wait for callbacks to appear. */
  18595. if (!rcu_nocb_poll) {
  18596. trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
  18597. - wait_event_interruptible(my_rdp->nocb_wq,
  18598. + swait_event_interruptible(my_rdp->nocb_wq,
  18599. !ACCESS_ONCE(my_rdp->nocb_leader_sleep));
  18600. /* Memory barrier handled by smp_mb() calls below and repoll. */
  18601. } else if (firsttime) {
  18602. @@ -2219,7 +2113,7 @@ wait_again:
  18603. * List was empty, wake up the follower.
  18604. * Memory barriers supplied by atomic_long_add().
  18605. */
  18606. - wake_up(&rdp->nocb_wq);
  18607. + swait_wake(&rdp->nocb_wq);
  18608. }
  18609. }
  18610. @@ -2240,7 +2134,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
  18611. if (!rcu_nocb_poll) {
  18612. trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
  18613. "FollowerSleep");
  18614. - wait_event_interruptible(rdp->nocb_wq,
  18615. + swait_event_interruptible(rdp->nocb_wq,
  18616. ACCESS_ONCE(rdp->nocb_follower_head));
  18617. } else if (firsttime) {
  18618. /* Don't drown trace log with "Poll"! */
  18619. @@ -2399,7 +2293,7 @@ void __init rcu_init_nohz(void)
  18620. static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
  18621. {
  18622. rdp->nocb_tail = &rdp->nocb_head;
  18623. - init_waitqueue_head(&rdp->nocb_wq);
  18624. + init_swait_head(&rdp->nocb_wq);
  18625. rdp->nocb_follower_tail = &rdp->nocb_follower_head;
  18626. }
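
The conversions in this file all follow one pattern: wait_queue_head_t becomes struct swait_head, init_waitqueue_head() becomes init_swait_head(), wait_event_interruptible() becomes swait_event_interruptible(), and wake_up()/wake_up_all() become swait_wake()/swait_wake_all(). The simple waitqueue from wait-simple.h is designed to keep its internal lock raw and do bounded work in the wake path, which is what makes it usable from the RT-constrained contexts here. A minimal sketch using only calls that appear in this patch; the surrounding names are hypothetical:

/* Sketch: producer/consumer on a simple waitqueue. */
static struct swait_head demo_wq;
static int demo_ready;

static void demo_init(void)
{
        init_swait_head(&demo_wq);
}

static int demo_consumer(void *unused)
{
        /* Sleep until demo_ready becomes non-zero or a signal arrives. */
        swait_event_interruptible(demo_wq, ACCESS_ONCE(demo_ready));
        return 0;
}

static void demo_producer(void)
{
        ACCESS_ONCE(demo_ready) = 1;
        swait_wake(&demo_wq);   /* wake one waiter; swait_wake_all() for all */
}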
  18627. diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
  18628. index 1f133350da01..1718c4fe9bce 100644
  18629. --- a/kernel/rcu/update.c
  18630. +++ b/kernel/rcu/update.c
  18631. @@ -227,6 +227,7 @@ int rcu_read_lock_held(void)
  18632. }
  18633. EXPORT_SYMBOL_GPL(rcu_read_lock_held);
  18634. +#ifndef CONFIG_PREEMPT_RT_FULL
  18635. /**
  18636. * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
  18637. *
  18638. @@ -253,6 +254,7 @@ int rcu_read_lock_bh_held(void)
  18639. return in_softirq() || irqs_disabled();
  18640. }
  18641. EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
  18642. +#endif
  18643. #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
  18644. diff --git a/kernel/relay.c b/kernel/relay.c
  18645. index e9dbaeb8fd65..509f68fb91e8 100644
  18646. --- a/kernel/relay.c
  18647. +++ b/kernel/relay.c
  18648. @@ -339,6 +339,10 @@ static void wakeup_readers(unsigned long data)
  18649. {
  18650. struct rchan_buf *buf = (struct rchan_buf *)data;
  18651. wake_up_interruptible(&buf->read_wait);
  18652. + /*
  18653. + * Stupid polling for now:
  18654. + */
  18655. + mod_timer(&buf->timer, jiffies + 1);
  18656. }
  18657. /**
  18658. @@ -356,6 +360,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init)
  18659. init_waitqueue_head(&buf->read_wait);
  18660. kref_init(&buf->kref);
  18661. setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf);
  18662. + mod_timer(&buf->timer, jiffies + 1);
  18663. } else
  18664. del_timer_sync(&buf->timer);
  18665. @@ -739,15 +744,6 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
  18666. else
  18667. buf->early_bytes += buf->chan->subbuf_size -
  18668. buf->padding[old_subbuf];
  18669. - smp_mb();
  18670. - if (waitqueue_active(&buf->read_wait))
  18671. - /*
  18672. - * Calling wake_up_interruptible() from here
  18673. - * will deadlock if we happen to be logging
  18674. - * from the scheduler (trying to re-grab
  18675. - * rq->lock), so defer it.
  18676. - */
  18677. - mod_timer(&buf->timer, jiffies + 1);
  18678. }
  18679. old = buf->data;
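
With the wake_up removed from relay_switch_subbuf() (it could deadlock when logging from inside the scheduler while holding rq->lock), readers are now woken from a self-rearming timer: wakeup_readers() re-arms itself every jiffy for as long as the buffer exists. The pattern in isolation, as a sketch with hypothetical names, using the pre-4.15 timer API seen above:

/* Hypothetical consumer hook for the sketch: */
static void demo_check_for_new_data(void *ctx);

static struct timer_list demo_poll_timer;

static void demo_poll(unsigned long data)
{
        demo_check_for_new_data((void *)data);
        mod_timer(&demo_poll_timer, jiffies + 1);       /* poll again next jiffy */
}

static void demo_start_polling(void *ctx)
{
        setup_timer(&demo_poll_timer, demo_poll, (unsigned long)ctx);
        mod_timer(&demo_poll_timer, jiffies + 1);
}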
  18680. diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
  18681. index 46be87024875..3944d32a044d 100644
  18682. --- a/kernel/sched/Makefile
  18683. +++ b/kernel/sched/Makefile
  18684. @@ -13,7 +13,7 @@ endif
  18685. obj-y += core.o proc.o clock.o cputime.o
  18686. obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
  18687. -obj-y += wait.o completion.o idle.o
  18688. +obj-y += wait.o wait-simple.o work-simple.o completion.o idle.o
  18689. obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
  18690. obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
  18691. obj-$(CONFIG_SCHEDSTATS) += stats.o
  18692. diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
  18693. index 8d0f35debf35..45ebcffd9feb 100644
  18694. --- a/kernel/sched/completion.c
  18695. +++ b/kernel/sched/completion.c
  18696. @@ -30,10 +30,10 @@ void complete(struct completion *x)
  18697. {
  18698. unsigned long flags;
  18699. - spin_lock_irqsave(&x->wait.lock, flags);
  18700. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  18701. x->done++;
  18702. - __wake_up_locked(&x->wait, TASK_NORMAL, 1);
  18703. - spin_unlock_irqrestore(&x->wait.lock, flags);
  18704. + __swait_wake_locked(&x->wait, TASK_NORMAL, 1);
  18705. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  18706. }
  18707. EXPORT_SYMBOL(complete);
  18708. @@ -50,10 +50,10 @@ void complete_all(struct completion *x)
  18709. {
  18710. unsigned long flags;
  18711. - spin_lock_irqsave(&x->wait.lock, flags);
  18712. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  18713. x->done += UINT_MAX/2;
  18714. - __wake_up_locked(&x->wait, TASK_NORMAL, 0);
  18715. - spin_unlock_irqrestore(&x->wait.lock, flags);
  18716. + __swait_wake_locked(&x->wait, TASK_NORMAL, 0);
  18717. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  18718. }
  18719. EXPORT_SYMBOL(complete_all);
  18720. @@ -62,20 +62,20 @@ do_wait_for_common(struct completion *x,
  18721. long (*action)(long), long timeout, int state)
  18722. {
  18723. if (!x->done) {
  18724. - DECLARE_WAITQUEUE(wait, current);
  18725. + DEFINE_SWAITER(wait);
  18726. - __add_wait_queue_tail_exclusive(&x->wait, &wait);
  18727. + swait_prepare_locked(&x->wait, &wait);
  18728. do {
  18729. if (signal_pending_state(state, current)) {
  18730. timeout = -ERESTARTSYS;
  18731. break;
  18732. }
  18733. __set_current_state(state);
  18734. - spin_unlock_irq(&x->wait.lock);
  18735. + raw_spin_unlock_irq(&x->wait.lock);
  18736. timeout = action(timeout);
  18737. - spin_lock_irq(&x->wait.lock);
  18738. + raw_spin_lock_irq(&x->wait.lock);
  18739. } while (!x->done && timeout);
  18740. - __remove_wait_queue(&x->wait, &wait);
  18741. + swait_finish_locked(&x->wait, &wait);
  18742. if (!x->done)
  18743. return timeout;
  18744. }
  18745. @@ -89,9 +89,9 @@ __wait_for_common(struct completion *x,
  18746. {
  18747. might_sleep();
  18748. - spin_lock_irq(&x->wait.lock);
  18749. + raw_spin_lock_irq(&x->wait.lock);
  18750. timeout = do_wait_for_common(x, action, timeout, state);
  18751. - spin_unlock_irq(&x->wait.lock);
  18752. + raw_spin_unlock_irq(&x->wait.lock);
  18753. return timeout;
  18754. }
  18755. @@ -277,12 +277,12 @@ bool try_wait_for_completion(struct completion *x)
  18756. if (!READ_ONCE(x->done))
  18757. return 0;
  18758. - spin_lock_irqsave(&x->wait.lock, flags);
  18759. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  18760. if (!x->done)
  18761. ret = 0;
  18762. else
  18763. x->done--;
  18764. - spin_unlock_irqrestore(&x->wait.lock, flags);
  18765. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  18766. return ret;
  18767. }
  18768. EXPORT_SYMBOL(try_wait_for_completion);
  18769. @@ -311,7 +311,7 @@ bool completion_done(struct completion *x)
  18770. * after it's acquired the lock.
  18771. */
  18772. smp_rmb();
  18773. - spin_unlock_wait(&x->wait.lock);
  18774. + raw_spin_unlock_wait(&x->wait.lock);
  18775. return true;
  18776. }
  18777. EXPORT_SYMBOL(completion_done);
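
The completion API itself is unchanged for callers; only the internal waitqueue becomes a simple waitqueue on a raw lock, so complete() remains safe from contexts that cannot take sleeping locks on RT. Standard usage, shown only for context:

/* Standard completion usage; nothing RT-specific is needed here. */
static DECLARE_COMPLETION(demo_done);

static int demo_waiter_thread(void *unused)
{
        wait_for_completion(&demo_done);        /* sleeps on x->wait, now an swait */
        return 0;
}

static void demo_finisher(void)
{
        complete(&demo_done);           /* raw lock + __swait_wake_locked() */
}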
  18778. diff --git a/kernel/sched/core.c b/kernel/sched/core.c
  18779. index 6cb5f00696f5..0d3a40b24304 100644
  18780. --- a/kernel/sched/core.c
  18781. +++ b/kernel/sched/core.c
  18782. @@ -282,7 +282,11 @@ late_initcall(sched_init_debug);
  18783. * Number of tasks to iterate in a single balance run.
  18784. * Limited because this is done with IRQs disabled.
  18785. */
  18786. +#ifndef CONFIG_PREEMPT_RT_FULL
  18787. const_debug unsigned int sysctl_sched_nr_migrate = 32;
  18788. +#else
  18789. +const_debug unsigned int sysctl_sched_nr_migrate = 8;
  18790. +#endif
  18791. /*
  18792. * period over which we average the RT time consumption, measured
  18793. @@ -461,6 +465,7 @@ static void init_rq_hrtick(struct rq *rq)
  18794. hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  18795. rq->hrtick_timer.function = hrtick;
  18796. + rq->hrtick_timer.irqsafe = 1;
  18797. }
  18798. #else /* CONFIG_SCHED_HRTICK */
  18799. static inline void hrtick_clear(struct rq *rq)
  18800. @@ -541,6 +546,52 @@ static bool set_nr_if_polling(struct task_struct *p)
  18801. #endif
  18802. #endif
  18803. +void wake_q_add(struct wake_q_head *head, struct task_struct *task)
  18804. +{
  18805. + struct wake_q_node *node = &task->wake_q;
  18806. +
  18807. + /*
  18808. + * Atomically grab the task, if ->wake_q is !nil already it means
  18809. + * its already queued (either by us or someone else) and will get the
  18810. + * wakeup due to that.
  18811. + *
  18812. + * This cmpxchg() implies a full barrier, which pairs with the write
  18813. + * barrier implied by the wakeup in wake_up_list().
  18814. + */
  18815. + if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
  18816. + return;
  18817. +
  18818. + get_task_struct(task);
  18819. +
  18820. + /*
  18821. + * The head is context local, there can be no concurrency.
  18822. + */
  18823. + *head->lastp = node;
  18824. + head->lastp = &node->next;
  18825. +}
  18826. +
  18827. +void wake_up_q(struct wake_q_head *head)
  18828. +{
  18829. + struct wake_q_node *node = head->first;
  18830. +
  18831. + while (node != WAKE_Q_TAIL) {
  18832. + struct task_struct *task;
  18833. +
  18834. + task = container_of(node, struct task_struct, wake_q);
  18835. + BUG_ON(!task);
  18836. + /* task can safely be re-inserted now */
  18837. + node = node->next;
  18838. + task->wake_q.next = NULL;
  18839. +
  18840. + /*
  18841. + * wake_up_process() implies a wmb() to pair with the queueing
  18842. + * in wake_q_add() so as not to miss wakeups.
  18843. + */
  18844. + wake_up_process(task);
  18845. + put_task_struct(task);
  18846. + }
  18847. +}
  18848. +
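
wake_q_add()/wake_up_q() let code queue wakeups while holding a raw or otherwise wake-unsafe lock and issue them only after the lock is dropped. A hedged sketch follows; struct demo_waiter is hypothetical, and the wake_q_head initialization is written out against the first/lastp layout used above (upstream also provides a WAKE_Q() initializer macro for this):

struct demo_waiter {
        struct list_head        node;
        struct task_struct      *task;
};

/* Sketch: defer wakeups until after a wake-unsafe lock is dropped. */
static void demo_wake_waiters(raw_spinlock_t *lock, struct list_head *waiters)
{
        struct wake_q_head wake_q = { WAKE_Q_TAIL, &wake_q.first };
        struct demo_waiter *w;

        raw_spin_lock(lock);
        list_for_each_entry(w, waiters, node)
                wake_q_add(&wake_q, w->task);   /* takes a task reference */
        raw_spin_unlock(lock);

        wake_up_q(&wake_q);     /* the wake_up_process() calls happen here */
}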
  18849. /*
  18850. * resched_curr - mark rq's current task 'to be rescheduled now'.
  18851. *
  18852. @@ -572,6 +623,38 @@ void resched_curr(struct rq *rq)
  18853. trace_sched_wake_idle_without_ipi(cpu);
  18854. }
  18855. +#ifdef CONFIG_PREEMPT_LAZY
  18856. +void resched_curr_lazy(struct rq *rq)
  18857. +{
  18858. + struct task_struct *curr = rq->curr;
  18859. + int cpu;
  18860. +
  18861. + if (!sched_feat(PREEMPT_LAZY)) {
  18862. + resched_curr(rq);
  18863. + return;
  18864. + }
  18865. +
  18866. + lockdep_assert_held(&rq->lock);
  18867. +
  18868. + if (test_tsk_need_resched(curr))
  18869. + return;
  18870. +
  18871. + if (test_tsk_need_resched_lazy(curr))
  18872. + return;
  18873. +
  18874. + set_tsk_need_resched_lazy(curr);
  18875. +
  18876. + cpu = cpu_of(rq);
  18877. + if (cpu == smp_processor_id())
  18878. + return;
  18879. +
  18880. + /* NEED_RESCHED_LAZY must be visible before we test polling */
  18881. + smp_mb();
  18882. + if (!tsk_is_polling(curr))
  18883. + smp_send_reschedule(cpu);
  18884. +}
  18885. +#endif
  18886. +
  18887. void resched_cpu(int cpu)
  18888. {
  18889. struct rq *rq = cpu_rq(cpu);
  18890. @@ -595,12 +678,14 @@ void resched_cpu(int cpu)
  18891. */
  18892. int get_nohz_timer_target(int pinned)
  18893. {
  18894. - int cpu = smp_processor_id();
  18895. + int cpu;
  18896. int i;
  18897. struct sched_domain *sd;
  18898. + preempt_disable_rt();
  18899. + cpu = smp_processor_id();
  18900. if (pinned || !get_sysctl_timer_migration() || !idle_cpu(cpu))
  18901. - return cpu;
  18902. + goto preempt_en_rt;
  18903. rcu_read_lock();
  18904. for_each_domain(cpu, sd) {
  18905. @@ -613,6 +698,8 @@ int get_nohz_timer_target(int pinned)
  18906. }
  18907. unlock:
  18908. rcu_read_unlock();
  18909. +preempt_en_rt:
  18910. + preempt_enable_rt();
  18911. return cpu;
  18912. }
  18913. /*
  18914. @@ -1164,6 +1251,18 @@ struct migration_arg {
  18915. static int migration_cpu_stop(void *data);
  18916. +static bool check_task_state(struct task_struct *p, long match_state)
  18917. +{
  18918. + bool match = false;
  18919. +
  18920. + raw_spin_lock_irq(&p->pi_lock);
  18921. + if (p->state == match_state || p->saved_state == match_state)
  18922. + match = true;
  18923. + raw_spin_unlock_irq(&p->pi_lock);
  18924. +
  18925. + return match;
  18926. +}
  18927. +
  18928. /*
  18929. * wait_task_inactive - wait for a thread to unschedule.
  18930. *
  18931. @@ -1208,7 +1307,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
  18932. * is actually now running somewhere else!
  18933. */
  18934. while (task_running(rq, p)) {
  18935. - if (match_state && unlikely(p->state != match_state))
  18936. + if (match_state && !check_task_state(p, match_state))
  18937. return 0;
  18938. cpu_relax();
  18939. }
  18940. @@ -1223,7 +1322,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
  18941. running = task_running(rq, p);
  18942. queued = task_on_rq_queued(p);
  18943. ncsw = 0;
  18944. - if (!match_state || p->state == match_state)
  18945. + if (!match_state || p->state == match_state ||
  18946. + p->saved_state == match_state)
  18947. ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
  18948. task_rq_unlock(rq, p, &flags);
  18949. @@ -1449,10 +1549,6 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
  18950. {
  18951. activate_task(rq, p, en_flags);
  18952. p->on_rq = TASK_ON_RQ_QUEUED;
  18953. -
  18954. - /* if a worker is waking up, notify workqueue */
  18955. - if (p->flags & PF_WQ_WORKER)
  18956. - wq_worker_waking_up(p, cpu_of(rq));
  18957. }
  18958. /*
  18959. @@ -1462,9 +1558,9 @@ static void
  18960. ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
  18961. {
  18962. check_preempt_curr(rq, p, wake_flags);
  18963. - trace_sched_wakeup(p, true);
  18964. -
  18965. p->state = TASK_RUNNING;
  18966. + trace_sched_wakeup(p);
  18967. +
  18968. #ifdef CONFIG_SMP
  18969. if (p->sched_class->task_woken)
  18970. p->sched_class->task_woken(rq, p);
  18971. @@ -1666,8 +1762,29 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
  18972. */
  18973. smp_mb__before_spinlock();
  18974. raw_spin_lock_irqsave(&p->pi_lock, flags);
  18975. - if (!(p->state & state))
  18976. + if (!(p->state & state)) {
  18977. + /*
  18978. + * The task might be running due to a spinlock sleeper
  18979. + * wakeup. Check the saved state and set it to running
  18980. + * if the wakeup condition is true.
  18981. + */
  18982. + if (!(wake_flags & WF_LOCK_SLEEPER)) {
  18983. + if (p->saved_state & state) {
  18984. + p->saved_state = TASK_RUNNING;
  18985. + success = 1;
  18986. + }
  18987. + }
  18988. goto out;
  18989. + }
  18990. +
  18991. + /*
  18992. + * If this is a regular wakeup, then we can unconditionally
  18993. + * clear the saved state of a "lock sleeper".
  18994. + */
  18995. + if (!(wake_flags & WF_LOCK_SLEEPER))
  18996. + p->saved_state = TASK_RUNNING;
  18997. +
  18998. + trace_sched_waking(p);
  18999. success = 1; /* we're going to change ->state */
  19000. cpu = task_cpu(p);
  19001. @@ -1732,42 +1849,6 @@ out:
  19002. }
  19003. /**
  19004. - * try_to_wake_up_local - try to wake up a local task with rq lock held
  19005. - * @p: the thread to be awakened
  19006. - *
  19007. - * Put @p on the run-queue if it's not already there. The caller must
  19008. - * ensure that this_rq() is locked, @p is bound to this_rq() and not
  19009. - * the current task.
  19010. - */
  19011. -static void try_to_wake_up_local(struct task_struct *p)
  19012. -{
  19013. - struct rq *rq = task_rq(p);
  19014. -
  19015. - if (WARN_ON_ONCE(rq != this_rq()) ||
  19016. - WARN_ON_ONCE(p == current))
  19017. - return;
  19018. -
  19019. - lockdep_assert_held(&rq->lock);
  19020. -
  19021. - if (!raw_spin_trylock(&p->pi_lock)) {
  19022. - raw_spin_unlock(&rq->lock);
  19023. - raw_spin_lock(&p->pi_lock);
  19024. - raw_spin_lock(&rq->lock);
  19025. - }
  19026. -
  19027. - if (!(p->state & TASK_NORMAL))
  19028. - goto out;
  19029. -
  19030. - if (!task_on_rq_queued(p))
  19031. - ttwu_activate(rq, p, ENQUEUE_WAKEUP);
  19032. -
  19033. - ttwu_do_wakeup(rq, p, 0);
  19034. - ttwu_stat(p, smp_processor_id(), 0);
  19035. -out:
  19036. - raw_spin_unlock(&p->pi_lock);
  19037. -}
  19038. -
  19039. -/**
  19040. * wake_up_process - Wake up a specific process
  19041. * @p: The process to be woken up.
  19042. *
  19043. @@ -1781,11 +1862,23 @@ out:
  19044. */
  19045. int wake_up_process(struct task_struct *p)
  19046. {
  19047. - WARN_ON(task_is_stopped_or_traced(p));
  19048. + WARN_ON(__task_is_stopped_or_traced(p));
  19049. return try_to_wake_up(p, TASK_NORMAL, 0);
  19050. }
  19051. EXPORT_SYMBOL(wake_up_process);
  19052. +/**
  19053. + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
  19054. + * @p: The process to be woken up.
  19055. + *
  19056. + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
  19057. + * the nature of the wakeup.
  19058. + */
  19059. +int wake_up_lock_sleeper(struct task_struct *p)
  19060. +{
  19061. + return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
  19062. +}
  19063. +
  19064. int wake_up_state(struct task_struct *p, unsigned int state)
  19065. {
  19066. return try_to_wake_up(p, state, 0);
  19067. @@ -1981,6 +2074,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
  19068. p->on_cpu = 0;
  19069. #endif
  19070. init_task_preempt_count(p);
  19071. +#ifdef CONFIG_HAVE_PREEMPT_LAZY
  19072. + task_thread_info(p)->preempt_lazy_count = 0;
  19073. +#endif
  19074. #ifdef CONFIG_SMP
  19075. plist_node_init(&p->pushable_tasks, MAX_PRIO);
  19076. RB_CLEAR_NODE(&p->pushable_dl_tasks);
  19077. @@ -2116,7 +2212,7 @@ void wake_up_new_task(struct task_struct *p)
  19078. rq = __task_rq_lock(p);
  19079. activate_task(rq, p, 0);
  19080. p->on_rq = TASK_ON_RQ_QUEUED;
  19081. - trace_sched_wakeup_new(p, true);
  19082. + trace_sched_wakeup_new(p);
  19083. check_preempt_curr(rq, p, WF_FORK);
  19084. #ifdef CONFIG_SMP
  19085. if (p->sched_class->task_woken)
  19086. @@ -2253,8 +2349,12 @@ static struct rq *finish_task_switch(struct task_struct *prev)
  19087. finish_arch_post_lock_switch();
  19088. fire_sched_in_preempt_notifiers(current);
  19089. + /*
  19090. + * We use mmdrop_delayed() here so we don't have to do the
  19091. + * full __mmdrop() when we are the last user.
  19092. + */
  19093. if (mm)
  19094. - mmdrop(mm);
  19095. + mmdrop_delayed(mm);
  19096. if (unlikely(prev_state == TASK_DEAD)) {
  19097. if (prev->sched_class->task_dead)
  19098. prev->sched_class->task_dead(prev);
  19099. @@ -2565,16 +2665,6 @@ u64 scheduler_tick_max_deferment(void)
  19100. }
  19101. #endif
  19102. -notrace unsigned long get_parent_ip(unsigned long addr)
  19103. -{
  19104. - if (in_lock_functions(addr)) {
  19105. - addr = CALLER_ADDR2;
  19106. - if (in_lock_functions(addr))
  19107. - addr = CALLER_ADDR3;
  19108. - }
  19109. - return addr;
  19110. -}
  19111. -
  19112. #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
  19113. defined(CONFIG_PREEMPT_TRACER))
  19114. @@ -2596,7 +2686,7 @@ void preempt_count_add(int val)
  19115. PREEMPT_MASK - 10);
  19116. #endif
  19117. if (preempt_count() == val) {
  19118. - unsigned long ip = get_parent_ip(CALLER_ADDR1);
  19119. + unsigned long ip = get_lock_parent_ip();
  19120. #ifdef CONFIG_DEBUG_PREEMPT
  19121. current->preempt_disable_ip = ip;
  19122. #endif
  19123. @@ -2623,7 +2713,7 @@ void preempt_count_sub(int val)
  19124. #endif
  19125. if (preempt_count() == val)
  19126. - trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
  19127. + trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
  19128. __preempt_count_sub(val);
  19129. }
  19130. EXPORT_SYMBOL(preempt_count_sub);
  19131. @@ -2679,6 +2769,133 @@ static inline void schedule_debug(struct task_struct *prev)
  19132. schedstat_inc(this_rq(), sched_count);
  19133. }
  19134. +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
  19135. +#define MIGRATE_DISABLE_SET_AFFIN (1<<30) /* Can't make a negative */
  19136. +#define migrate_disabled_updated(p) ((p)->migrate_disable & MIGRATE_DISABLE_SET_AFFIN)
  19137. +#define migrate_disable_count(p) ((p)->migrate_disable & ~MIGRATE_DISABLE_SET_AFFIN)
  19138. +
  19139. +static inline void update_migrate_disable(struct task_struct *p)
  19140. +{
  19141. + const struct cpumask *mask;
  19142. +
  19143. + if (likely(!p->migrate_disable))
  19144. + return;
  19145. +
  19146. + /* Did we already update affinity? */
  19147. + if (unlikely(migrate_disabled_updated(p)))
  19148. + return;
  19149. +
  19150. + /*
19151. + * Since this is always current we can get away with only locking
19152. + * rq->lock. The ->cpus_allowed value can normally only be changed
19153. + * while holding both p->pi_lock and rq->lock, but since this
19154. + * is current we cannot actually be waking up, so all code that
19155. + * relies on serialization against p->pi_lock is out of scope.
  19156. + *
  19157. + * Having rq->lock serializes us against things like
  19158. + * set_cpus_allowed_ptr() that can still happen concurrently.
  19159. + */
  19160. + mask = tsk_cpus_allowed(p);
  19161. +
  19162. + if (p->sched_class->set_cpus_allowed)
  19163. + p->sched_class->set_cpus_allowed(p, mask);
  19164. + /* mask==cpumask_of(task_cpu(p)) which has a cpumask_weight==1 */
  19165. + p->nr_cpus_allowed = 1;
  19166. +
  19167. + /* Let migrate_enable know to fix things back up */
  19168. + p->migrate_disable |= MIGRATE_DISABLE_SET_AFFIN;
  19169. +}
  19170. +
  19171. +void migrate_disable(void)
  19172. +{
  19173. + struct task_struct *p = current;
  19174. +
  19175. + if (in_atomic() || irqs_disabled()) {
  19176. +#ifdef CONFIG_SCHED_DEBUG
  19177. + p->migrate_disable_atomic++;
  19178. +#endif
  19179. + return;
  19180. + }
  19181. +
  19182. +#ifdef CONFIG_SCHED_DEBUG
  19183. + if (unlikely(p->migrate_disable_atomic)) {
  19184. + tracing_off();
  19185. + WARN_ON_ONCE(1);
  19186. + }
  19187. +#endif
  19188. +
  19189. + if (p->migrate_disable) {
  19190. + p->migrate_disable++;
  19191. + return;
  19192. + }
  19193. +
  19194. + preempt_disable();
  19195. + preempt_lazy_disable();
  19196. + pin_current_cpu();
  19197. + p->migrate_disable = 1;
  19198. + preempt_enable();
  19199. +}
  19200. +EXPORT_SYMBOL(migrate_disable);
  19201. +
  19202. +void migrate_enable(void)
  19203. +{
  19204. + struct task_struct *p = current;
  19205. + const struct cpumask *mask;
  19206. + unsigned long flags;
  19207. + struct rq *rq;
  19208. +
  19209. + if (in_atomic() || irqs_disabled()) {
  19210. +#ifdef CONFIG_SCHED_DEBUG
  19211. + p->migrate_disable_atomic--;
  19212. +#endif
  19213. + return;
  19214. + }
  19215. +
  19216. +#ifdef CONFIG_SCHED_DEBUG
  19217. + if (unlikely(p->migrate_disable_atomic)) {
  19218. + tracing_off();
  19219. + WARN_ON_ONCE(1);
  19220. + }
  19221. +#endif
  19222. + WARN_ON_ONCE(p->migrate_disable <= 0);
  19223. +
  19224. + if (migrate_disable_count(p) > 1) {
  19225. + p->migrate_disable--;
  19226. + return;
  19227. + }
  19228. +
  19229. + preempt_disable();
  19230. + if (unlikely(migrate_disabled_updated(p))) {
  19231. + /*
  19232. + * Undo whatever update_migrate_disable() did, also see there
  19233. + * about locking.
  19234. + */
  19235. + rq = this_rq();
  19236. + raw_spin_lock_irqsave(&rq->lock, flags);
  19237. +
  19238. + /*
  19239. + * Clearing migrate_disable causes tsk_cpus_allowed to
19240. + * show the task's original cpu affinity.
  19241. + */
  19242. + p->migrate_disable = 0;
  19243. + mask = tsk_cpus_allowed(p);
  19244. + if (p->sched_class->set_cpus_allowed)
  19245. + p->sched_class->set_cpus_allowed(p, mask);
  19246. + p->nr_cpus_allowed = cpumask_weight(mask);
  19247. + raw_spin_unlock_irqrestore(&rq->lock, flags);
  19248. + } else
  19249. + p->migrate_disable = 0;
  19250. +
  19251. + unpin_current_cpu();
  19252. + preempt_enable();
  19253. + preempt_lazy_enable();
  19254. +}
  19255. +EXPORT_SYMBOL(migrate_enable);
  19256. +#else
  19257. +static inline void update_migrate_disable(struct task_struct *p) { }
  19258. +#define migrate_disabled_updated(p) 0
  19259. +#endif
  19260. +
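migrate_disable()/migrate_enable() pin the calling task to its current CPU without disabling preemption, which is what RT code needs around per-CPU data protected by sleeping locks. A usage sketch (the per-CPU structure is made up; the calls are the API added above):

/* Sketch only: keep per-CPU data stable across a possibly sleeping section. */
struct my_stats {
	spinlock_t	lock;		/* becomes a sleeping lock on RT */
	unsigned long	count;
};
static DEFINE_PER_CPU(struct my_stats, my_stats);

static void my_update(void)
{
	struct my_stats *s;

	migrate_disable();		/* stay on this CPU, but stay preemptible */
	s = this_cpu_ptr(&my_stats);
	spin_lock(&s->lock);		/* may sleep on PREEMPT_RT_FULL */
	s->count++;
	spin_unlock(&s->lock);
	migrate_enable();		/* restore the original affinity */
}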
  19261. /*
  19262. * Pick up the highest-prio task:
  19263. */
  19264. @@ -2785,6 +3002,8 @@ static void __sched __schedule(void)
  19265. smp_mb__before_spinlock();
  19266. raw_spin_lock_irq(&rq->lock);
  19267. + update_migrate_disable(prev);
  19268. +
  19269. rq->clock_skip_update <<= 1; /* promote REQ to ACT */
  19270. switch_count = &prev->nivcsw;
  19271. @@ -2794,19 +3013,6 @@ static void __sched __schedule(void)
  19272. } else {
  19273. deactivate_task(rq, prev, DEQUEUE_SLEEP);
  19274. prev->on_rq = 0;
  19275. -
  19276. - /*
  19277. - * If a worker went to sleep, notify and ask workqueue
  19278. - * whether it wants to wake up a task to maintain
  19279. - * concurrency.
  19280. - */
  19281. - if (prev->flags & PF_WQ_WORKER) {
  19282. - struct task_struct *to_wakeup;
  19283. -
  19284. - to_wakeup = wq_worker_sleeping(prev, cpu);
  19285. - if (to_wakeup)
  19286. - try_to_wake_up_local(to_wakeup);
  19287. - }
  19288. }
  19289. switch_count = &prev->nvcsw;
  19290. }
  19291. @@ -2816,6 +3022,7 @@ static void __sched __schedule(void)
  19292. next = pick_next_task(rq, prev);
  19293. clear_tsk_need_resched(prev);
  19294. + clear_tsk_need_resched_lazy(prev);
  19295. clear_preempt_need_resched();
  19296. rq->clock_skip_update = 0;
  19297. @@ -2836,8 +3043,19 @@ static void __sched __schedule(void)
  19298. static inline void sched_submit_work(struct task_struct *tsk)
  19299. {
  19300. - if (!tsk->state || tsk_is_pi_blocked(tsk))
  19301. + if (!tsk->state)
  19302. + return;
  19303. + /*
  19304. + * If a worker went to sleep, notify and ask workqueue whether
  19305. + * it wants to wake up a task to maintain concurrency.
  19306. + */
  19307. + if (tsk->flags & PF_WQ_WORKER)
  19308. + wq_worker_sleeping(tsk);
  19309. +
  19310. +
  19311. + if (tsk_is_pi_blocked(tsk))
  19312. return;
  19313. +
  19314. /*
  19315. * If we are going to sleep and we have plugged IO queued,
  19316. * make sure to submit it to avoid deadlocks.
  19317. @@ -2846,6 +3064,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
  19318. blk_schedule_flush_plug(tsk);
  19319. }
  19320. +static void sched_update_worker(struct task_struct *tsk)
  19321. +{
  19322. + if (tsk->flags & PF_WQ_WORKER)
  19323. + wq_worker_running(tsk);
  19324. +}
  19325. +
  19326. asmlinkage __visible void __sched schedule(void)
  19327. {
  19328. struct task_struct *tsk = current;
  19329. @@ -2854,6 +3078,7 @@ asmlinkage __visible void __sched schedule(void)
  19330. do {
  19331. __schedule();
  19332. } while (need_resched());
  19333. + sched_update_worker(tsk);
  19334. }
  19335. EXPORT_SYMBOL(schedule);
  19336. @@ -2903,6 +3128,30 @@ static void __sched notrace preempt_schedule_common(void)
  19337. } while (need_resched());
  19338. }
  19339. +#ifdef CONFIG_PREEMPT_LAZY
  19340. +/*
19341. + * If TIF_NEED_RESCHED is set, we allow being scheduled away, since that is
19342. + * set by an RT task. Otherwise we try to avoid being scheduled out as long
19343. + * as the preempt_lazy_count is > 0.
  19344. + */
  19345. +static __always_inline int preemptible_lazy(void)
  19346. +{
  19347. + if (test_thread_flag(TIF_NEED_RESCHED))
  19348. + return 1;
  19349. + if (current_thread_info()->preempt_lazy_count)
  19350. + return 0;
  19351. + return 1;
  19352. +}
  19353. +
  19354. +#else
  19355. +
  19356. +static inline int preemptible_lazy(void)
  19357. +{
  19358. + return 1;
  19359. +}
  19360. +
  19361. +#endif
  19362. +
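The intent behind the lazy counter, condensed into one predicate (an assumption drawn from the hunks above and the TIF_NEED_RESCHED_LAZY flag added elsewhere in this patch, not code from it): RT/DL wakeups still set TIF_NEED_RESCHED and preempt immediately, while fair-class requests go through resched_curr_lazy() and are deferred as long as a lazy section is active.

/* Sketch only: condensed preemption decision under CONFIG_PREEMPT_LAZY. */
static bool want_immediate_preemption(void)
{
	if (test_thread_flag(TIF_NEED_RESCHED))		/* RT/DL preemption */
		return true;
	if (current_thread_info()->preempt_lazy_count)	/* inside a lazy section */
		return false;
	return test_thread_flag(TIF_NEED_RESCHED_LAZY);	/* deferred fair request */
}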
  19363. #ifdef CONFIG_PREEMPT
  19364. /*
  19365. * this is the entry point to schedule() from in-kernel preemption
  19366. @@ -2917,6 +3166,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
  19367. */
  19368. if (likely(!preemptible()))
  19369. return;
  19370. + if (!preemptible_lazy())
  19371. + return;
  19372. preempt_schedule_common();
  19373. }
  19374. @@ -2944,6 +3195,8 @@ asmlinkage __visible void __sched notrace preempt_schedule_context(void)
  19375. if (likely(!preemptible()))
  19376. return;
  19377. + if (!preemptible_lazy())
  19378. + return;
  19379. do {
  19380. __preempt_count_add(PREEMPT_ACTIVE);
  19381. @@ -2953,7 +3206,16 @@ asmlinkage __visible void __sched notrace preempt_schedule_context(void)
  19382. * an infinite recursion.
  19383. */
  19384. prev_ctx = exception_enter();
  19385. + /*
  19386. + * The add/subtract must not be traced by the function
  19387. + * tracer. But we still want to account for the
  19388. + * preempt off latency tracer. Since the _notrace versions
  19389. + * of add/subtract skip the accounting for latency tracer
  19390. + * we must force it manually.
  19391. + */
  19392. + start_critical_timings();
  19393. __schedule();
  19394. + stop_critical_timings();
  19395. exception_exit(prev_ctx);
  19396. __preempt_count_sub(PREEMPT_ACTIVE);
  19397. @@ -4290,6 +4552,7 @@ int __cond_resched_lock(spinlock_t *lock)
  19398. }
  19399. EXPORT_SYMBOL(__cond_resched_lock);
  19400. +#ifndef CONFIG_PREEMPT_RT_FULL
  19401. int __sched __cond_resched_softirq(void)
  19402. {
  19403. BUG_ON(!in_softirq());
  19404. @@ -4303,6 +4566,7 @@ int __sched __cond_resched_softirq(void)
  19405. return 0;
  19406. }
  19407. EXPORT_SYMBOL(__cond_resched_softirq);
  19408. +#endif
  19409. /**
  19410. * yield - yield the current processor to other threads.
  19411. @@ -4659,7 +4923,9 @@ void init_idle(struct task_struct *idle, int cpu)
  19412. /* Set the preempt count _outside_ the spinlocks! */
  19413. init_idle_preempt_count(idle, cpu);
  19414. -
  19415. +#ifdef CONFIG_HAVE_PREEMPT_LAZY
  19416. + task_thread_info(idle)->preempt_lazy_count = 0;
  19417. +#endif
  19418. /*
  19419. * The idle tasks have their own, simple scheduling class:
  19420. */
  19421. @@ -4779,11 +5045,91 @@ static struct rq *move_queued_task(struct task_struct *p, int new_cpu)
  19422. void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  19423. {
  19424. - if (p->sched_class->set_cpus_allowed)
  19425. - p->sched_class->set_cpus_allowed(p, new_mask);
  19426. + if (!migrate_disabled_updated(p)) {
  19427. + if (p->sched_class->set_cpus_allowed)
  19428. + p->sched_class->set_cpus_allowed(p, new_mask);
  19429. + p->nr_cpus_allowed = cpumask_weight(new_mask);
  19430. + }
  19431. cpumask_copy(&p->cpus_allowed, new_mask);
  19432. - p->nr_cpus_allowed = cpumask_weight(new_mask);
  19433. +}
  19434. +
  19435. +static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
  19436. +static DEFINE_MUTEX(sched_down_mutex);
  19437. +static cpumask_t sched_down_cpumask;
  19438. +
  19439. +void tell_sched_cpu_down_begin(int cpu)
  19440. +{
  19441. + mutex_lock(&sched_down_mutex);
  19442. + cpumask_set_cpu(cpu, &sched_down_cpumask);
  19443. + mutex_unlock(&sched_down_mutex);
  19444. +}
  19445. +
  19446. +void tell_sched_cpu_down_done(int cpu)
  19447. +{
  19448. + mutex_lock(&sched_down_mutex);
  19449. + cpumask_clear_cpu(cpu, &sched_down_cpumask);
  19450. + mutex_unlock(&sched_down_mutex);
  19451. +}
  19452. +
  19453. +/**
  19454. + * migrate_me - try to move the current task off this cpu
  19455. + *
  19456. + * Used by the pin_current_cpu() code to try to get tasks
  19457. + * to move off the current CPU as it is going down.
  19458. + * It will only move the task if the task isn't pinned to
  19459. + * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
19460. + * and the task is in the RUNNING state. Otherwise moving the
19461. + * task would wake it up (change its state to running) when
19462. + * the task did not expect that.
  19463. + *
19464. + * Returns 1 if it succeeded in moving the current task,
  19465. + * 0 otherwise.
  19466. + */
  19467. +int migrate_me(void)
  19468. +{
  19469. + struct task_struct *p = current;
  19470. + struct migration_arg arg;
  19471. + struct cpumask *cpumask;
  19472. + struct cpumask *mask;
  19473. + unsigned long flags;
  19474. + unsigned int dest_cpu;
  19475. + struct rq *rq;
  19476. +
  19477. + /*
19478. + * We cannot migrate tasks bound to a CPU or tasks that are not
19479. + * running. The movement of the task will wake it up.
  19480. + */
  19481. + if (p->flags & PF_NO_SETAFFINITY || p->state)
  19482. + return 0;
  19483. +
  19484. + mutex_lock(&sched_down_mutex);
  19485. + rq = task_rq_lock(p, &flags);
  19486. +
  19487. + cpumask = this_cpu_ptr(&sched_cpumasks);
  19488. + mask = &p->cpus_allowed;
  19489. +
  19490. + cpumask_andnot(cpumask, mask, &sched_down_cpumask);
  19491. +
  19492. + if (!cpumask_weight(cpumask)) {
  19493. + /* It's only on this CPU? */
  19494. + task_rq_unlock(rq, p, &flags);
  19495. + mutex_unlock(&sched_down_mutex);
  19496. + return 0;
  19497. + }
  19498. +
  19499. + dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
  19500. +
  19501. + arg.task = p;
  19502. + arg.dest_cpu = dest_cpu;
  19503. +
  19504. + task_rq_unlock(rq, p, &flags);
  19505. +
  19506. + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
  19507. + tlb_migrate_finish(p->mm);
  19508. + mutex_unlock(&sched_down_mutex);
  19509. +
  19510. + return 1;
  19511. }
  19512. /*
  19513. @@ -4829,7 +5175,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
  19514. do_set_cpus_allowed(p, new_mask);
  19515. /* Can the task run on the task's current CPU? If so, we're done */
  19516. - if (cpumask_test_cpu(task_cpu(p), new_mask))
  19517. + if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
  19518. goto out;
  19519. dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
  19520. @@ -4969,6 +5315,8 @@ static int migration_cpu_stop(void *data)
  19521. #ifdef CONFIG_HOTPLUG_CPU
  19522. +static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
  19523. +
  19524. /*
  19525. * Ensures that the idle task is using init_mm right before its cpu goes
  19526. * offline.
  19527. @@ -4983,7 +5331,11 @@ void idle_task_exit(void)
  19528. switch_mm(mm, &init_mm, current);
  19529. finish_arch_post_lock_switch();
  19530. }
  19531. - mmdrop(mm);
  19532. + /*
  19533. + * Defer the cleanup to an alive cpu. On RT we can neither
  19534. + * call mmdrop() nor mmdrop_delayed() from here.
  19535. + */
  19536. + per_cpu(idle_last_mm, smp_processor_id()) = mm;
  19537. }
  19538. /*
  19539. @@ -5326,6 +5678,10 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
  19540. case CPU_DEAD:
  19541. calc_load_migrate(rq);
  19542. + if (per_cpu(idle_last_mm, cpu)) {
  19543. + mmdrop(per_cpu(idle_last_mm, cpu));
  19544. + per_cpu(idle_last_mm, cpu) = NULL;
  19545. + }
  19546. break;
  19547. #endif
  19548. }
  19549. @@ -7305,7 +7661,8 @@ void __init sched_init(void)
  19550. #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  19551. static inline int preempt_count_equals(int preempt_offset)
  19552. {
  19553. - int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
  19554. + int nested = (preempt_count() & ~PREEMPT_ACTIVE) +
  19555. + sched_rcu_preempt_depth();
  19556. return (nested == preempt_offset);
  19557. }
  19558. diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
  19559. index 87b8576cbd50..2ee44eb30f2b 100644
  19560. --- a/kernel/sched/cputime.c
  19561. +++ b/kernel/sched/cputime.c
  19562. @@ -675,37 +675,45 @@ static void __vtime_account_system(struct task_struct *tsk)
  19563. void vtime_account_system(struct task_struct *tsk)
  19564. {
  19565. - write_seqlock(&tsk->vtime_seqlock);
  19566. + raw_spin_lock(&tsk->vtime_lock);
  19567. + write_seqcount_begin(&tsk->vtime_seq);
  19568. __vtime_account_system(tsk);
  19569. - write_sequnlock(&tsk->vtime_seqlock);
  19570. + write_seqcount_end(&tsk->vtime_seq);
  19571. + raw_spin_unlock(&tsk->vtime_lock);
  19572. }
  19573. void vtime_gen_account_irq_exit(struct task_struct *tsk)
  19574. {
  19575. - write_seqlock(&tsk->vtime_seqlock);
  19576. + raw_spin_lock(&tsk->vtime_lock);
  19577. + write_seqcount_begin(&tsk->vtime_seq);
  19578. __vtime_account_system(tsk);
  19579. if (context_tracking_in_user())
  19580. tsk->vtime_snap_whence = VTIME_USER;
  19581. - write_sequnlock(&tsk->vtime_seqlock);
  19582. + write_seqcount_end(&tsk->vtime_seq);
  19583. + raw_spin_unlock(&tsk->vtime_lock);
  19584. }
  19585. void vtime_account_user(struct task_struct *tsk)
  19586. {
  19587. cputime_t delta_cpu;
  19588. - write_seqlock(&tsk->vtime_seqlock);
  19589. + raw_spin_lock(&tsk->vtime_lock);
  19590. + write_seqcount_begin(&tsk->vtime_seq);
  19591. delta_cpu = get_vtime_delta(tsk);
  19592. tsk->vtime_snap_whence = VTIME_SYS;
  19593. account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
  19594. - write_sequnlock(&tsk->vtime_seqlock);
  19595. + write_seqcount_end(&tsk->vtime_seq);
  19596. + raw_spin_unlock(&tsk->vtime_lock);
  19597. }
  19598. void vtime_user_enter(struct task_struct *tsk)
  19599. {
  19600. - write_seqlock(&tsk->vtime_seqlock);
  19601. + raw_spin_lock(&tsk->vtime_lock);
  19602. + write_seqcount_begin(&tsk->vtime_seq);
  19603. __vtime_account_system(tsk);
  19604. tsk->vtime_snap_whence = VTIME_USER;
  19605. - write_sequnlock(&tsk->vtime_seqlock);
  19606. + write_seqcount_end(&tsk->vtime_seq);
  19607. + raw_spin_unlock(&tsk->vtime_lock);
  19608. }
  19609. void vtime_guest_enter(struct task_struct *tsk)
  19610. @@ -717,19 +725,23 @@ void vtime_guest_enter(struct task_struct *tsk)
  19611. * synchronization against the reader (task_gtime())
  19612. * that can thus safely catch up with a tickless delta.
  19613. */
  19614. - write_seqlock(&tsk->vtime_seqlock);
  19615. + raw_spin_lock(&tsk->vtime_lock);
  19616. + write_seqcount_begin(&tsk->vtime_seq);
  19617. __vtime_account_system(tsk);
  19618. current->flags |= PF_VCPU;
  19619. - write_sequnlock(&tsk->vtime_seqlock);
  19620. + write_seqcount_end(&tsk->vtime_seq);
  19621. + raw_spin_unlock(&tsk->vtime_lock);
  19622. }
  19623. EXPORT_SYMBOL_GPL(vtime_guest_enter);
  19624. void vtime_guest_exit(struct task_struct *tsk)
  19625. {
  19626. - write_seqlock(&tsk->vtime_seqlock);
  19627. + raw_spin_lock(&tsk->vtime_lock);
  19628. + write_seqcount_begin(&tsk->vtime_seq);
  19629. __vtime_account_system(tsk);
  19630. current->flags &= ~PF_VCPU;
  19631. - write_sequnlock(&tsk->vtime_seqlock);
  19632. + write_seqcount_end(&tsk->vtime_seq);
  19633. + raw_spin_unlock(&tsk->vtime_lock);
  19634. }
  19635. EXPORT_SYMBOL_GPL(vtime_guest_exit);
  19636. @@ -742,24 +754,30 @@ void vtime_account_idle(struct task_struct *tsk)
  19637. void arch_vtime_task_switch(struct task_struct *prev)
  19638. {
  19639. - write_seqlock(&prev->vtime_seqlock);
  19640. + raw_spin_lock(&prev->vtime_lock);
  19641. + write_seqcount_begin(&prev->vtime_seq);
  19642. prev->vtime_snap_whence = VTIME_SLEEPING;
  19643. - write_sequnlock(&prev->vtime_seqlock);
  19644. + write_seqcount_end(&prev->vtime_seq);
  19645. + raw_spin_unlock(&prev->vtime_lock);
  19646. - write_seqlock(&current->vtime_seqlock);
  19647. + raw_spin_lock(&current->vtime_lock);
  19648. + write_seqcount_begin(&current->vtime_seq);
  19649. current->vtime_snap_whence = VTIME_SYS;
  19650. current->vtime_snap = sched_clock_cpu(smp_processor_id());
  19651. - write_sequnlock(&current->vtime_seqlock);
  19652. + write_seqcount_end(&current->vtime_seq);
  19653. + raw_spin_unlock(&current->vtime_lock);
  19654. }
  19655. void vtime_init_idle(struct task_struct *t, int cpu)
  19656. {
  19657. unsigned long flags;
  19658. - write_seqlock_irqsave(&t->vtime_seqlock, flags);
  19659. + raw_spin_lock_irqsave(&t->vtime_lock, flags);
  19660. + write_seqcount_begin(&t->vtime_seq);
  19661. t->vtime_snap_whence = VTIME_SYS;
  19662. t->vtime_snap = sched_clock_cpu(cpu);
  19663. - write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
  19664. + write_seqcount_end(&t->vtime_seq);
  19665. + raw_spin_unlock_irqrestore(&t->vtime_lock, flags);
  19666. }
  19667. cputime_t task_gtime(struct task_struct *t)
  19668. @@ -768,13 +786,13 @@ cputime_t task_gtime(struct task_struct *t)
  19669. cputime_t gtime;
  19670. do {
  19671. - seq = read_seqbegin(&t->vtime_seqlock);
  19672. + seq = read_seqcount_begin(&t->vtime_seq);
  19673. gtime = t->gtime;
  19674. if (t->flags & PF_VCPU)
  19675. gtime += vtime_delta(t);
  19676. - } while (read_seqretry(&t->vtime_seqlock, seq));
  19677. + } while (read_seqcount_retry(&t->vtime_seq, seq));
  19678. return gtime;
  19679. }
  19680. @@ -797,7 +815,7 @@ fetch_task_cputime(struct task_struct *t,
  19681. *udelta = 0;
  19682. *sdelta = 0;
  19683. - seq = read_seqbegin(&t->vtime_seqlock);
  19684. + seq = read_seqcount_begin(&t->vtime_seq);
  19685. if (u_dst)
  19686. *u_dst = *u_src;
  19687. @@ -821,7 +839,7 @@ fetch_task_cputime(struct task_struct *t,
  19688. if (t->vtime_snap_whence == VTIME_SYS)
  19689. *sdelta = delta;
  19690. }
  19691. - } while (read_seqretry(&t->vtime_seqlock, seq));
  19692. + } while (read_seqcount_retry(&t->vtime_seq, seq));
  19693. }
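The vtime conversion above splits the former seqlock into a raw_spinlock_t for writer serialization plus a seqcount_t for lockless readers, so the write side never has to take a sleeping lock on PREEMPT_RT_FULL. The generic shape of the pattern (field and function names here are illustrative, not from the patch):

struct rt_safe_stamp {
	raw_spinlock_t	lock;	/* serializes writers, stays a real spinlock on RT */
	seqcount_t	seq;	/* lets readers detect a concurrent update */
	u64		value;
};

static void stamp_write(struct rt_safe_stamp *s, u64 v)
{
	raw_spin_lock(&s->lock);
	write_seqcount_begin(&s->seq);
	s->value = v;
	write_seqcount_end(&s->seq);
	raw_spin_unlock(&s->lock);
}

static u64 stamp_read(struct rt_safe_stamp *s)
{
	unsigned int seq;
	u64 v;

	do {
		seq = read_seqcount_begin(&s->seq);
		v = s->value;
	} while (read_seqcount_retry(&s->seq, seq));

	return v;
}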
  19694. diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
  19695. index 5e95145088fd..0c261c5114e0 100644
  19696. --- a/kernel/sched/deadline.c
  19697. +++ b/kernel/sched/deadline.c
  19698. @@ -637,6 +637,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
  19699. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  19700. timer->function = dl_task_timer;
  19701. + timer->irqsafe = 1;
  19702. }
  19703. static
  19704. diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
  19705. index a245c1fc6f0a..34b00001b00a 100644
  19706. --- a/kernel/sched/debug.c
  19707. +++ b/kernel/sched/debug.c
  19708. @@ -260,6 +260,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
  19709. P(rt_throttled);
  19710. PN(rt_time);
  19711. PN(rt_runtime);
  19712. +#ifdef CONFIG_SMP
  19713. + P(rt_nr_migratory);
  19714. +#endif
  19715. #undef PN
  19716. #undef P
  19717. @@ -648,6 +651,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
  19718. #endif
  19719. P(policy);
  19720. P(prio);
  19721. +#ifdef CONFIG_PREEMPT_RT_FULL
  19722. + P(migrate_disable);
  19723. +#endif
  19724. + P(nr_cpus_allowed);
  19725. #undef PN
  19726. #undef __PN
  19727. #undef P
  19728. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
  19729. index 77690b653ca9..7aae8d27611e 100644
  19730. --- a/kernel/sched/fair.c
  19731. +++ b/kernel/sched/fair.c
  19732. @@ -3201,7 +3201,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
  19733. ideal_runtime = sched_slice(cfs_rq, curr);
  19734. delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
  19735. if (delta_exec > ideal_runtime) {
  19736. - resched_curr(rq_of(cfs_rq));
  19737. + resched_curr_lazy(rq_of(cfs_rq));
  19738. /*
  19739. * The current task ran long enough, ensure it doesn't get
  19740. * re-elected due to buddy favours.
  19741. @@ -3225,7 +3225,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
  19742. return;
  19743. if (delta > ideal_runtime)
  19744. - resched_curr(rq_of(cfs_rq));
  19745. + resched_curr_lazy(rq_of(cfs_rq));
  19746. }
  19747. static void
  19748. @@ -3366,7 +3366,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
  19749. * validating it and just reschedule.
  19750. */
  19751. if (queued) {
  19752. - resched_curr(rq_of(cfs_rq));
  19753. + resched_curr_lazy(rq_of(cfs_rq));
  19754. return;
  19755. }
  19756. /*
  19757. @@ -3557,7 +3557,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
  19758. * hierarchy can be throttled
  19759. */
  19760. if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
  19761. - resched_curr(rq_of(cfs_rq));
  19762. + resched_curr_lazy(rq_of(cfs_rq));
  19763. }
  19764. static __always_inline
  19765. @@ -4180,7 +4180,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
  19766. if (delta < 0) {
  19767. if (rq->curr == p)
  19768. - resched_curr(rq);
  19769. + resched_curr_lazy(rq);
  19770. return;
  19771. }
  19772. hrtick_start(rq, delta);
  19773. @@ -5076,7 +5076,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
  19774. return;
  19775. preempt:
  19776. - resched_curr(rq);
  19777. + resched_curr_lazy(rq);
  19778. /*
  19779. * Only set the backward buddy when the current task is still
  19780. * on the rq. This can happen when a wakeup gets interleaved
  19781. @@ -7869,7 +7869,7 @@ static void task_fork_fair(struct task_struct *p)
  19782. * 'current' within the tree based on its new key value.
  19783. */
  19784. swap(curr->vruntime, se->vruntime);
  19785. - resched_curr(rq);
  19786. + resched_curr_lazy(rq);
  19787. }
  19788. se->vruntime -= cfs_rq->min_vruntime;
  19789. @@ -7894,7 +7894,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
  19790. */
  19791. if (rq->curr == p) {
  19792. if (p->prio > oldprio)
  19793. - resched_curr(rq);
  19794. + resched_curr_lazy(rq);
  19795. } else
  19796. check_preempt_curr(rq, p, 0);
  19797. }
  19798. diff --git a/kernel/sched/features.h b/kernel/sched/features.h
  19799. index 91e33cd485f6..0ea4e37751d7 100644
  19800. --- a/kernel/sched/features.h
  19801. +++ b/kernel/sched/features.h
  19802. @@ -50,11 +50,19 @@ SCHED_FEAT(LB_BIAS, true)
  19803. */
  19804. SCHED_FEAT(NONTASK_CAPACITY, true)
  19805. +#ifdef CONFIG_PREEMPT_RT_FULL
  19806. +SCHED_FEAT(TTWU_QUEUE, false)
  19807. +# ifdef CONFIG_PREEMPT_LAZY
  19808. +SCHED_FEAT(PREEMPT_LAZY, true)
  19809. +# endif
  19810. +#else
  19811. +
  19812. /*
  19813. * Queue remote wakeups on the target CPU and process them
  19814. * using the scheduler IPI. Reduces rq->lock contention/bounces.
  19815. */
  19816. SCHED_FEAT(TTWU_QUEUE, true)
  19817. +#endif
  19818. #ifdef HAVE_RT_PUSH_IPI
  19819. /*
  19820. diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
  19821. index 575da76a3874..637aa208a58d 100644
  19822. --- a/kernel/sched/rt.c
  19823. +++ b/kernel/sched/rt.c
  19824. @@ -44,6 +44,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
  19825. hrtimer_init(&rt_b->rt_period_timer,
  19826. CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  19827. + rt_b->rt_period_timer.irqsafe = 1;
  19828. rt_b->rt_period_timer.function = sched_rt_period_timer;
  19829. }
  19830. @@ -89,6 +90,7 @@ void init_rt_rq(struct rt_rq *rt_rq)
  19831. rt_rq->push_cpu = nr_cpu_ids;
  19832. raw_spin_lock_init(&rt_rq->push_lock);
  19833. init_irq_work(&rt_rq->push_work, push_irq_work_func);
  19834. + rt_rq->push_work.flags |= IRQ_WORK_HARD_IRQ;
  19835. #endif
  19836. #endif /* CONFIG_SMP */
19837. /* We start in dequeued state, because no RT tasks are queued */
  19838. diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
  19839. index aa1f059de4f7..1bdd1e5f056d 100644
  19840. --- a/kernel/sched/sched.h
  19841. +++ b/kernel/sched/sched.h
  19842. @@ -1093,6 +1093,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
  19843. #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
  19844. #define WF_FORK 0x02 /* child wakeup after fork */
  19845. #define WF_MIGRATED 0x4 /* internal use, task got migrated */
  19846. +#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */
  19847. /*
  19848. * To aid in avoiding the subversion of "niceness" due to uneven distribution
  19849. @@ -1290,6 +1291,15 @@ extern void init_sched_dl_class(void);
  19850. extern void resched_curr(struct rq *rq);
  19851. extern void resched_cpu(int cpu);
  19852. +#ifdef CONFIG_PREEMPT_LAZY
  19853. +extern void resched_curr_lazy(struct rq *rq);
  19854. +#else
  19855. +static inline void resched_curr_lazy(struct rq *rq)
  19856. +{
  19857. + resched_curr(rq);
  19858. +}
  19859. +#endif
  19860. +
  19861. extern struct rt_bandwidth def_rt_bandwidth;
  19862. extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
  19863. diff --git a/kernel/sched/wait-simple.c b/kernel/sched/wait-simple.c
  19864. new file mode 100644
  19865. index 000000000000..7dfa86d1f654
  19866. --- /dev/null
  19867. +++ b/kernel/sched/wait-simple.c
  19868. @@ -0,0 +1,115 @@
  19869. +/*
  19870. + * Simple waitqueues without fancy flags and callbacks
  19871. + *
  19872. + * (C) 2011 Thomas Gleixner <tglx@linutronix.de>
  19873. + *
  19874. + * Based on kernel/wait.c
  19875. + *
  19876. + * For licencing details see kernel-base/COPYING
  19877. + */
  19878. +#include <linux/init.h>
  19879. +#include <linux/export.h>
  19880. +#include <linux/sched.h>
  19881. +#include <linux/wait-simple.h>
  19882. +
  19883. +/* Adds w to head->list. Must be called with head->lock locked. */
  19884. +static inline void __swait_enqueue(struct swait_head *head, struct swaiter *w)
  19885. +{
  19886. + list_add(&w->node, &head->list);
  19887. + /* We can't let the condition leak before the setting of head */
  19888. + smp_mb();
  19889. +}
  19890. +
  19891. +/* Removes w from head->list. Must be called with head->lock locked. */
  19892. +static inline void __swait_dequeue(struct swaiter *w)
  19893. +{
  19894. + list_del_init(&w->node);
  19895. +}
  19896. +
  19897. +void __init_swait_head(struct swait_head *head, struct lock_class_key *key)
  19898. +{
  19899. + raw_spin_lock_init(&head->lock);
  19900. + lockdep_set_class(&head->lock, key);
  19901. + INIT_LIST_HEAD(&head->list);
  19902. +}
  19903. +EXPORT_SYMBOL(__init_swait_head);
  19904. +
  19905. +void swait_prepare_locked(struct swait_head *head, struct swaiter *w)
  19906. +{
  19907. + w->task = current;
  19908. + if (list_empty(&w->node))
  19909. + __swait_enqueue(head, w);
  19910. +}
  19911. +
  19912. +void swait_prepare(struct swait_head *head, struct swaiter *w, int state)
  19913. +{
  19914. + unsigned long flags;
  19915. +
  19916. + raw_spin_lock_irqsave(&head->lock, flags);
  19917. + swait_prepare_locked(head, w);
  19918. + __set_current_state(state);
  19919. + raw_spin_unlock_irqrestore(&head->lock, flags);
  19920. +}
  19921. +EXPORT_SYMBOL(swait_prepare);
  19922. +
  19923. +void swait_finish_locked(struct swait_head *head, struct swaiter *w)
  19924. +{
  19925. + __set_current_state(TASK_RUNNING);
  19926. + if (w->task)
  19927. + __swait_dequeue(w);
  19928. +}
  19929. +
  19930. +void swait_finish(struct swait_head *head, struct swaiter *w)
  19931. +{
  19932. + unsigned long flags;
  19933. +
  19934. + __set_current_state(TASK_RUNNING);
  19935. + if (w->task) {
  19936. + raw_spin_lock_irqsave(&head->lock, flags);
  19937. + __swait_dequeue(w);
  19938. + raw_spin_unlock_irqrestore(&head->lock, flags);
  19939. + }
  19940. +}
  19941. +EXPORT_SYMBOL(swait_finish);
  19942. +
  19943. +unsigned int
  19944. +__swait_wake_locked(struct swait_head *head, unsigned int state, unsigned int num)
  19945. +{
  19946. + struct swaiter *curr, *next;
  19947. + int woken = 0;
  19948. +
  19949. + list_for_each_entry_safe(curr, next, &head->list, node) {
  19950. + if (wake_up_state(curr->task, state)) {
  19951. + __swait_dequeue(curr);
  19952. + /*
  19953. + * The waiting task can free the waiter as
  19954. + * soon as curr->task = NULL is written,
  19955. + * without taking any locks. A memory barrier
  19956. + * is required here to prevent the following
  19957. + * store to curr->task from getting ahead of
  19958. + * the dequeue operation.
  19959. + */
  19960. + smp_wmb();
  19961. + curr->task = NULL;
  19962. + if (++woken == num)
  19963. + break;
  19964. + }
  19965. + }
  19966. + return woken;
  19967. +}
  19968. +
  19969. +unsigned int
  19970. +__swait_wake(struct swait_head *head, unsigned int state, unsigned int num)
  19971. +{
  19972. + unsigned long flags;
  19973. + int woken;
  19974. +
  19975. + if (!swaitqueue_active(head))
  19976. + return 0;
  19977. +
  19978. + raw_spin_lock_irqsave(&head->lock, flags);
  19979. + woken = __swait_wake_locked(head, state, num);
  19980. + raw_spin_unlock_irqrestore(&head->lock, flags);
  19981. + return woken;
  19982. +}
  19983. +EXPORT_SYMBOL(__swait_wake);
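A usage sketch for the simple waitqueue added above. It assumes that swait_event_interruptible() and swait_wake() are provided by include/linux/wait-simple.h (added elsewhere in this patch and already used by kernel/sched/work-simple.c below); the condition variable and threads are made up.

static struct swait_head my_wq;
static bool my_cond;

static void my_setup(void)
{
	init_swait_head(&my_wq);	/* see __init_swait_head() above */
}

static int my_wait_thread(void *unused)
{
	/* sleeps in TASK_INTERRUPTIBLE until my_cond becomes true */
	swait_event_interruptible(my_wq, my_cond);
	return 0;
}

static void my_wake(void)
{
	my_cond = true;
	swait_wake(&my_wq);		/* ends up in __swait_wake() above */
}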
  19984. diff --git a/kernel/sched/work-simple.c b/kernel/sched/work-simple.c
  19985. new file mode 100644
  19986. index 000000000000..e57a0522573f
  19987. --- /dev/null
  19988. +++ b/kernel/sched/work-simple.c
  19989. @@ -0,0 +1,173 @@
  19990. +/*
  19991. + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de
  19992. + *
19993. + * Provides a PREEMPT_RT_FULL safe framework for enqueuing callbacks
19994. + * from irq context. The callbacks are executed in kthread context.
  19995. + */
  19996. +
  19997. +#include <linux/wait-simple.h>
  19998. +#include <linux/work-simple.h>
  19999. +#include <linux/kthread.h>
  20000. +#include <linux/slab.h>
  20001. +#include <linux/spinlock.h>
  20002. +#include <linux/export.h>
  20003. +
  20004. +#define SWORK_EVENT_PENDING (1 << 0)
  20005. +
  20006. +static DEFINE_MUTEX(worker_mutex);
  20007. +static struct sworker *glob_worker;
  20008. +
  20009. +struct sworker {
  20010. + struct list_head events;
  20011. + struct swait_head wq;
  20012. +
  20013. + raw_spinlock_t lock;
  20014. +
  20015. + struct task_struct *task;
  20016. + int refs;
  20017. +};
  20018. +
  20019. +static bool swork_readable(struct sworker *worker)
  20020. +{
  20021. + bool r;
  20022. +
  20023. + if (kthread_should_stop())
  20024. + return true;
  20025. +
  20026. + raw_spin_lock_irq(&worker->lock);
  20027. + r = !list_empty(&worker->events);
  20028. + raw_spin_unlock_irq(&worker->lock);
  20029. +
  20030. + return r;
  20031. +}
  20032. +
  20033. +static int swork_kthread(void *arg)
  20034. +{
  20035. + struct sworker *worker = arg;
  20036. +
  20037. + for (;;) {
  20038. + swait_event_interruptible(worker->wq,
  20039. + swork_readable(worker));
  20040. + if (kthread_should_stop())
  20041. + break;
  20042. +
  20043. + raw_spin_lock_irq(&worker->lock);
  20044. + while (!list_empty(&worker->events)) {
  20045. + struct swork_event *sev;
  20046. +
  20047. + sev = list_first_entry(&worker->events,
  20048. + struct swork_event, item);
  20049. + list_del(&sev->item);
  20050. + raw_spin_unlock_irq(&worker->lock);
  20051. +
  20052. + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING,
  20053. + &sev->flags));
  20054. + sev->func(sev);
  20055. + raw_spin_lock_irq(&worker->lock);
  20056. + }
  20057. + raw_spin_unlock_irq(&worker->lock);
  20058. + }
  20059. + return 0;
  20060. +}
  20061. +
  20062. +static struct sworker *swork_create(void)
  20063. +{
  20064. + struct sworker *worker;
  20065. +
  20066. + worker = kzalloc(sizeof(*worker), GFP_KERNEL);
  20067. + if (!worker)
  20068. + return ERR_PTR(-ENOMEM);
  20069. +
  20070. + INIT_LIST_HEAD(&worker->events);
  20071. + raw_spin_lock_init(&worker->lock);
  20072. + init_swait_head(&worker->wq);
  20073. +
  20074. + worker->task = kthread_run(swork_kthread, worker, "kswork");
  20075. + if (IS_ERR(worker->task)) {
  20076. + kfree(worker);
  20077. + return ERR_PTR(-ENOMEM);
  20078. + }
  20079. +
  20080. + return worker;
  20081. +}
  20082. +
  20083. +static void swork_destroy(struct sworker *worker)
  20084. +{
  20085. + kthread_stop(worker->task);
  20086. +
  20087. + WARN_ON(!list_empty(&worker->events));
  20088. + kfree(worker);
  20089. +}
  20090. +
  20091. +/**
  20092. + * swork_queue - queue swork
  20093. + *
20094. + * Returns %false if @sev was already queued, %true otherwise.
  20095. + *
  20096. + * The work is queued and processed on a random CPU
  20097. + */
  20098. +bool swork_queue(struct swork_event *sev)
  20099. +{
  20100. + unsigned long flags;
  20101. +
  20102. + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags))
  20103. + return false;
  20104. +
  20105. + raw_spin_lock_irqsave(&glob_worker->lock, flags);
  20106. + list_add_tail(&sev->item, &glob_worker->events);
  20107. + raw_spin_unlock_irqrestore(&glob_worker->lock, flags);
  20108. +
  20109. + swait_wake(&glob_worker->wq);
  20110. + return true;
  20111. +}
  20112. +EXPORT_SYMBOL_GPL(swork_queue);
  20113. +
  20114. +/**
  20115. + * swork_get - get an instance of the sworker
  20116. + *
20117. + * Returns a negative error code if the initialization of the worker
20118. + * failed, %0 otherwise.
  20119. + *
  20120. + */
  20121. +int swork_get(void)
  20122. +{
  20123. + struct sworker *worker;
  20124. +
  20125. + mutex_lock(&worker_mutex);
  20126. + if (!glob_worker) {
  20127. + worker = swork_create();
  20128. + if (IS_ERR(worker)) {
  20129. + mutex_unlock(&worker_mutex);
  20130. + return -ENOMEM;
  20131. + }
  20132. +
  20133. + glob_worker = worker;
  20134. + }
  20135. +
  20136. + glob_worker->refs++;
  20137. + mutex_unlock(&worker_mutex);
  20138. +
  20139. + return 0;
  20140. +}
  20141. +EXPORT_SYMBOL_GPL(swork_get);
  20142. +
  20143. +/**
  20144. + * swork_put - puts an instance of the sworker
  20145. + *
  20146. + * Will destroy the sworker thread. This function must not be called until all
  20147. + * queued events have been completed.
  20148. + */
  20149. +void swork_put(void)
  20150. +{
  20151. + mutex_lock(&worker_mutex);
  20152. +
  20153. + glob_worker->refs--;
  20154. + if (glob_worker->refs > 0)
  20155. + goto out;
  20156. +
  20157. + swork_destroy(glob_worker);
  20158. + glob_worker = NULL;
  20159. +out:
  20160. + mutex_unlock(&worker_mutex);
  20161. +}
  20162. +EXPORT_SYMBOL_GPL(swork_put);
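A usage sketch for the simple work framework. Only swork_get(), swork_queue() and swork_put() are taken from the code above; the event, the callback and the interrupt handler wiring are made up for illustration.

static void my_deferred_work(struct swork_event *sev)
{
	/* runs in the kswork kthread and may sleep, even on PREEMPT_RT_FULL */
}

static struct swork_event my_event = {
	.func	= my_deferred_work,	/* .flags is 0, .item is set when queued */
};

static int my_init(void)
{
	return swork_get();		/* creates the kswork thread on first use */
}

static irqreturn_t my_irq(int irq, void *dev_id)
{
	swork_queue(&my_event);		/* safe from hard interrupt context */
	return IRQ_HANDLED;
}

static void my_exit(void)
{
	swork_put();			/* last user tears the worker down */
}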
  20163. diff --git a/kernel/signal.c b/kernel/signal.c
  20164. index 0206be728dac..1336e4c016ba 100644
  20165. --- a/kernel/signal.c
  20166. +++ b/kernel/signal.c
  20167. @@ -14,6 +14,7 @@
  20168. #include <linux/export.h>
  20169. #include <linux/init.h>
  20170. #include <linux/sched.h>
  20171. +#include <linux/sched/rt.h>
  20172. #include <linux/fs.h>
  20173. #include <linux/tty.h>
  20174. #include <linux/binfmts.h>
  20175. @@ -352,13 +353,45 @@ static bool task_participate_group_stop(struct task_struct *task)
  20176. return false;
  20177. }
  20178. +#ifdef __HAVE_ARCH_CMPXCHG
  20179. +static inline struct sigqueue *get_task_cache(struct task_struct *t)
  20180. +{
  20181. + struct sigqueue *q = t->sigqueue_cache;
  20182. +
  20183. + if (cmpxchg(&t->sigqueue_cache, q, NULL) != q)
  20184. + return NULL;
  20185. + return q;
  20186. +}
  20187. +
  20188. +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
  20189. +{
  20190. + if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
  20191. + return 0;
  20192. + return 1;
  20193. +}
  20194. +
  20195. +#else
  20196. +
  20197. +static inline struct sigqueue *get_task_cache(struct task_struct *t)
  20198. +{
  20199. + return NULL;
  20200. +}
  20201. +
  20202. +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
  20203. +{
  20204. + return 1;
  20205. +}
  20206. +
  20207. +#endif
  20208. +
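The helpers above implement a lock-free, single-slot per-task cache: cmpxchg() either atomically takes the cached sigqueue (swapping it for NULL) or atomically parks one in an empty slot, and any race simply falls back to the slab allocator. The same pattern in generic form (names are illustrative, not from this patch):

static void *one_slot_take(void **slot)
{
	void *obj = *slot;

	/* only succeed if the slot still holds what we read */
	if (cmpxchg(slot, obj, NULL) != obj)
		return NULL;
	return obj;
}

static int one_slot_put(void **slot, void *obj)
{
	/* non-zero return: slot already occupied, caller must free obj itself */
	return cmpxchg(slot, NULL, obj) != NULL;
}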
  20209. /*
  20210. * allocate a new signal queue record
  20211. * - this may be called without locks if and only if t == current, otherwise an
  20212. * appropriate lock must be held to stop the target task from exiting
  20213. */
  20214. static struct sigqueue *
  20215. -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
  20216. +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
  20217. + int override_rlimit, int fromslab)
  20218. {
  20219. struct sigqueue *q = NULL;
  20220. struct user_struct *user;
  20221. @@ -375,7 +408,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
  20222. if (override_rlimit ||
  20223. atomic_read(&user->sigpending) <=
  20224. task_rlimit(t, RLIMIT_SIGPENDING)) {
  20225. - q = kmem_cache_alloc(sigqueue_cachep, flags);
  20226. + if (!fromslab)
  20227. + q = get_task_cache(t);
  20228. + if (!q)
  20229. + q = kmem_cache_alloc(sigqueue_cachep, flags);
  20230. } else {
  20231. print_dropped_signal(sig);
  20232. }
  20233. @@ -392,6 +428,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
  20234. return q;
  20235. }
  20236. +static struct sigqueue *
  20237. +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags,
  20238. + int override_rlimit)
  20239. +{
  20240. + return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0);
  20241. +}
  20242. +
  20243. static void __sigqueue_free(struct sigqueue *q)
  20244. {
  20245. if (q->flags & SIGQUEUE_PREALLOC)
  20246. @@ -401,6 +444,21 @@ static void __sigqueue_free(struct sigqueue *q)
  20247. kmem_cache_free(sigqueue_cachep, q);
  20248. }
  20249. +static void sigqueue_free_current(struct sigqueue *q)
  20250. +{
  20251. + struct user_struct *up;
  20252. +
  20253. + if (q->flags & SIGQUEUE_PREALLOC)
  20254. + return;
  20255. +
  20256. + up = q->user;
  20257. + if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
  20258. + atomic_dec(&up->sigpending);
  20259. + free_uid(up);
  20260. + } else
  20261. + __sigqueue_free(q);
  20262. +}
  20263. +
  20264. void flush_sigqueue(struct sigpending *queue)
  20265. {
  20266. struct sigqueue *q;
  20267. @@ -414,6 +472,21 @@ void flush_sigqueue(struct sigpending *queue)
  20268. }
  20269. /*
  20270. + * Called from __exit_signal. Flush tsk->pending and
  20271. + * tsk->sigqueue_cache
  20272. + */
  20273. +void flush_task_sigqueue(struct task_struct *tsk)
  20274. +{
  20275. + struct sigqueue *q;
  20276. +
  20277. + flush_sigqueue(&tsk->pending);
  20278. +
  20279. + q = get_task_cache(tsk);
  20280. + if (q)
  20281. + kmem_cache_free(sigqueue_cachep, q);
  20282. +}
  20283. +
  20284. +/*
  20285. * Flush all pending signals for a task.
  20286. */
  20287. void __flush_signals(struct task_struct *t)
  20288. @@ -565,7 +638,7 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
  20289. still_pending:
  20290. list_del_init(&first->list);
  20291. copy_siginfo(info, &first->info);
  20292. - __sigqueue_free(first);
  20293. + sigqueue_free_current(first);
  20294. } else {
  20295. /*
  20296. * Ok, it wasn't in the queue. This must be
  20297. @@ -611,6 +684,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
  20298. {
  20299. int signr;
  20300. + WARN_ON_ONCE(tsk != current);
  20301. +
  20302. /* We only dequeue private signals from ourselves, we don't let
  20303. * signalfd steal them
  20304. */
  20305. @@ -1207,8 +1282,8 @@ int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
  20306. * We don't want to have recursive SIGSEGV's etc, for example,
  20307. * that is why we also clear SIGNAL_UNKILLABLE.
  20308. */
  20309. -int
  20310. -force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  20311. +static int
  20312. +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  20313. {
  20314. unsigned long int flags;
  20315. int ret, blocked, ignored;
  20316. @@ -1233,6 +1308,39 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  20317. return ret;
  20318. }
  20319. +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  20320. +{
  20321. +/*
  20322. + * On some archs, PREEMPT_RT has to delay sending a signal from a trap
20323. + * since it cannot enable preemption, and the signal code's spin_locks
  20324. + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
  20325. + * send the signal on exit of the trap.
  20326. + */
  20327. +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
  20328. + if (in_atomic()) {
  20329. + if (WARN_ON_ONCE(t != current))
  20330. + return 0;
  20331. + if (WARN_ON_ONCE(t->forced_info.si_signo))
  20332. + return 0;
  20333. +
  20334. + if (is_si_special(info)) {
  20335. + WARN_ON_ONCE(info != SEND_SIG_PRIV);
  20336. + t->forced_info.si_signo = sig;
  20337. + t->forced_info.si_errno = 0;
  20338. + t->forced_info.si_code = SI_KERNEL;
  20339. + t->forced_info.si_pid = 0;
  20340. + t->forced_info.si_uid = 0;
  20341. + } else {
  20342. + t->forced_info = *info;
  20343. + }
  20344. +
  20345. + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
  20346. + return 0;
  20347. + }
  20348. +#endif
  20349. + return do_force_sig_info(sig, info, t);
  20350. +}
  20351. +
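For the delayed path above to work, the architecture's notify-resume code has to pick up the stashed siginfo and deliver it once it is safe to do so. A sketch of what that consuming side is expected to look like (illustrative only; the actual arch hunks are not part of this file):

static void my_notify_resume_hook(void)
{
#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
	struct task_struct *t = current;

	if (unlikely(t->forced_info.si_signo)) {
		force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
		t->forced_info.si_signo = 0;	/* slot is free again */
	}
#endif
}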
  20352. /*
  20353. * Nuke all other threads in the group.
  20354. */
  20355. @@ -1267,12 +1375,12 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
  20356. * Disable interrupts early to avoid deadlocks.
  20357. * See rcu_read_unlock() comment header for details.
  20358. */
  20359. - local_irq_save(*flags);
  20360. + local_irq_save_nort(*flags);
  20361. rcu_read_lock();
  20362. sighand = rcu_dereference(tsk->sighand);
  20363. if (unlikely(sighand == NULL)) {
  20364. rcu_read_unlock();
  20365. - local_irq_restore(*flags);
  20366. + local_irq_restore_nort(*flags);
  20367. break;
  20368. }
  20369. /*
  20370. @@ -1293,7 +1401,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
  20371. }
  20372. spin_unlock(&sighand->siglock);
  20373. rcu_read_unlock();
  20374. - local_irq_restore(*flags);
  20375. + local_irq_restore_nort(*flags);
  20376. }
  20377. return sighand;
  20378. @@ -1536,7 +1644,8 @@ EXPORT_SYMBOL(kill_pid);
  20379. */
  20380. struct sigqueue *sigqueue_alloc(void)
  20381. {
  20382. - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0);
20383. + /* Preallocated sigqueue objects always come from the slab cache! */
  20384. + struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1);
  20385. if (q)
  20386. q->flags |= SIGQUEUE_PREALLOC;
  20387. @@ -1897,15 +2006,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
  20388. if (gstop_done && ptrace_reparented(current))
  20389. do_notify_parent_cldstop(current, false, why);
  20390. - /*
  20391. - * Don't want to allow preemption here, because
  20392. - * sys_ptrace() needs this task to be inactive.
  20393. - *
  20394. - * XXX: implement read_unlock_no_resched().
  20395. - */
  20396. - preempt_disable();
  20397. read_unlock(&tasklist_lock);
  20398. - preempt_enable_no_resched();
  20399. freezable_schedule();
  20400. } else {
  20401. /*
  20402. diff --git a/kernel/softirq.c b/kernel/softirq.c
  20403. index 479e4436f787..cb9c1d5dee10 100644
  20404. --- a/kernel/softirq.c
  20405. +++ b/kernel/softirq.c
  20406. @@ -21,10 +21,12 @@
  20407. #include <linux/freezer.h>
  20408. #include <linux/kthread.h>
  20409. #include <linux/rcupdate.h>
  20410. +#include <linux/delay.h>
  20411. #include <linux/ftrace.h>
  20412. #include <linux/smp.h>
  20413. #include <linux/smpboot.h>
  20414. #include <linux/tick.h>
  20415. +#include <linux/locallock.h>
  20416. #include <linux/irq.h>
  20417. #define CREATE_TRACE_POINTS
  20418. @@ -56,12 +58,108 @@ EXPORT_SYMBOL(irq_stat);
  20419. static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
  20420. DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
  20421. +#ifdef CONFIG_PREEMPT_RT_FULL
  20422. +#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ))
  20423. +DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd);
  20424. +#endif
  20425. const char * const softirq_to_name[NR_SOFTIRQS] = {
  20426. "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
  20427. "TASKLET", "SCHED", "HRTIMER", "RCU"
  20428. };
  20429. +#ifdef CONFIG_NO_HZ_COMMON
  20430. +# ifdef CONFIG_PREEMPT_RT_FULL
  20431. +
  20432. +struct softirq_runner {
  20433. + struct task_struct *runner[NR_SOFTIRQS];
  20434. +};
  20435. +
  20436. +static DEFINE_PER_CPU(struct softirq_runner, softirq_runners);
  20437. +
  20438. +static inline void softirq_set_runner(unsigned int sirq)
  20439. +{
  20440. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  20441. +
  20442. + sr->runner[sirq] = current;
  20443. +}
  20444. +
  20445. +static inline void softirq_clr_runner(unsigned int sirq)
  20446. +{
  20447. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  20448. +
  20449. + sr->runner[sirq] = NULL;
  20450. +}
  20451. +
  20452. +/*
  20453. + * On preempt-rt a softirq running context might be blocked on a
  20454. + * lock. There might be no other runnable task on this CPU because the
  20455. + * lock owner runs on some other CPU. So we have to go into idle with
20456. + * the pending bit set. Therefore we need to check this, otherwise we
20457. + * warn about false positives, which confuses users and defeats the
  20458. + * whole purpose of this test.
  20459. + *
  20460. + * This code is called with interrupts disabled.
  20461. + */
  20462. +void softirq_check_pending_idle(void)
  20463. +{
  20464. + static int rate_limit;
  20465. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  20466. + u32 warnpending;
  20467. + int i;
  20468. +
  20469. + if (rate_limit >= 10)
  20470. + return;
  20471. +
  20472. + warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
  20473. + for (i = 0; i < NR_SOFTIRQS; i++) {
  20474. + struct task_struct *tsk = sr->runner[i];
  20475. +
  20476. + /*
  20477. + * The wakeup code in rtmutex.c wakes up the task
  20478. + * _before_ it sets pi_blocked_on to NULL under
  20479. + * tsk->pi_lock. So we need to check for both: state
  20480. + * and pi_blocked_on.
  20481. + */
  20482. + if (tsk) {
  20483. + raw_spin_lock(&tsk->pi_lock);
  20484. + if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
  20485. + /* Clear all bits pending in that task */
  20486. + warnpending &= ~(tsk->softirqs_raised);
  20487. + warnpending &= ~(1 << i);
  20488. + }
  20489. + raw_spin_unlock(&tsk->pi_lock);
  20490. + }
  20491. + }
  20492. +
  20493. + if (warnpending) {
  20494. + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
  20495. + warnpending);
  20496. + rate_limit++;
  20497. + }
  20498. +}
  20499. +# else
  20500. +/*
  20501. + * On !PREEMPT_RT we just printk rate limited:
  20502. + */
  20503. +void softirq_check_pending_idle(void)
  20504. +{
  20505. + static int rate_limit;
  20506. +
  20507. + if (rate_limit < 10 &&
  20508. + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
  20509. + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
  20510. + local_softirq_pending());
  20511. + rate_limit++;
  20512. + }
  20513. +}
  20514. +# endif
  20515. +
  20516. +#else /* !CONFIG_NO_HZ_COMMON */
  20517. +static inline void softirq_set_runner(unsigned int sirq) { }
  20518. +static inline void softirq_clr_runner(unsigned int sirq) { }
  20519. +#endif
  20520. +
  20521. /*
  20522. * we cannot loop indefinitely here to avoid userspace starvation,
  20523. * but we also don't want to introduce a worst case 1/HZ latency
  20524. @@ -77,6 +175,79 @@ static void wakeup_softirqd(void)
  20525. wake_up_process(tsk);
  20526. }
  20527. +#ifdef CONFIG_PREEMPT_RT_FULL
  20528. +static void wakeup_timer_softirqd(void)
  20529. +{
  20530. + /* Interrupts are disabled: no need to stop preemption */
  20531. + struct task_struct *tsk = __this_cpu_read(ktimer_softirqd);
  20532. +
  20533. + if (tsk && tsk->state != TASK_RUNNING)
  20534. + wake_up_process(tsk);
  20535. +}
  20536. +#endif
  20537. +
  20538. +static void handle_softirq(unsigned int vec_nr)
  20539. +{
  20540. + struct softirq_action *h = softirq_vec + vec_nr;
  20541. + int prev_count;
  20542. +
  20543. + prev_count = preempt_count();
  20544. +
  20545. + kstat_incr_softirqs_this_cpu(vec_nr);
  20546. +
  20547. + trace_softirq_entry(vec_nr);
  20548. + h->action(h);
  20549. + trace_softirq_exit(vec_nr);
  20550. + if (unlikely(prev_count != preempt_count())) {
  20551. + pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
  20552. + vec_nr, softirq_to_name[vec_nr], h->action,
  20553. + prev_count, preempt_count());
  20554. + preempt_count_set(prev_count);
  20555. + }
  20556. +}
  20557. +
  20558. +#ifndef CONFIG_PREEMPT_RT_FULL
  20559. +static inline int ksoftirqd_softirq_pending(void)
  20560. +{
  20561. + return local_softirq_pending();
  20562. +}
  20563. +
  20564. +static void handle_pending_softirqs(u32 pending)
  20565. +{
  20566. + struct softirq_action *h = softirq_vec;
  20567. + int softirq_bit;
  20568. +
  20569. + local_irq_enable();
  20570. +
  20571. + h = softirq_vec;
  20572. +
  20573. + while ((softirq_bit = ffs(pending))) {
  20574. + unsigned int vec_nr;
  20575. +
  20576. + h += softirq_bit - 1;
  20577. + vec_nr = h - softirq_vec;
  20578. + handle_softirq(vec_nr);
  20579. +
  20580. + h++;
  20581. + pending >>= softirq_bit;
  20582. + }
  20583. +
  20584. + rcu_bh_qs();
  20585. + local_irq_disable();
  20586. +}
  20587. +
  20588. +static void run_ksoftirqd(unsigned int cpu)
  20589. +{
  20590. + local_irq_disable();
  20591. + if (ksoftirqd_softirq_pending()) {
  20592. + __do_softirq();
  20593. + local_irq_enable();
  20594. + cond_resched_rcu_qs();
  20595. + return;
  20596. + }
  20597. + local_irq_enable();
  20598. +}
  20599. +
  20600. /*
  20601. * preempt_count and SOFTIRQ_OFFSET usage:
  20602. * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
  20603. @@ -116,9 +287,9 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
  20604. if (preempt_count() == cnt) {
  20605. #ifdef CONFIG_DEBUG_PREEMPT
  20606. - current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1);
  20607. + current->preempt_disable_ip = get_lock_parent_ip();
  20608. #endif
  20609. - trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
  20610. + trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
  20611. }
  20612. }
  20613. EXPORT_SYMBOL(__local_bh_disable_ip);
  20614. @@ -232,10 +403,8 @@ asmlinkage __visible void __do_softirq(void)
  20615. unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
  20616. unsigned long old_flags = current->flags;
  20617. int max_restart = MAX_SOFTIRQ_RESTART;
  20618. - struct softirq_action *h;
  20619. bool in_hardirq;
  20620. __u32 pending;
  20621. - int softirq_bit;
  20622. /*
  20623. * Mask out PF_MEMALLOC s current task context is borrowed for the
  20624. @@ -254,36 +423,7 @@ restart:
  20625. /* Reset the pending bitmask before enabling irqs */
  20626. set_softirq_pending(0);
  20627. - local_irq_enable();
  20628. -
  20629. - h = softirq_vec;
  20630. -
  20631. - while ((softirq_bit = ffs(pending))) {
  20632. - unsigned int vec_nr;
  20633. - int prev_count;
  20634. -
  20635. - h += softirq_bit - 1;
  20636. -
  20637. - vec_nr = h - softirq_vec;
  20638. - prev_count = preempt_count();
  20639. -
  20640. - kstat_incr_softirqs_this_cpu(vec_nr);
  20641. -
  20642. - trace_softirq_entry(vec_nr);
  20643. - h->action(h);
  20644. - trace_softirq_exit(vec_nr);
  20645. - if (unlikely(prev_count != preempt_count())) {
  20646. - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
  20647. - vec_nr, softirq_to_name[vec_nr], h->action,
  20648. - prev_count, preempt_count());
  20649. - preempt_count_set(prev_count);
  20650. - }
  20651. - h++;
  20652. - pending >>= softirq_bit;
  20653. - }
  20654. -
  20655. - rcu_bh_qs();
  20656. - local_irq_disable();
  20657. + handle_pending_softirqs(pending);
  20658. pending = local_softirq_pending();
  20659. if (pending) {
  20660. @@ -320,6 +460,310 @@ asmlinkage __visible void do_softirq(void)
  20661. }
  20662. /*
  20663. + * This function must run with irqs disabled!
  20664. + */
  20665. +void raise_softirq_irqoff(unsigned int nr)
  20666. +{
  20667. + __raise_softirq_irqoff(nr);
  20668. +
  20669. + /*
  20670. + * If we're in an interrupt or softirq, we're done
  20671. + * (this also catches softirq-disabled code). We will
  20672. + * actually run the softirq once we return from
  20673. + * the irq or softirq.
  20674. + *
  20675. + * Otherwise we wake up ksoftirqd to make sure we
  20676. + * schedule the softirq soon.
  20677. + */
  20678. + if (!in_interrupt())
  20679. + wakeup_softirqd();
  20680. +}
  20681. +
  20682. +void __raise_softirq_irqoff(unsigned int nr)
  20683. +{
  20684. + trace_softirq_raise(nr);
  20685. + or_softirq_pending(1UL << nr);
  20686. +}
  20687. +
  20688. +static inline void local_bh_disable_nort(void) { local_bh_disable(); }
  20689. +static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
  20690. +static void ksoftirqd_set_sched_params(unsigned int cpu) { }
  20691. +
  20692. +#else /* !PREEMPT_RT_FULL */
  20693. +
  20694. +/*
  20695. + * On RT we serialize softirq execution with a cpu local lock per softirq
  20696. + */
  20697. +static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks);
  20698. +
  20699. +void __init softirq_early_init(void)
  20700. +{
  20701. + int i;
  20702. +
  20703. + for (i = 0; i < NR_SOFTIRQS; i++)
  20704. + local_irq_lock_init(local_softirq_locks[i]);
  20705. +}
  20706. +
  20707. +static void lock_softirq(int which)
  20708. +{
  20709. + local_lock(local_softirq_locks[which]);
  20710. +}
  20711. +
  20712. +static void unlock_softirq(int which)
  20713. +{
  20714. + local_unlock(local_softirq_locks[which]);
  20715. +}
  20716. +
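The per-CPU local_softirq_locks[] array above gives each softirq class its own lock, so two handlers of the same class serialize while different classes can still run concurrently. A minimal userspace sketch of that idea (hypothetical; pthread mutexes stand in for local_lock(), and "classes" stand in for softirq vectors):

#include <pthread.h>
#include <stdio.h>

#define NR_CLASSES 4                    /* stands in for NR_SOFTIRQS */

static pthread_mutex_t class_lock[NR_CLASSES] = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};

static void handle_class(int which)
{
        pthread_mutex_lock(&class_lock[which]);   /* lock_softirq(which) */
        printf("handler for class %d runs exclusively\n", which);
        pthread_mutex_unlock(&class_lock[which]); /* unlock_softirq(which) */
}

static void *worker(void *arg)
{
        handle_class((int)(long)arg % NR_CLASSES);
        return NULL;
}

int main(void)
{
        pthread_t t[8];

        for (long i = 0; i < 8; i++)
                pthread_create(&t[i], NULL, worker, (void *)i);
        for (int i = 0; i < 8; i++)
                pthread_join(t[i], NULL);
        return 0;
}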
  20717. +static void do_single_softirq(int which)
  20718. +{
  20719. + unsigned long old_flags = current->flags;
  20720. +
  20721. + current->flags &= ~PF_MEMALLOC;
  20722. + vtime_account_irq_enter(current);
  20723. + current->flags |= PF_IN_SOFTIRQ;
  20724. + lockdep_softirq_enter();
  20725. + local_irq_enable();
  20726. + handle_softirq(which);
  20727. + local_irq_disable();
  20728. + lockdep_softirq_exit();
  20729. + current->flags &= ~PF_IN_SOFTIRQ;
  20730. + vtime_account_irq_enter(current);
  20731. + tsk_restore_flags(current, old_flags, PF_MEMALLOC);
  20732. +}
  20733. +
  20734. +/*
  20735. + * Called with interrupts disabled. Process softirqs which were raised
  20736. + * in current context (or on behalf of ksoftirqd).
  20737. + */
  20738. +static void do_current_softirqs(void)
  20739. +{
  20740. + while (current->softirqs_raised) {
  20741. + int i = __ffs(current->softirqs_raised);
  20742. + unsigned int pending, mask = (1U << i);
  20743. +
  20744. + current->softirqs_raised &= ~mask;
  20745. + local_irq_enable();
  20746. +
  20747. + /*
  20748. + * If the lock is contended, we boost the owner to
  20749. + * process the softirq or leave the critical section
  20750. + * now.
  20751. + */
  20752. + lock_softirq(i);
  20753. + local_irq_disable();
  20754. + softirq_set_runner(i);
  20755. + /*
  20756. + * Check the local_softirq_pending() bits to see
  20757. + * whether we still need to process this softirq or
  20758. + * whether someone else has already taken care of it.
  20759. + */
  20760. + pending = local_softirq_pending();
  20761. + if (pending & mask) {
  20762. + set_softirq_pending(pending & ~mask);
  20763. + do_single_softirq(i);
  20764. + }
  20765. + softirq_clr_runner(i);
  20766. + WARN_ON(current->softirq_nestcnt != 1);
  20767. + local_irq_enable();
  20768. + unlock_softirq(i);
  20769. + local_irq_disable();
  20770. + }
  20771. +}
  20772. +
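do_current_softirqs() above walks the softirqs_raised bitmap lowest-bit-first and clears each bit before running its handler. A standalone sketch of that bit-walking loop (illustrative only; __builtin_ctz() plays the role of __ffs(), and dispatch() stands in for the lock/handle/unlock sequence):

#include <stdio.h>

static void dispatch(unsigned int nr)
{
        printf("softirq %u\n", nr);
}

static void run_pending(unsigned int pending)
{
        while (pending) {
                unsigned int i = __builtin_ctz(pending);  /* ~ __ffs() */
                unsigned int mask = 1U << i;

                pending &= ~mask;       /* clear before running, as the patch does */
                dispatch(i);
        }
}

int main(void)
{
        run_pending((1U << 1) | (1U << 3) | (1U << 6));
        return 0;
}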
  20773. +void __local_bh_disable(void)
  20774. +{
  20775. + if (++current->softirq_nestcnt == 1)
  20776. + migrate_disable();
  20777. +}
  20778. +EXPORT_SYMBOL(__local_bh_disable);
  20779. +
  20780. +void __local_bh_enable(void)
  20781. +{
  20782. + if (WARN_ON(current->softirq_nestcnt == 0))
  20783. + return;
  20784. +
  20785. + local_irq_disable();
  20786. + if (current->softirq_nestcnt == 1 && current->softirqs_raised)
  20787. + do_current_softirqs();
  20788. + local_irq_enable();
  20789. +
  20790. + if (--current->softirq_nestcnt == 0)
  20791. + migrate_enable();
  20792. +}
  20793. +EXPORT_SYMBOL(__local_bh_enable);
  20794. +
  20795. +void _local_bh_enable(void)
  20796. +{
  20797. + if (WARN_ON(current->softirq_nestcnt == 0))
  20798. + return;
  20799. + if (--current->softirq_nestcnt == 0)
  20800. + migrate_enable();
  20801. +}
  20802. +EXPORT_SYMBOL(_local_bh_enable);
  20803. +
  20804. +int in_serving_softirq(void)
  20805. +{
  20806. + return current->flags & PF_IN_SOFTIRQ;
  20807. +}
  20808. +EXPORT_SYMBOL(in_serving_softirq);
  20809. +
  20810. +/* Called with preemption disabled */
  20811. +static void run_ksoftirqd(unsigned int cpu)
  20812. +{
  20813. + local_irq_disable();
  20814. + current->softirq_nestcnt++;
  20815. +
  20816. + do_current_softirqs();
  20817. + current->softirq_nestcnt--;
  20818. + local_irq_enable();
  20819. + cond_resched_rcu_qs();
  20820. +}
  20821. +
  20822. +/*
  20823. + * Called from netif_rx_ni(). Preemption enabled, but migration
  20824. + * disabled. So the cpu can't go away under us.
  20825. + */
  20826. +void thread_do_softirq(void)
  20827. +{
  20828. + if (!in_serving_softirq() && current->softirqs_raised) {
  20829. + current->softirq_nestcnt++;
  20830. + do_current_softirqs();
  20831. + current->softirq_nestcnt--;
  20832. + }
  20833. +}
  20834. +
  20835. +static void do_raise_softirq_irqoff(unsigned int nr)
  20836. +{
  20837. + unsigned int mask;
  20838. +
  20839. + mask = 1UL << nr;
  20840. +
  20841. + trace_softirq_raise(nr);
  20842. + or_softirq_pending(mask);
  20843. +
  20844. + /*
  20845. + * If we are not in a hard interrupt and inside a bh disabled
  20846. + * region, we simply raise the flag on current. local_bh_enable()
  20847. + * will make sure that the softirq is executed. Otherwise we
  20848. + * delegate it to ksoftirqd.
  20849. + */
  20850. + if (!in_irq() && current->softirq_nestcnt)
  20851. + current->softirqs_raised |= mask;
  20852. + else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd))
  20853. + return;
  20854. +
  20855. + if (mask & TIMER_SOFTIRQS)
  20856. + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
  20857. + else
  20858. + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
  20859. +}
  20860. +
  20861. +static void wakeup_proper_softirq(unsigned int nr)
  20862. +{
  20863. + if ((1UL << nr) & TIMER_SOFTIRQS)
  20864. + wakeup_timer_softirqd();
  20865. + else
  20866. + wakeup_softirqd();
  20867. +}
  20868. +
  20869. +
  20870. +void __raise_softirq_irqoff(unsigned int nr)
  20871. +{
  20872. + do_raise_softirq_irqoff(nr);
  20873. + if (!in_irq() && !current->softirq_nestcnt)
  20874. + wakeup_proper_softirq(nr);
  20875. +}
  20876. +
  20877. +/*
  20878. + * Same as __raise_softirq_irqoff() but will process them in ksoftirqd
  20879. + */
  20880. +void __raise_softirq_irqoff_ksoft(unsigned int nr)
  20881. +{
  20882. + unsigned int mask;
  20883. +
  20884. + if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) ||
  20885. + !__this_cpu_read(ktimer_softirqd)))
  20886. + return;
  20887. + mask = 1UL << nr;
  20888. +
  20889. + trace_softirq_raise(nr);
  20890. + or_softirq_pending(mask);
  20891. + if (mask & TIMER_SOFTIRQS)
  20892. + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
  20893. + else
  20894. + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
  20895. + wakeup_proper_softirq(nr);
  20896. +}
  20897. +
  20898. +/*
  20899. + * This function must run with irqs disabled!
  20900. + */
  20901. +void raise_softirq_irqoff(unsigned int nr)
  20902. +{
  20903. + do_raise_softirq_irqoff(nr);
  20904. +
  20905. + /*
  20906. + * If we're in a hard interrupt we let the irq return code deal
  20907. + * with the wakeup of ksoftirqd.
  20908. + */
  20909. + if (in_irq())
  20910. + return;
  20911. + /*
  20912. + * If we are in thread context but outside of a bh disabled
  20913. + * region, we need to wake ksoftirqd as well.
  20914. + *
  20915. + * CHECKME: Some of the places which do that could be wrapped
  20916. + * into local_bh_disable/enable pairs. Though it's unclear
  20917. + * whether this is worth the effort. To find those places just
  20918. + * raise a WARN() if the condition is met.
  20919. + */
  20920. + if (!current->softirq_nestcnt)
  20921. + wakeup_proper_softirq(nr);
  20922. +}
  20923. +
  20924. +static inline int ksoftirqd_softirq_pending(void)
  20925. +{
  20926. + return current->softirqs_raised;
  20927. +}
  20928. +
  20929. +static inline void local_bh_disable_nort(void) { }
  20930. +static inline void _local_bh_enable_nort(void) { }
  20931. +
  20932. +static inline void ksoftirqd_set_sched_params(unsigned int cpu)
  20933. +{
  20934. + /* Take over all but timer pending softirqs when starting */
  20935. + local_irq_disable();
  20936. + current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS;
  20937. + local_irq_enable();
  20938. +}
  20939. +
  20940. +static inline void ktimer_softirqd_set_sched_params(unsigned int cpu)
  20941. +{
  20942. + struct sched_param param = { .sched_priority = 1 };
  20943. +
  20944. + sched_setscheduler(current, SCHED_FIFO, &param);
  20945. +
  20946. + /* Take over timer pending softirqs when starting */
  20947. + local_irq_disable();
  20948. + current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS;
  20949. + local_irq_enable();
  20950. +}
  20951. +
  20952. +static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu,
  20953. + bool online)
  20954. +{
  20955. + struct sched_param param = { .sched_priority = 0 };
  20956. +
  20957. + sched_setscheduler(current, SCHED_NORMAL, &param);
  20958. +}
  20959. +
  20960. +static int ktimer_softirqd_should_run(unsigned int cpu)
  20961. +{
  20962. + return current->softirqs_raised;
  20963. +}
  20964. +
  20965. +#endif /* PREEMPT_RT_FULL */
  20966. +/*
  20967. * Enter an interrupt context.
  20968. */
  20969. void irq_enter(void)
  20970. @@ -330,9 +774,9 @@ void irq_enter(void)
  20971. * Prevent raise_softirq from needlessly waking up ksoftirqd
  20972. * here, as softirq will be serviced on return from interrupt.
  20973. */
  20974. - local_bh_disable();
  20975. + local_bh_disable_nort();
  20976. tick_irq_enter();
  20977. - _local_bh_enable();
  20978. + _local_bh_enable_nort();
  20979. }
  20980. __irq_enter();
  20981. @@ -340,6 +784,7 @@ void irq_enter(void)
  20982. static inline void invoke_softirq(void)
  20983. {
  20984. +#ifndef CONFIG_PREEMPT_RT_FULL
  20985. if (!force_irqthreads) {
  20986. #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
  20987. /*
  20988. @@ -359,6 +804,18 @@ static inline void invoke_softirq(void)
  20989. } else {
  20990. wakeup_softirqd();
  20991. }
  20992. +#else /* PREEMPT_RT_FULL */
  20993. + unsigned long flags;
  20994. +
  20995. + local_irq_save(flags);
  20996. + if (__this_cpu_read(ksoftirqd) &&
  20997. + __this_cpu_read(ksoftirqd)->softirqs_raised)
  20998. + wakeup_softirqd();
  20999. + if (__this_cpu_read(ktimer_softirqd) &&
  21000. + __this_cpu_read(ktimer_softirqd)->softirqs_raised)
  21001. + wakeup_timer_softirqd();
  21002. + local_irq_restore(flags);
  21003. +#endif
  21004. }
  21005. static inline void tick_irq_exit(void)
  21006. @@ -395,26 +852,6 @@ void irq_exit(void)
  21007. trace_hardirq_exit(); /* must be last! */
  21008. }
  21009. -/*
  21010. - * This function must run with irqs disabled!
  21011. - */
  21012. -inline void raise_softirq_irqoff(unsigned int nr)
  21013. -{
  21014. - __raise_softirq_irqoff(nr);
  21015. -
  21016. - /*
  21017. - * If we're in an interrupt or softirq, we're done
  21018. - * (this also catches softirq-disabled code). We will
  21019. - * actually run the softirq once we return from
  21020. - * the irq or softirq.
  21021. - *
  21022. - * Otherwise we wake up ksoftirqd to make sure we
  21023. - * schedule the softirq soon.
  21024. - */
  21025. - if (!in_interrupt())
  21026. - wakeup_softirqd();
  21027. -}
  21028. -
  21029. void raise_softirq(unsigned int nr)
  21030. {
  21031. unsigned long flags;
  21032. @@ -424,12 +861,6 @@ void raise_softirq(unsigned int nr)
  21033. local_irq_restore(flags);
  21034. }
  21035. -void __raise_softirq_irqoff(unsigned int nr)
  21036. -{
  21037. - trace_softirq_raise(nr);
  21038. - or_softirq_pending(1UL << nr);
  21039. -}
  21040. -
  21041. void open_softirq(int nr, void (*action)(struct softirq_action *))
  21042. {
  21043. softirq_vec[nr].action = action;
  21044. @@ -446,15 +877,45 @@ struct tasklet_head {
  21045. static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
  21046. static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
  21047. +static void inline
  21048. +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
  21049. +{
  21050. + if (tasklet_trylock(t)) {
  21051. +again:
  21052. + /* We may have been preempted before tasklet_trylock
  21053. + * and __tasklet_action may have already run.
  21054. + * So double-check the SCHED bit while the tasklet
  21055. + * is locked before adding it to the list.
  21056. + */
  21057. + if (test_bit(TASKLET_STATE_SCHED, &t->state)) {
  21058. + t->next = NULL;
  21059. + *head->tail = t;
  21060. + head->tail = &(t->next);
  21061. + raise_softirq_irqoff(nr);
  21062. + tasklet_unlock(t);
  21063. + } else {
  21064. + /* This is subtle. If we hit the corner case above,
  21065. + * it is possible that we get preempted right here,
  21066. + * and another task has successfully called
  21067. + * tasklet_schedule(), then this function, and
  21068. + * failed on the trylock. Thus we must be sure,
  21069. + * before releasing the tasklet lock, that the
  21070. + * SCHED bit is clear. Otherwise the tasklet
  21071. + * may get its SCHED bit set but never be added
  21072. + * to the list.
  21073. + */
  21074. + if (!tasklet_tryunlock(t))
  21075. + goto again;
  21076. + }
  21077. + }
  21078. +}
  21079. +
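The trylock/re-check/tryunlock sequence in __tasklet_common_schedule() is easiest to see as a small state machine over the RUN and SCHED bits: only the holder of RUN may queue the tasklet, and RUN may only be dropped once SCHED is known to be clear. A simplified userspace model using C11 atomics (hypothetical; a single tasklet, with the list manipulation reduced to a printf):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define ST_SCHED (1u << 0)
#define ST_RUN   (1u << 1)

struct tl { _Atomic unsigned int state; };

static bool trylock(struct tl *t)           /* models tasklet_trylock() */
{
        return !(atomic_fetch_or(&t->state, ST_RUN) & ST_RUN);
}

static bool tryunlock(struct tl *t)         /* models tasklet_tryunlock() */
{
        unsigned int expect = ST_RUN;

        /* Only the RUN -> 0 transition may release the lock. */
        return atomic_compare_exchange_strong(&t->state, &expect, 0);
}

static void queue_it(struct tl *t)          /* "add to list + raise softirq" */
{
        printf("queued, state=%x\n", (unsigned int)atomic_load(&t->state));
}

static void common_schedule(struct tl *t)   /* models __tasklet_common_schedule() */
{
        if (!trylock(t))
                return;                     /* the softirq side owns it */
again:
        if (atomic_load(&t->state) & ST_SCHED) {
                queue_it(t);
                atomic_fetch_and(&t->state, ~ST_RUN);   /* tasklet_unlock() */
        } else if (!tryunlock(t)) {
                goto again;                 /* SCHED got set after our check */
        }
}

int main(void)
{
        struct tl t = { ST_SCHED };

        common_schedule(&t);
        return 0;
}

The point of the compare-and-swap in tryunlock() is exactly the corner case the comment above describes: if SCHED sneaks in between the check and the unlock, the CAS fails and the loop re-queues instead of dropping the work.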
  21080. void __tasklet_schedule(struct tasklet_struct *t)
  21081. {
  21082. unsigned long flags;
  21083. local_irq_save(flags);
  21084. - t->next = NULL;
  21085. - *__this_cpu_read(tasklet_vec.tail) = t;
  21086. - __this_cpu_write(tasklet_vec.tail, &(t->next));
  21087. - raise_softirq_irqoff(TASKLET_SOFTIRQ);
  21088. + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
  21089. local_irq_restore(flags);
  21090. }
  21091. EXPORT_SYMBOL(__tasklet_schedule);
  21092. @@ -464,10 +925,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
  21093. unsigned long flags;
  21094. local_irq_save(flags);
  21095. - t->next = NULL;
  21096. - *__this_cpu_read(tasklet_hi_vec.tail) = t;
  21097. - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
  21098. - raise_softirq_irqoff(HI_SOFTIRQ);
  21099. + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
  21100. local_irq_restore(flags);
  21101. }
  21102. EXPORT_SYMBOL(__tasklet_hi_schedule);
  21103. @@ -476,82 +934,122 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t)
  21104. {
  21105. BUG_ON(!irqs_disabled());
  21106. - t->next = __this_cpu_read(tasklet_hi_vec.head);
  21107. - __this_cpu_write(tasklet_hi_vec.head, t);
  21108. - __raise_softirq_irqoff(HI_SOFTIRQ);
  21109. + __tasklet_hi_schedule(t);
  21110. }
  21111. EXPORT_SYMBOL(__tasklet_hi_schedule_first);
  21112. -static void tasklet_action(struct softirq_action *a)
  21113. +void tasklet_enable(struct tasklet_struct *t)
  21114. {
  21115. - struct tasklet_struct *list;
  21116. + if (!atomic_dec_and_test(&t->count))
  21117. + return;
  21118. + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
  21119. + tasklet_schedule(t);
  21120. +}
  21121. +EXPORT_SYMBOL(tasklet_enable);
  21122. - local_irq_disable();
  21123. - list = __this_cpu_read(tasklet_vec.head);
  21124. - __this_cpu_write(tasklet_vec.head, NULL);
  21125. - __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
  21126. - local_irq_enable();
  21127. +static void __tasklet_action(struct softirq_action *a,
  21128. + struct tasklet_struct *list)
  21129. +{
  21130. + int loops = 1000000;
  21131. while (list) {
  21132. struct tasklet_struct *t = list;
  21133. list = list->next;
  21134. - if (tasklet_trylock(t)) {
  21135. - if (!atomic_read(&t->count)) {
  21136. - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
  21137. - &t->state))
  21138. - BUG();
  21139. - t->func(t->data);
  21140. - tasklet_unlock(t);
  21141. - continue;
  21142. - }
  21143. - tasklet_unlock(t);
  21144. + /*
  21145. + * Should always succeed - after a tasklet got on the
  21146. + * list (after getting the SCHED bit set from 0 to 1),
  21147. + * nothing but the tasklet softirq it got queued to can
  21148. + * lock it:
  21149. + */
  21150. + if (!tasklet_trylock(t)) {
  21151. + WARN_ON(1);
  21152. + continue;
  21153. }
  21154. - local_irq_disable();
  21155. t->next = NULL;
  21156. - *__this_cpu_read(tasklet_vec.tail) = t;
  21157. - __this_cpu_write(tasklet_vec.tail, &(t->next));
  21158. - __raise_softirq_irqoff(TASKLET_SOFTIRQ);
  21159. - local_irq_enable();
  21160. +
  21161. + /*
  21162. + * If we cannot handle the tasklet because it's disabled,
  21163. + * mark it as pending. tasklet_enable() will later
  21164. + * re-schedule the tasklet.
  21165. + */
  21166. + if (unlikely(atomic_read(&t->count))) {
  21167. +out_disabled:
  21168. + /* implicit unlock: */
  21169. + wmb();
  21170. + t->state = TASKLET_STATEF_PENDING;
  21171. + continue;
  21172. + }
  21173. +
  21174. + /*
  21175. + * From this point on the tasklet might be rescheduled
  21176. + * on another CPU, but it can only be added to another
  21177. + * CPU's tasklet list if we unlock the tasklet (which we
  21178. + * don't do yet).
  21179. + */
  21180. + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
  21181. + WARN_ON(1);
  21182. +
  21183. +again:
  21184. + t->func(t->data);
  21185. +
  21186. + /*
  21187. + * Try to unlock the tasklet. We must use cmpxchg, because
  21188. + * another CPU might have scheduled or disabled the tasklet.
  21189. + * We only allow the STATE_RUN -> 0 transition here.
  21190. + */
  21191. + while (!tasklet_tryunlock(t)) {
  21192. + /*
  21193. + * If it got disabled meanwhile, bail out:
  21194. + */
  21195. + if (atomic_read(&t->count))
  21196. + goto out_disabled;
  21197. + /*
  21198. + * If it got scheduled meanwhile, re-execute
  21199. + * the tasklet function:
  21200. + */
  21201. + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
  21202. + goto again;
  21203. + if (!--loops) {
  21204. + printk("hm, tasklet state: %08lx\n", t->state);
  21205. + WARN_ON(1);
  21206. + tasklet_unlock(t);
  21207. + break;
  21208. + }
  21209. + }
  21210. }
  21211. }
  21212. +static void tasklet_action(struct softirq_action *a)
  21213. +{
  21214. + struct tasklet_struct *list;
  21215. +
  21216. + local_irq_disable();
  21217. +
  21218. + list = __this_cpu_read(tasklet_vec.head);
  21219. + __this_cpu_write(tasklet_vec.head, NULL);
  21220. + __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
  21221. +
  21222. + local_irq_enable();
  21223. +
  21224. + __tasklet_action(a, list);
  21225. +}
  21226. +
  21227. static void tasklet_hi_action(struct softirq_action *a)
  21228. {
  21229. struct tasklet_struct *list;
  21230. local_irq_disable();
  21231. +
  21232. list = __this_cpu_read(tasklet_hi_vec.head);
  21233. __this_cpu_write(tasklet_hi_vec.head, NULL);
  21234. __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
  21235. - local_irq_enable();
  21236. - while (list) {
  21237. - struct tasklet_struct *t = list;
  21238. -
  21239. - list = list->next;
  21240. -
  21241. - if (tasklet_trylock(t)) {
  21242. - if (!atomic_read(&t->count)) {
  21243. - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
  21244. - &t->state))
  21245. - BUG();
  21246. - t->func(t->data);
  21247. - tasklet_unlock(t);
  21248. - continue;
  21249. - }
  21250. - tasklet_unlock(t);
  21251. - }
  21252. + local_irq_enable();
  21253. - local_irq_disable();
  21254. - t->next = NULL;
  21255. - *__this_cpu_read(tasklet_hi_vec.tail) = t;
  21256. - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
  21257. - __raise_softirq_irqoff(HI_SOFTIRQ);
  21258. - local_irq_enable();
  21259. - }
  21260. + __tasklet_action(a, list);
  21261. }
  21262. void tasklet_init(struct tasklet_struct *t,
  21263. @@ -572,7 +1070,7 @@ void tasklet_kill(struct tasklet_struct *t)
  21264. while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
  21265. do {
  21266. - yield();
  21267. + msleep(1);
  21268. } while (test_bit(TASKLET_STATE_SCHED, &t->state));
  21269. }
  21270. tasklet_unlock_wait(t);
  21271. @@ -646,25 +1144,26 @@ void __init softirq_init(void)
  21272. open_softirq(HI_SOFTIRQ, tasklet_hi_action);
  21273. }
  21274. -static int ksoftirqd_should_run(unsigned int cpu)
  21275. -{
  21276. - return local_softirq_pending();
  21277. -}
  21278. -
  21279. -static void run_ksoftirqd(unsigned int cpu)
  21280. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  21281. +void tasklet_unlock_wait(struct tasklet_struct *t)
  21282. {
  21283. - local_irq_disable();
  21284. - if (local_softirq_pending()) {
  21285. + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
  21286. /*
  21287. - * We can safely run softirq on inline stack, as we are not deep
  21288. - * in the task stack here.
  21289. + * Hack for now to avoid this busy-loop:
  21290. */
  21291. - __do_softirq();
  21292. - local_irq_enable();
  21293. - cond_resched_rcu_qs();
  21294. - return;
  21295. +#ifdef CONFIG_PREEMPT_RT_FULL
  21296. + msleep(1);
  21297. +#else
  21298. + barrier();
  21299. +#endif
  21300. }
  21301. - local_irq_enable();
  21302. +}
  21303. +EXPORT_SYMBOL(tasklet_unlock_wait);
  21304. +#endif
  21305. +
  21306. +static int ksoftirqd_should_run(unsigned int cpu)
  21307. +{
  21308. + return ksoftirqd_softirq_pending();
  21309. }
  21310. #ifdef CONFIG_HOTPLUG_CPU
  21311. @@ -746,16 +1245,31 @@ static struct notifier_block cpu_nfb = {
  21312. static struct smp_hotplug_thread softirq_threads = {
  21313. .store = &ksoftirqd,
  21314. + .setup = ksoftirqd_set_sched_params,
  21315. .thread_should_run = ksoftirqd_should_run,
  21316. .thread_fn = run_ksoftirqd,
  21317. .thread_comm = "ksoftirqd/%u",
  21318. };
  21319. +#ifdef CONFIG_PREEMPT_RT_FULL
  21320. +static struct smp_hotplug_thread softirq_timer_threads = {
  21321. + .store = &ktimer_softirqd,
  21322. + .setup = ktimer_softirqd_set_sched_params,
  21323. + .cleanup = ktimer_softirqd_clr_sched_params,
  21324. + .thread_should_run = ktimer_softirqd_should_run,
  21325. + .thread_fn = run_ksoftirqd,
  21326. + .thread_comm = "ktimersoftd/%u",
  21327. +};
  21328. +#endif
  21329. +
  21330. static __init int spawn_ksoftirqd(void)
  21331. {
  21332. register_cpu_notifier(&cpu_nfb);
  21333. BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
  21334. +#ifdef CONFIG_PREEMPT_RT_FULL
  21335. + BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads));
  21336. +#endif
  21337. return 0;
  21338. }
  21339. diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
  21340. index 695f0c6cd169..d3ea2452e291 100644
  21341. --- a/kernel/stop_machine.c
  21342. +++ b/kernel/stop_machine.c
  21343. @@ -35,7 +35,7 @@ struct cpu_stop_done {
  21344. /* the actual stopper, one per every possible cpu, enabled on online cpus */
  21345. struct cpu_stopper {
  21346. - spinlock_t lock;
  21347. + raw_spinlock_t lock;
  21348. bool enabled; /* is this stopper enabled? */
  21349. struct list_head works; /* list of pending works */
  21350. };
  21351. @@ -78,7 +78,7 @@ static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
  21352. unsigned long flags;
  21353. - spin_lock_irqsave(&stopper->lock, flags);
  21354. + raw_spin_lock_irqsave(&stopper->lock, flags);
  21355. if (stopper->enabled) {
  21356. list_add_tail(&work->list, &stopper->works);
  21357. @@ -86,7 +86,7 @@ static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
  21358. } else
  21359. cpu_stop_signal_done(work->done, false);
  21360. - spin_unlock_irqrestore(&stopper->lock, flags);
  21361. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  21362. }
  21363. /**
  21364. @@ -248,7 +248,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
  21365. struct irq_cpu_stop_queue_work_info call_args;
  21366. struct multi_stop_data msdata;
  21367. - preempt_disable();
  21368. + preempt_disable_nort();
  21369. msdata = (struct multi_stop_data){
  21370. .fn = fn,
  21371. .data = arg,
  21372. @@ -281,7 +281,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
  21373. * This relies on the stopper workqueues to be FIFO.
  21374. */
  21375. if (!cpu_active(cpu1) || !cpu_active(cpu2)) {
  21376. - preempt_enable();
  21377. + preempt_enable_nort();
  21378. return -ENOENT;
  21379. }
  21380. @@ -295,7 +295,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
  21381. &irq_cpu_stop_queue_work,
  21382. &call_args, 1);
  21383. lg_local_unlock(&stop_cpus_lock);
  21384. - preempt_enable();
  21385. + preempt_enable_nort();
  21386. wait_for_completion(&done.completion);
  21387. @@ -329,7 +329,7 @@ static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
  21388. static void queue_stop_cpus_work(const struct cpumask *cpumask,
  21389. cpu_stop_fn_t fn, void *arg,
  21390. - struct cpu_stop_done *done)
  21391. + struct cpu_stop_done *done, bool inactive)
  21392. {
  21393. struct cpu_stop_work *work;
  21394. unsigned int cpu;
  21395. @@ -343,11 +343,13 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
  21396. }
  21397. /*
  21398. - * Disable preemption while queueing to avoid getting
  21399. - * preempted by a stopper which might wait for other stoppers
  21400. - * to enter @fn which can lead to deadlock.
  21401. + * Make sure that all work is queued on all cpus before
  21402. + * any of the cpus can execute it.
  21403. */
  21404. - lg_global_lock(&stop_cpus_lock);
  21405. + if (!inactive)
  21406. + lg_global_lock(&stop_cpus_lock);
  21407. + else
  21408. + lg_global_trylock_relax(&stop_cpus_lock);
  21409. for_each_cpu(cpu, cpumask)
  21410. cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
  21411. lg_global_unlock(&stop_cpus_lock);
  21412. @@ -359,7 +361,7 @@ static int __stop_cpus(const struct cpumask *cpumask,
  21413. struct cpu_stop_done done;
  21414. cpu_stop_init_done(&done, cpumask_weight(cpumask));
  21415. - queue_stop_cpus_work(cpumask, fn, arg, &done);
  21416. + queue_stop_cpus_work(cpumask, fn, arg, &done, false);
  21417. wait_for_completion(&done.completion);
  21418. return done.executed ? done.ret : -ENOENT;
  21419. }
  21420. @@ -439,9 +441,9 @@ static int cpu_stop_should_run(unsigned int cpu)
  21421. unsigned long flags;
  21422. int run;
  21423. - spin_lock_irqsave(&stopper->lock, flags);
  21424. + raw_spin_lock_irqsave(&stopper->lock, flags);
  21425. run = !list_empty(&stopper->works);
  21426. - spin_unlock_irqrestore(&stopper->lock, flags);
  21427. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  21428. return run;
  21429. }
  21430. @@ -453,13 +455,13 @@ static void cpu_stopper_thread(unsigned int cpu)
  21431. repeat:
  21432. work = NULL;
  21433. - spin_lock_irq(&stopper->lock);
  21434. + raw_spin_lock_irq(&stopper->lock);
  21435. if (!list_empty(&stopper->works)) {
  21436. work = list_first_entry(&stopper->works,
  21437. struct cpu_stop_work, list);
  21438. list_del_init(&work->list);
  21439. }
  21440. - spin_unlock_irq(&stopper->lock);
  21441. + raw_spin_unlock_irq(&stopper->lock);
  21442. if (work) {
  21443. cpu_stop_fn_t fn = work->fn;
  21444. @@ -467,6 +469,16 @@ repeat:
  21445. struct cpu_stop_done *done = work->done;
  21446. char ksym_buf[KSYM_NAME_LEN] __maybe_unused;
  21447. + /*
  21448. + * Wait until the stopper has finished scheduling on
  21449. + * all cpus.
  21450. + */
  21451. + lg_global_lock(&stop_cpus_lock);
  21452. + /*
  21453. + * Let other cpu threads continue as well
  21454. + */
  21455. + lg_global_unlock(&stop_cpus_lock);
  21456. +
  21457. /* cpu stop callbacks are not allowed to sleep */
  21458. preempt_disable();
  21459. @@ -500,20 +512,20 @@ static void cpu_stop_park(unsigned int cpu)
  21460. unsigned long flags;
  21461. /* drain remaining works */
  21462. - spin_lock_irqsave(&stopper->lock, flags);
  21463. + raw_spin_lock_irqsave(&stopper->lock, flags);
  21464. list_for_each_entry(work, &stopper->works, list)
  21465. cpu_stop_signal_done(work->done, false);
  21466. stopper->enabled = false;
  21467. - spin_unlock_irqrestore(&stopper->lock, flags);
  21468. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  21469. }
  21470. static void cpu_stop_unpark(unsigned int cpu)
  21471. {
  21472. struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
  21473. - spin_lock_irq(&stopper->lock);
  21474. + raw_spin_lock_irq(&stopper->lock);
  21475. stopper->enabled = true;
  21476. - spin_unlock_irq(&stopper->lock);
  21477. + raw_spin_unlock_irq(&stopper->lock);
  21478. }
  21479. static struct smp_hotplug_thread cpu_stop_threads = {
  21480. @@ -535,10 +547,12 @@ static int __init cpu_stop_init(void)
  21481. for_each_possible_cpu(cpu) {
  21482. struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
  21483. - spin_lock_init(&stopper->lock);
  21484. + raw_spin_lock_init(&stopper->lock);
  21485. INIT_LIST_HEAD(&stopper->works);
  21486. }
  21487. + lg_lock_init(&stop_cpus_lock, "stop_cpus_lock");
  21488. +
  21489. BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
  21490. stop_machine_initialized = true;
  21491. return 0;
  21492. @@ -634,7 +648,7 @@ int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
  21493. set_state(&msdata, MULTI_STOP_PREPARE);
  21494. cpu_stop_init_done(&done, num_active_cpus());
  21495. queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
  21496. - &done);
  21497. + &done, true);
  21498. ret = multi_cpu_stop(&msdata);
  21499. /* Busy wait for completion. */
  21500. diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
  21501. index 93ef7190bdea..2c6be169bdc7 100644
  21502. --- a/kernel/time/hrtimer.c
  21503. +++ b/kernel/time/hrtimer.c
  21504. @@ -48,11 +48,13 @@
  21505. #include <linux/sched/rt.h>
  21506. #include <linux/sched/deadline.h>
  21507. #include <linux/timer.h>
  21508. +#include <linux/kthread.h>
  21509. #include <linux/freezer.h>
  21510. #include <asm/uaccess.h>
  21511. #include <trace/events/timer.h>
  21512. +#include <trace/events/hist.h>
  21513. #include "tick-internal.h"
  21514. @@ -576,8 +578,7 @@ static int hrtimer_reprogram(struct hrtimer *timer,
  21515. * When the callback is running, we do not reprogram the clock event
  21516. * device. The timer callback is either running on a different CPU or
  21517. * the callback is executed in the hrtimer_interrupt context. The
  21518. - * reprogramming is handled either by the softirq, which called the
  21519. - * callback or at the end of the hrtimer_interrupt.
  21520. + * reprogramming is handled at the end of the hrtimer_interrupt.
  21521. */
  21522. if (hrtimer_callback_running(timer))
  21523. return 0;
  21524. @@ -621,6 +622,9 @@ static int hrtimer_reprogram(struct hrtimer *timer,
  21525. return res;
  21526. }
  21527. +static void __run_hrtimer(struct hrtimer *timer, ktime_t *now);
  21528. +static int hrtimer_rt_defer(struct hrtimer *timer);
  21529. +
  21530. /*
  21531. * Initialize the high resolution related parts of cpu_base
  21532. */
  21533. @@ -630,6 +634,21 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
  21534. base->hres_active = 0;
  21535. }
  21536. +static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
  21537. + struct hrtimer_clock_base *base,
  21538. + int wakeup)
  21539. +{
  21540. + if (!hrtimer_reprogram(timer, base))
  21541. + return 0;
  21542. + if (!wakeup)
  21543. + return -ETIME;
  21544. +#ifdef CONFIG_PREEMPT_RT_BASE
  21545. + if (!hrtimer_rt_defer(timer))
  21546. + return -ETIME;
  21547. +#endif
  21548. + return 1;
  21549. +}
  21550. +
  21551. static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
  21552. {
  21553. ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
  21554. @@ -695,6 +714,44 @@ static void clock_was_set_work(struct work_struct *work)
  21555. static DECLARE_WORK(hrtimer_work, clock_was_set_work);
  21556. +#ifdef CONFIG_PREEMPT_RT_FULL
  21557. +/*
  21558. + * RT cannot call schedule_work() from hard interrupt context.
  21559. + * We need a thread to do the real work.
  21560. + */
  21561. +static struct task_struct *clock_set_delay_thread;
  21562. +static bool do_clock_set_delay;
  21563. +
  21564. +static int run_clock_set_delay(void *ignore)
  21565. +{
  21566. + while (!kthread_should_stop()) {
  21567. + set_current_state(TASK_INTERRUPTIBLE);
  21568. + if (do_clock_set_delay) {
  21569. + do_clock_set_delay = false;
  21570. + schedule_work(&hrtimer_work);
  21571. + }
  21572. + schedule();
  21573. + }
  21574. + __set_current_state(TASK_RUNNING);
  21575. + return 0;
  21576. +}
  21577. +
  21578. +void clock_was_set_delayed(void)
  21579. +{
  21580. + do_clock_set_delay = true;
  21581. + /* Make visible before waking up process */
  21582. + smp_wmb();
  21583. + wake_up_process(clock_set_delay_thread);
  21584. +}
  21585. +
  21586. +static __init int create_clock_set_delay_thread(void)
  21587. +{
  21588. + clock_set_delay_thread = kthread_run(run_clock_set_delay, NULL, "kclksetdelayd");
  21589. + BUG_ON(!clock_set_delay_thread);
  21590. + return 0;
  21591. +}
  21592. +early_initcall(create_clock_set_delay_thread);
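clock_was_set_delayed() above only sets a flag and wakes the helper thread, because on RT it can be called from hard interrupt context where schedule_work() must not be used; the sleeping call happens in kclksetdelayd. A rough userspace analogue of that delegation pattern (hypothetical; pthreads and a condition variable instead of a kthread, a flag and wake_up_process(), and the deferred work reduced to a printf):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool do_delay;

static void *delay_thread(void *ignore)     /* plays the role of kclksetdelayd */
{
        (void)ignore;
        pthread_mutex_lock(&lock);
        while (!do_delay)
                pthread_cond_wait(&cond, &lock);
        do_delay = false;
        pthread_mutex_unlock(&lock);

        /* Here the kernel thread calls schedule_work(&hrtimer_work). */
        printf("deferred work runs in thread context\n");
        return NULL;
}

static void clock_was_set_delayed_model(void)   /* the "cannot sleep" caller */
{
        pthread_mutex_lock(&lock);
        do_delay = true;
        pthread_mutex_unlock(&lock);
        pthread_cond_signal(&cond);
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, delay_thread, NULL);
        clock_was_set_delayed_model();
        pthread_join(t, NULL);
        return 0;
}

The in-kernel version avoids even the lock on the raising side: it just sets the flag, orders it with smp_wmb(), and wakes the thread. The same pattern reappears later in this patch for ntp_notify_cmos_timer().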
  21593. +#else /* PREEMPT_RT_FULL */
  21594. /*
  21595. * Called from timekeeping and resume code to reprogramm the hrtimer
  21596. * interrupt device on all cpus.
  21597. @@ -703,6 +760,7 @@ void clock_was_set_delayed(void)
  21598. {
  21599. schedule_work(&hrtimer_work);
  21600. }
  21601. +#endif
  21602. #else
  21603. @@ -711,6 +769,13 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; }
  21604. static inline int hrtimer_switch_to_hres(void) { return 0; }
  21605. static inline void
  21606. hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
  21607. +static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
  21608. + struct hrtimer_clock_base *base,
  21609. + int wakeup)
  21610. +{
  21611. + return 0;
  21612. +}
  21613. +
  21614. static inline int hrtimer_reprogram(struct hrtimer *timer,
  21615. struct hrtimer_clock_base *base)
  21616. {
  21617. @@ -718,7 +783,6 @@ static inline int hrtimer_reprogram(struct hrtimer *timer,
  21618. }
  21619. static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
  21620. static inline void retrigger_next_event(void *arg) { }
  21621. -
  21622. #endif /* CONFIG_HIGH_RES_TIMERS */
  21623. /*
  21624. @@ -836,6 +900,32 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
  21625. }
  21626. EXPORT_SYMBOL_GPL(hrtimer_forward);
  21627. +#ifdef CONFIG_PREEMPT_RT_BASE
  21628. +# define wake_up_timer_waiters(b) wake_up(&(b)->wait)
  21629. +
  21630. +/**
  21631. + * hrtimer_wait_for_timer - Wait for a running timer
  21632. + *
  21633. + * @timer: timer to wait for
  21634. + *
  21635. + * The function waits on the waitqueue of the timer base in case
  21636. + * the timer's callback function is currently executing. The
  21637. + * waitqueue is woken up after the timer callback function has
  21638. + * finished execution.
  21639. + */
  21640. +void hrtimer_wait_for_timer(const struct hrtimer *timer)
  21641. +{
  21642. + struct hrtimer_clock_base *base = timer->base;
  21643. +
  21644. + if (base && base->cpu_base && !timer->irqsafe)
  21645. + wait_event(base->cpu_base->wait,
  21646. + !(timer->state & HRTIMER_STATE_CALLBACK));
  21647. +}
  21648. +
  21649. +#else
  21650. +# define wake_up_timer_waiters(b) do { } while (0)
  21651. +#endif
  21652. +
  21653. /*
  21654. * enqueue_hrtimer - internal function to (re)start a timer
  21655. *
  21656. @@ -879,6 +969,11 @@ static void __remove_hrtimer(struct hrtimer *timer,
  21657. if (!(timer->state & HRTIMER_STATE_ENQUEUED))
  21658. goto out;
  21659. + if (unlikely(!list_empty(&timer->cb_entry))) {
  21660. + list_del_init(&timer->cb_entry);
  21661. + goto out;
  21662. + }
  21663. +
  21664. next_timer = timerqueue_getnext(&base->active);
  21665. timerqueue_del(&base->active, &timer->node);
  21666. if (&timer->node == next_timer) {
  21667. @@ -966,7 +1061,16 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
  21668. new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
  21669. timer_stats_hrtimer_set_start_info(timer);
  21670. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  21671. + {
  21672. + ktime_t now = new_base->get_time();
  21673. + if (ktime_to_ns(tim) < ktime_to_ns(now))
  21674. + timer->praecox = now;
  21675. + else
  21676. + timer->praecox = ktime_set(0, 0);
  21677. + }
  21678. +#endif
  21679. leftmost = enqueue_hrtimer(timer, new_base);
  21680. if (!leftmost) {
  21681. @@ -980,15 +1084,26 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
  21682. * on dynticks target.
  21683. */
  21684. wake_up_nohz_cpu(new_base->cpu_base->cpu);
  21685. - } else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases) &&
  21686. - hrtimer_reprogram(timer, new_base)) {
  21687. + } else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases)) {
  21688. +
  21689. + ret = hrtimer_enqueue_reprogram(timer, new_base, wakeup);
  21690. + if (ret < 0) {
  21691. + /*
  21692. + * In case we failed to reprogram the timer (mostly
  21693. + * because our current timer has already elapsed),
  21694. + * remove it again and report a failure. This avoids
  21695. + * stale base->first entries.
  21696. + */
  21697. + debug_deactivate(timer);
  21698. + __remove_hrtimer(timer, new_base,
  21699. + timer->state & HRTIMER_STATE_CALLBACK, 0);
  21700. + } else if (ret > 0) {
  21701. /*
  21702. * Only allow reprogramming if the new base is on this CPU.
  21703. * (it might still be on another CPU if the timer was pending)
  21704. *
  21705. * XXX send_remote_softirq() ?
  21706. */
  21707. - if (wakeup) {
  21708. /*
  21709. * We need to drop cpu_base->lock to avoid a
  21710. * lock ordering issue vs. rq->lock.
  21711. @@ -996,9 +1111,7 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
  21712. raw_spin_unlock(&new_base->cpu_base->lock);
  21713. raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  21714. local_irq_restore(flags);
  21715. - return ret;
  21716. - } else {
  21717. - __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  21718. + return 0;
  21719. }
  21720. }
  21721. @@ -1089,7 +1202,7 @@ int hrtimer_cancel(struct hrtimer *timer)
  21722. if (ret >= 0)
  21723. return ret;
  21724. - cpu_relax();
  21725. + hrtimer_wait_for_timer(timer);
  21726. }
  21727. }
  21728. EXPORT_SYMBOL_GPL(hrtimer_cancel);
  21729. @@ -1153,6 +1266,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
  21730. base = hrtimer_clockid_to_base(clock_id);
  21731. timer->base = &cpu_base->clock_base[base];
  21732. + INIT_LIST_HEAD(&timer->cb_entry);
  21733. timerqueue_init(&timer->node);
  21734. #ifdef CONFIG_TIMER_STATS
  21735. @@ -1236,6 +1350,126 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
  21736. timer->state &= ~HRTIMER_STATE_CALLBACK;
  21737. }
  21738. +static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
  21739. +
  21740. +#ifdef CONFIG_PREEMPT_RT_BASE
  21741. +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
  21742. + struct hrtimer_clock_base *base)
  21743. +{
  21744. + /*
  21745. + * Note, we clear the callback flag before we requeue the
  21746. + * timer otherwise we trigger the callback_running() check
  21747. + * in hrtimer_reprogram().
  21748. + */
  21749. + timer->state &= ~HRTIMER_STATE_CALLBACK;
  21750. +
  21751. + if (restart != HRTIMER_NORESTART) {
  21752. + BUG_ON(hrtimer_active(timer));
  21753. + /*
  21754. + * Enqueue the timer, if it's the leftmost timer then
  21755. + * we need to reprogram it.
  21756. + */
  21757. + if (!enqueue_hrtimer(timer, base))
  21758. + return;
  21759. +
  21760. +#ifndef CONFIG_HIGH_RES_TIMERS
  21761. + }
  21762. +#else
  21763. + if (base->cpu_base->hres_active &&
  21764. + hrtimer_reprogram(timer, base))
  21765. + goto requeue;
  21766. +
  21767. + } else if (hrtimer_active(timer)) {
  21768. + /*
  21769. + * If the timer was rearmed on another CPU, reprogram
  21770. + * the event device.
  21771. + */
  21772. + if (&timer->node == base->active.next &&
  21773. + base->cpu_base->hres_active &&
  21774. + hrtimer_reprogram(timer, base))
  21775. + goto requeue;
  21776. + }
  21777. + return;
  21778. +
  21779. +requeue:
  21780. + /*
  21781. + * The timer has expired. Move it from the tree back to the
  21782. + * pending list.
  21783. + */
  21784. + __remove_hrtimer(timer, base, timer->state, 0);
  21785. + list_add_tail(&timer->cb_entry, &base->expired);
  21786. +#endif
  21787. +}
  21788. +
  21789. +/*
  21790. + * The changes in mainline which removed the callback modes from
  21791. + * hrtimer are not yet working with -rt. The non wakeup_process()
  21792. + * based callbacks which involve sleeping locks need to be treated
  21793. + * seperately.
  21794. + */
  21795. +static void hrtimer_rt_run_pending(void)
  21796. +{
  21797. + enum hrtimer_restart (*fn)(struct hrtimer *);
  21798. + struct hrtimer_cpu_base *cpu_base;
  21799. + struct hrtimer_clock_base *base;
  21800. + struct hrtimer *timer;
  21801. + int index, restart;
  21802. +
  21803. + local_irq_disable();
  21804. + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
  21805. +
  21806. + raw_spin_lock(&cpu_base->lock);
  21807. +
  21808. + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
  21809. + base = &cpu_base->clock_base[index];
  21810. +
  21811. + while (!list_empty(&base->expired)) {
  21812. + timer = list_first_entry(&base->expired,
  21813. + struct hrtimer, cb_entry);
  21814. +
  21815. + /*
  21816. + * Same as the __run_hrtimer() function above,
  21817. + * except that we run with interrupts enabled.
  21818. + */
  21819. + debug_hrtimer_deactivate(timer);
  21820. + __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
  21821. + timer_stats_account_hrtimer(timer);
  21822. + fn = timer->function;
  21823. +
  21824. + raw_spin_unlock_irq(&cpu_base->lock);
  21825. + restart = fn(timer);
  21826. + raw_spin_lock_irq(&cpu_base->lock);
  21827. +
  21828. + hrtimer_rt_reprogram(restart, timer, base);
  21829. + }
  21830. + }
  21831. +
  21832. + raw_spin_unlock_irq(&cpu_base->lock);
  21833. +
  21834. + wake_up_timer_waiters(cpu_base);
  21835. +}
  21836. +
  21837. +static int hrtimer_rt_defer(struct hrtimer *timer)
  21838. +{
  21839. + if (timer->irqsafe)
  21840. + return 0;
  21841. +
  21842. + __remove_hrtimer(timer, timer->base, timer->state, 0);
  21843. + list_add_tail(&timer->cb_entry, &timer->base->expired);
  21844. + return 1;
  21845. +}
  21846. +
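hrtimer_rt_defer() splits expired timers by their irqsafe flag: irqsafe timers still run from the hrtimer interrupt, everything else goes onto the expired list and is handled later by hrtimer_rt_run_pending() in softirq context. A toy sketch of that split (illustrative only; plain arrays instead of timerqueues, and the softirq raise reduced to a printf):

#include <stdio.h>

struct toy_timer {
        const char *name;
        int irqsafe;
};

static void run_now(struct toy_timer *t) { printf("irq:    %s\n", t->name); }
static void defer(struct toy_timer *t)   { printf("thread: %s\n", t->name); }

static void expire(struct toy_timer *timers, int n)
{
        int raise = 0;

        for (int i = 0; i < n; i++) {
                if (timers[i].irqsafe) {
                        run_now(&timers[i]);
                } else {
                        defer(&timers[i]);
                        raise = 1;      /* raise_softirq_irqoff(HRTIMER_SOFTIRQ) */
                }
        }
        if (raise)
                printf("-> HRTIMER_SOFTIRQ raised\n");
}

int main(void)
{
        struct toy_timer timers[] = {
                { "sleeper wakeup", 1 },   /* hrtimer_init_sleeper() sets irqsafe */
                { "posix timer",    0 },
        };

        expire(timers, 2);
        return 0;
}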
  21847. +#else
  21848. +
  21849. +static inline void hrtimer_rt_run_pending(void)
  21850. +{
  21851. + hrtimer_peek_ahead_timers();
  21852. +}
  21853. +
  21854. +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
  21855. +
  21856. +#endif
  21857. +
  21858. #ifdef CONFIG_HIGH_RES_TIMERS
  21859. /*
  21860. @@ -1246,7 +1480,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
  21861. {
  21862. struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
  21863. ktime_t expires_next, now, entry_time, delta;
  21864. - int i, retries = 0;
  21865. + int i, retries = 0, raise = 0;
  21866. BUG_ON(!cpu_base->hres_active);
  21867. cpu_base->nr_events++;
  21868. @@ -1281,6 +1515,15 @@ retry:
  21869. timer = container_of(node, struct hrtimer, node);
  21870. + trace_hrtimer_interrupt(raw_smp_processor_id(),
  21871. + ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ?
  21872. + timer->praecox : hrtimer_get_expires(timer),
  21873. + basenow)),
  21874. + current,
  21875. + timer->function == hrtimer_wakeup ?
  21876. + container_of(timer, struct hrtimer_sleeper,
  21877. + timer)->task : NULL);
  21878. +
  21879. /*
  21880. * The immediate goal for using the softexpires is
  21881. * minimizing wakeups, not running timers at the
  21882. @@ -1296,7 +1539,10 @@ retry:
  21883. if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
  21884. break;
  21885. - __run_hrtimer(timer, &basenow);
  21886. + if (!hrtimer_rt_defer(timer))
  21887. + __run_hrtimer(timer, &basenow);
  21888. + else
  21889. + raise = 1;
  21890. }
  21891. }
  21892. /* Reevaluate the clock bases for the next expiry */
  21893. @@ -1313,7 +1559,7 @@ retry:
  21894. if (expires_next.tv64 == KTIME_MAX ||
  21895. !tick_program_event(expires_next, 0)) {
  21896. cpu_base->hang_detected = 0;
  21897. - return;
  21898. + goto out;
  21899. }
  21900. /*
  21901. @@ -1357,6 +1603,9 @@ retry:
  21902. tick_program_event(expires_next, 1);
  21903. printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
  21904. ktime_to_ns(delta));
  21905. +out:
  21906. + if (raise)
  21907. + raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  21908. }
  21909. /*
  21910. @@ -1392,18 +1641,18 @@ void hrtimer_peek_ahead_timers(void)
  21911. __hrtimer_peek_ahead_timers();
  21912. local_irq_restore(flags);
  21913. }
  21914. -
  21915. -static void run_hrtimer_softirq(struct softirq_action *h)
  21916. -{
  21917. - hrtimer_peek_ahead_timers();
  21918. -}
  21919. -
  21920. #else /* CONFIG_HIGH_RES_TIMERS */
  21921. static inline void __hrtimer_peek_ahead_timers(void) { }
  21922. #endif /* !CONFIG_HIGH_RES_TIMERS */
  21923. +
  21924. +static void run_hrtimer_softirq(struct softirq_action *h)
  21925. +{
  21926. + hrtimer_rt_run_pending();
  21927. +}
  21928. +
  21929. /*
  21930. * Called from timer softirq every jiffy, expire hrtimers:
  21931. *
  21932. @@ -1436,7 +1685,7 @@ void hrtimer_run_queues(void)
  21933. struct timerqueue_node *node;
  21934. struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
  21935. struct hrtimer_clock_base *base;
  21936. - int index, gettime = 1;
  21937. + int index, gettime = 1, raise = 0;
  21938. if (hrtimer_hres_active())
  21939. return;
  21940. @@ -1461,10 +1710,16 @@ void hrtimer_run_queues(void)
  21941. hrtimer_get_expires_tv64(timer))
  21942. break;
  21943. - __run_hrtimer(timer, &base->softirq_time);
  21944. + if (!hrtimer_rt_defer(timer))
  21945. + __run_hrtimer(timer, &base->softirq_time);
  21946. + else
  21947. + raise = 1;
  21948. }
  21949. raw_spin_unlock(&cpu_base->lock);
  21950. }
  21951. +
  21952. + if (raise)
  21953. + raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  21954. }
  21955. /*
  21956. @@ -1486,16 +1741,18 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
  21957. void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
  21958. {
  21959. sl->timer.function = hrtimer_wakeup;
  21960. + sl->timer.irqsafe = 1;
  21961. sl->task = task;
  21962. }
  21963. EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
  21964. -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
  21965. +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode,
  21966. + unsigned long state)
  21967. {
  21968. hrtimer_init_sleeper(t, current);
  21969. do {
  21970. - set_current_state(TASK_INTERRUPTIBLE);
  21971. + set_current_state(state);
  21972. hrtimer_start_expires(&t->timer, mode);
  21973. if (!hrtimer_active(&t->timer))
  21974. t->task = NULL;
  21975. @@ -1539,7 +1796,8 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
  21976. HRTIMER_MODE_ABS);
  21977. hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
  21978. - if (do_nanosleep(&t, HRTIMER_MODE_ABS))
  21979. + /* cpu_chill() does not care about restart state. */
  21980. + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE))
  21981. goto out;
  21982. rmtp = restart->nanosleep.rmtp;
  21983. @@ -1556,8 +1814,10 @@ out:
  21984. return ret;
  21985. }
  21986. -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  21987. - const enum hrtimer_mode mode, const clockid_t clockid)
  21988. +static long
  21989. +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  21990. + const enum hrtimer_mode mode, const clockid_t clockid,
  21991. + unsigned long state)
  21992. {
  21993. struct restart_block *restart;
  21994. struct hrtimer_sleeper t;
  21995. @@ -1570,7 +1830,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  21996. hrtimer_init_on_stack(&t.timer, clockid, mode);
  21997. hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
  21998. - if (do_nanosleep(&t, mode))
  21999. + if (do_nanosleep(&t, mode, state))
  22000. goto out;
  22001. /* Absolute timers do not update the rmtp value and restart: */
  22002. @@ -1597,6 +1857,12 @@ out:
  22003. return ret;
  22004. }
  22005. +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  22006. + const enum hrtimer_mode mode, const clockid_t clockid)
  22007. +{
  22008. + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE);
  22009. +}
  22010. +
  22011. SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
  22012. struct timespec __user *, rmtp)
  22013. {
  22014. @@ -1611,6 +1877,26 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
  22015. return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
  22016. }
  22017. +#ifdef CONFIG_PREEMPT_RT_FULL
  22018. +/*
  22019. + * Sleep for 1 ms in the hope that whoever holds what we want will let it go.
  22020. + */
  22021. +void cpu_chill(void)
  22022. +{
  22023. + struct timespec tu = {
  22024. + .tv_nsec = NSEC_PER_MSEC,
  22025. + };
  22026. + unsigned int freeze_flag = current->flags & PF_NOFREEZE;
  22027. +
  22028. + current->flags |= PF_NOFREEZE;
  22029. + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC,
  22030. + TASK_UNINTERRUPTIBLE);
  22031. + if (!freeze_flag)
  22032. + current->flags &= ~PF_NOFREEZE;
  22033. +}
  22034. +EXPORT_SYMBOL(cpu_chill);
  22035. +#endif
  22036. +
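cpu_chill() turns retry loops that used to spin with cpu_relax() into 1 ms sleeps, so on RT the task holding the resource actually gets to run. A userspace sketch of the same retry-then-sleep shape (hypothetical; try_cancel() and chill() are stand-ins for hrtimer_try_to_cancel() and cpu_chill()):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static int attempts;

static bool try_cancel(void)            /* stands in for hrtimer_try_to_cancel() */
{
        return ++attempts >= 3;         /* pretend the callback finishes eventually */
}

static void chill(void)                 /* userspace stand-in for cpu_chill() */
{
        struct timespec ts = { .tv_sec = 0, .tv_nsec = 1000 * 1000 };

        nanosleep(&ts, NULL);
}

int main(void)
{
        while (!try_cancel())
                chill();                /* instead of cpu_relax() spinning */
        printf("cancelled after %d attempts\n", attempts);
        return 0;
}

The design point is the same as in hrtimer_cancel() above: on RT, busy-waiting on something a lower-priority task holds can live-lock, while a short sleep lets that task make progress.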
  22037. /*
  22038. * Functions related to boot-time initialization:
  22039. */
  22040. @@ -1622,10 +1908,14 @@ static void init_hrtimers_cpu(int cpu)
  22041. for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
  22042. cpu_base->clock_base[i].cpu_base = cpu_base;
  22043. timerqueue_init_head(&cpu_base->clock_base[i].active);
  22044. + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
  22045. }
  22046. cpu_base->cpu = cpu;
  22047. hrtimer_init_hres(cpu_base);
  22048. +#ifdef CONFIG_PREEMPT_RT_BASE
  22049. + init_waitqueue_head(&cpu_base->wait);
  22050. +#endif
  22051. }
  22052. #ifdef CONFIG_HOTPLUG_CPU
  22053. @@ -1731,9 +2021,7 @@ void __init hrtimers_init(void)
  22054. hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
  22055. (void *)(long)smp_processor_id());
  22056. register_cpu_notifier(&hrtimers_nb);
  22057. -#ifdef CONFIG_HIGH_RES_TIMERS
  22058. open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
  22059. -#endif
  22060. }
  22061. /**
  22062. diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
  22063. index 8d262b467573..d0513909d663 100644
  22064. --- a/kernel/time/itimer.c
  22065. +++ b/kernel/time/itimer.c
  22066. @@ -213,6 +213,7 @@ again:
  22067. /* We are sharing ->siglock with it_real_fn() */
  22068. if (hrtimer_try_to_cancel(timer) < 0) {
  22069. spin_unlock_irq(&tsk->sighand->siglock);
  22070. + hrtimer_wait_for_timer(&tsk->signal->real_timer);
  22071. goto again;
  22072. }
  22073. expires = timeval_to_ktime(value->it_value);
  22074. diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
  22075. index 347fecf86a3f..2ede47408a3e 100644
  22076. --- a/kernel/time/jiffies.c
  22077. +++ b/kernel/time/jiffies.c
  22078. @@ -74,7 +74,8 @@ static struct clocksource clocksource_jiffies = {
  22079. .max_cycles = 10,
  22080. };
  22081. -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
  22082. +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
  22083. +__cacheline_aligned_in_smp seqcount_t jiffies_seq;
  22084. #if (BITS_PER_LONG < 64)
  22085. u64 get_jiffies_64(void)
  22086. @@ -83,9 +84,9 @@ u64 get_jiffies_64(void)
  22087. u64 ret;
  22088. do {
  22089. - seq = read_seqbegin(&jiffies_lock);
  22090. + seq = read_seqcount_begin(&jiffies_seq);
  22091. ret = jiffies_64;
  22092. - } while (read_seqretry(&jiffies_lock, seq));
  22093. + } while (read_seqcount_retry(&jiffies_seq, seq));
  22094. return ret;
  22095. }
  22096. EXPORT_SYMBOL(get_jiffies_64);
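With jiffies_lock split into a raw spinlock for writers and jiffies_seq for readers, get_jiffies_64() retries its read whenever the sequence count changes underneath it. A simplified userspace model of that read loop (illustrative only; value is left non-atomic, so this is not a formally race-free implementation, and the demo runs single-threaded):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic unsigned int seq;
static uint64_t value;                  /* the 64-bit counter behind the seqcount */

static void writer_update(uint64_t v)   /* writer side, holding "jiffies_lock" */
{
        atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* odd: write in progress */
        value = v;
        atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* even: write done */
}

static uint64_t reader_get(void)        /* models get_jiffies_64() */
{
        unsigned int s;
        uint64_t v;

        do {
                while ((s = atomic_load_explicit(&seq, memory_order_acquire)) & 1)
                        ;               /* writer in progress, wait */
                v = value;
        } while (atomic_load_explicit(&seq, memory_order_acquire) != s);
        return v;
}

int main(void)
{
        writer_update(12345);
        printf("%llu\n", (unsigned long long)reader_get());
        return 0;
}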
  22097. diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
  22098. index 7a681003001c..bd9c53985d32 100644
  22099. --- a/kernel/time/ntp.c
  22100. +++ b/kernel/time/ntp.c
  22101. @@ -10,6 +10,7 @@
  22102. #include <linux/workqueue.h>
  22103. #include <linux/hrtimer.h>
  22104. #include <linux/jiffies.h>
  22105. +#include <linux/kthread.h>
  22106. #include <linux/math64.h>
  22107. #include <linux/timex.h>
  22108. #include <linux/time.h>
  22109. @@ -529,10 +530,52 @@ static void sync_cmos_clock(struct work_struct *work)
  22110. &sync_cmos_work, timespec_to_jiffies(&next));
  22111. }
  22112. +#ifdef CONFIG_PREEMPT_RT_FULL
  22113. +/*
  22114. + * RT cannot call schedule_delayed_work() from hard interrupt context.
  22115. + * We need a thread to do the real work.
  22116. + */
  22117. +static struct task_struct *cmos_delay_thread;
  22118. +static bool do_cmos_delay;
  22119. +
  22120. +static int run_cmos_delay(void *ignore)
  22121. +{
  22122. + while (!kthread_should_stop()) {
  22123. + set_current_state(TASK_INTERRUPTIBLE);
  22124. + if (do_cmos_delay) {
  22125. + do_cmos_delay = false;
  22126. + queue_delayed_work(system_power_efficient_wq,
  22127. + &sync_cmos_work, 0);
  22128. + }
  22129. + schedule();
  22130. + }
  22131. + __set_current_state(TASK_RUNNING);
  22132. + return 0;
  22133. +}
  22134. +
  22135. +void ntp_notify_cmos_timer(void)
  22136. +{
  22137. + do_cmos_delay = true;
  22138. + /* Make visible before waking up process */
  22139. + smp_wmb();
  22140. + wake_up_process(cmos_delay_thread);
  22141. +}
  22142. +
  22143. +static __init int create_cmos_delay_thread(void)
  22144. +{
  22145. + cmos_delay_thread = kthread_run(run_cmos_delay, NULL, "kcmosdelayd");
  22146. + BUG_ON(!cmos_delay_thread);
  22147. + return 0;
  22148. +}
  22149. +early_initcall(create_cmos_delay_thread);
  22150. +
  22151. +#else
  22152. +
  22153. void ntp_notify_cmos_timer(void)
  22154. {
  22155. queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
  22156. }
  22157. +#endif /* CONFIG_PREEMPT_RT_FULL */
  22158. #else
  22159. void ntp_notify_cmos_timer(void) { }
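
The comment in the hunk above spells out the constraint: on PREEMPT_RT the interrupt path must not call schedule_delayed_work(), so ntp_notify_cmos_timer() only sets a flag and wakes the kcmosdelayd kthread, which queues the work from process context. Here is a small userspace analogue of that hand-off, with a POSIX semaphore standing in for wake_up_process(); it is illustrative only, not the kernel code.

    /* Userspace sketch: defer work from a context that must not block to a
     * dedicated worker thread, mirroring kcmosdelayd above. */
    #include <pthread.h>
    #include <semaphore.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <unistd.h>

    static sem_t kick;
    static atomic_bool do_work;             /* "do_cmos_delay" */
    static atomic_bool stop;

    static void *worker(void *arg)          /* stands in for run_cmos_delay() */
    {
        (void)arg;
        while (!atomic_load(&stop)) {
            sem_wait(&kick);                /* sleep until someone notifies us */
            if (atomic_exchange(&do_work, false))
                printf("syncing the CMOS clock\n");   /* the deferred work */
        }
        return NULL;
    }

    static void notify(void)                /* stands in for ntp_notify_cmos_timer() */
    {
        atomic_store(&do_work, true);       /* make the request visible ...      */
        sem_post(&kick);                    /* ... then wake the worker; no blocking */
    }

    int main(void)
    {
        pthread_t tid;

        sem_init(&kick, 0, 0);
        pthread_create(&tid, NULL, worker, NULL);
        notify();
        sleep(1);                           /* let the worker run the request */
        atomic_store(&stop, true);
        sem_post(&kick);
        pthread_join(tid, NULL);
        sem_destroy(&kick);
        return 0;
    }
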
  22160. diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
  22161. index 57d1acb91c56..5b24aefef595 100644
  22162. --- a/kernel/time/posix-cpu-timers.c
  22163. +++ b/kernel/time/posix-cpu-timers.c
  22164. @@ -3,6 +3,7 @@
  22165. */
  22166. #include <linux/sched.h>
  22167. +#include <linux/sched/rt.h>
  22168. #include <linux/posix-timers.h>
  22169. #include <linux/errno.h>
  22170. #include <linux/math64.h>
  22171. @@ -626,7 +627,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
  22172. /*
  22173. * Disarm any old timer after extracting its expiry time.
  22174. */
  22175. - WARN_ON_ONCE(!irqs_disabled());
  22176. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  22177. ret = 0;
  22178. old_incr = timer->it.cpu.incr;
  22179. @@ -1048,7 +1049,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
  22180. /*
  22181. * Now re-arm for the new expiry time.
  22182. */
  22183. - WARN_ON_ONCE(!irqs_disabled());
  22184. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  22185. arm_timer(timer);
  22186. unlock_task_sighand(p, &flags);
  22187. @@ -1114,10 +1115,11 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
  22188. sig = tsk->signal;
  22189. if (sig->cputimer.running) {
  22190. struct task_cputime group_sample;
  22191. + unsigned long flags;
  22192. - raw_spin_lock(&sig->cputimer.lock);
  22193. + raw_spin_lock_irqsave(&sig->cputimer.lock, flags);
  22194. group_sample = sig->cputimer.cputime;
  22195. - raw_spin_unlock(&sig->cputimer.lock);
  22196. + raw_spin_unlock_irqrestore(&sig->cputimer.lock, flags);
  22197. if (task_cputime_expired(&group_sample, &sig->cputime_expires))
  22198. return 1;
  22199. @@ -1131,13 +1133,13 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
  22200. * already updated our counts. We need to check if any timers fire now.
  22201. * Interrupts are disabled.
  22202. */
  22203. -void run_posix_cpu_timers(struct task_struct *tsk)
  22204. +static void __run_posix_cpu_timers(struct task_struct *tsk)
  22205. {
  22206. LIST_HEAD(firing);
  22207. struct k_itimer *timer, *next;
  22208. unsigned long flags;
  22209. - WARN_ON_ONCE(!irqs_disabled());
  22210. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  22211. /*
  22212. * The fast path checks that there are no expired thread or thread
  22213. @@ -1195,6 +1197,190 @@ void run_posix_cpu_timers(struct task_struct *tsk)
  22214. }
  22215. }
  22216. +#ifdef CONFIG_PREEMPT_RT_BASE
  22217. +#include <linux/kthread.h>
  22218. +#include <linux/cpu.h>
  22219. +DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
  22220. +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
  22221. +
  22222. +static int posix_cpu_timers_thread(void *data)
  22223. +{
  22224. + int cpu = (long)data;
  22225. +
  22226. + BUG_ON(per_cpu(posix_timer_task,cpu) != current);
  22227. +
  22228. + while (!kthread_should_stop()) {
  22229. + struct task_struct *tsk = NULL;
  22230. + struct task_struct *next = NULL;
  22231. +
  22232. + if (cpu_is_offline(cpu))
  22233. + goto wait_to_die;
  22234. +
  22235. + /* grab task list */
  22236. + raw_local_irq_disable();
  22237. + tsk = per_cpu(posix_timer_tasklist, cpu);
  22238. + per_cpu(posix_timer_tasklist, cpu) = NULL;
  22239. + raw_local_irq_enable();
  22240. +
22241. + /* it's possible the list is empty, just return */
  22242. + if (!tsk) {
  22243. + set_current_state(TASK_INTERRUPTIBLE);
  22244. + schedule();
  22245. + __set_current_state(TASK_RUNNING);
  22246. + continue;
  22247. + }
  22248. +
  22249. + /* Process task list */
  22250. + while (1) {
  22251. + /* save next */
  22252. + next = tsk->posix_timer_list;
  22253. +
  22254. + /* run the task timers, clear its ptr and
  22255. + * unreference it
  22256. + */
  22257. + __run_posix_cpu_timers(tsk);
  22258. + tsk->posix_timer_list = NULL;
  22259. + put_task_struct(tsk);
  22260. +
  22261. + /* check if this is the last on the list */
  22262. + if (next == tsk)
  22263. + break;
  22264. + tsk = next;
  22265. + }
  22266. + }
  22267. + return 0;
  22268. +
  22269. +wait_to_die:
  22270. + /* Wait for kthread_stop */
  22271. + set_current_state(TASK_INTERRUPTIBLE);
  22272. + while (!kthread_should_stop()) {
  22273. + schedule();
  22274. + set_current_state(TASK_INTERRUPTIBLE);
  22275. + }
  22276. + __set_current_state(TASK_RUNNING);
  22277. + return 0;
  22278. +}
  22279. +
  22280. +static inline int __fastpath_timer_check(struct task_struct *tsk)
  22281. +{
  22282. + /* tsk == current, ensure it is safe to use ->signal/sighand */
  22283. + if (unlikely(tsk->exit_state))
  22284. + return 0;
  22285. +
  22286. + if (!task_cputime_zero(&tsk->cputime_expires))
  22287. + return 1;
  22288. +
  22289. + if (!task_cputime_zero(&tsk->signal->cputime_expires))
  22290. + return 1;
  22291. +
  22292. + return 0;
  22293. +}
  22294. +
  22295. +void run_posix_cpu_timers(struct task_struct *tsk)
  22296. +{
  22297. + unsigned long cpu = smp_processor_id();
  22298. + struct task_struct *tasklist;
  22299. +
  22300. + BUG_ON(!irqs_disabled());
22301. + if (!per_cpu(posix_timer_task, cpu))
  22302. + return;
  22303. + /* get per-cpu references */
  22304. + tasklist = per_cpu(posix_timer_tasklist, cpu);
  22305. +
  22306. + /* check to see if we're already queued */
  22307. + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
  22308. + get_task_struct(tsk);
  22309. + if (tasklist) {
  22310. + tsk->posix_timer_list = tasklist;
  22311. + } else {
  22312. + /*
  22313. + * The list is terminated by a self-pointing
  22314. + * task_struct
  22315. + */
  22316. + tsk->posix_timer_list = tsk;
  22317. + }
  22318. + per_cpu(posix_timer_tasklist, cpu) = tsk;
  22319. +
  22320. + wake_up_process(per_cpu(posix_timer_task, cpu));
  22321. + }
  22322. +}
  22323. +
  22324. +/*
  22325. + * posix_cpu_thread_call - callback that gets triggered when a CPU is added.
  22326. + * Here we can start up the necessary migration thread for the new CPU.
  22327. + */
  22328. +static int posix_cpu_thread_call(struct notifier_block *nfb,
  22329. + unsigned long action, void *hcpu)
  22330. +{
  22331. + int cpu = (long)hcpu;
  22332. + struct task_struct *p;
  22333. + struct sched_param param;
  22334. +
  22335. + switch (action) {
  22336. + case CPU_UP_PREPARE:
  22337. + p = kthread_create(posix_cpu_timers_thread, hcpu,
  22338. + "posixcputmr/%d",cpu);
  22339. + if (IS_ERR(p))
  22340. + return NOTIFY_BAD;
  22341. + p->flags |= PF_NOFREEZE;
  22342. + kthread_bind(p, cpu);
  22343. + /* Must be high prio to avoid getting starved */
  22344. + param.sched_priority = MAX_RT_PRIO-1;
  22345. + sched_setscheduler(p, SCHED_FIFO, &param);
  22346. + per_cpu(posix_timer_task,cpu) = p;
  22347. + break;
  22348. + case CPU_ONLINE:
22349. + /* Strictly unnecessary, as first user will wake it. */
  22350. + wake_up_process(per_cpu(posix_timer_task,cpu));
  22351. + break;
  22352. +#ifdef CONFIG_HOTPLUG_CPU
  22353. + case CPU_UP_CANCELED:
  22354. + /* Unbind it from offline cpu so it can run. Fall thru. */
  22355. + kthread_bind(per_cpu(posix_timer_task, cpu),
  22356. + cpumask_any(cpu_online_mask));
  22357. + kthread_stop(per_cpu(posix_timer_task,cpu));
  22358. + per_cpu(posix_timer_task,cpu) = NULL;
  22359. + break;
  22360. + case CPU_DEAD:
  22361. + kthread_stop(per_cpu(posix_timer_task,cpu));
  22362. + per_cpu(posix_timer_task,cpu) = NULL;
  22363. + break;
  22364. +#endif
  22365. + }
  22366. + return NOTIFY_OK;
  22367. +}
  22368. +
  22369. +/* Register at highest priority so that task migration (migrate_all_tasks)
  22370. + * happens before everything else.
  22371. + */
  22372. +static struct notifier_block posix_cpu_thread_notifier = {
  22373. + .notifier_call = posix_cpu_thread_call,
  22374. + .priority = 10
  22375. +};
  22376. +
  22377. +static int __init posix_cpu_thread_init(void)
  22378. +{
  22379. + void *hcpu = (void *)(long)smp_processor_id();
  22380. + /* Start one for boot CPU. */
  22381. + unsigned long cpu;
  22382. +
  22383. + /* init the per-cpu posix_timer_tasklets */
  22384. + for_each_possible_cpu(cpu)
  22385. + per_cpu(posix_timer_tasklist, cpu) = NULL;
  22386. +
  22387. + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu);
  22388. + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu);
  22389. + register_cpu_notifier(&posix_cpu_thread_notifier);
  22390. + return 0;
  22391. +}
  22392. +early_initcall(posix_cpu_thread_init);
  22393. +#else /* CONFIG_PREEMPT_RT_BASE */
  22394. +void run_posix_cpu_timers(struct task_struct *tsk)
  22395. +{
  22396. + __run_posix_cpu_timers(tsk);
  22397. +}
  22398. +#endif /* CONFIG_PREEMPT_RT_BASE */
  22399. +
  22400. /*
  22401. * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
  22402. * The tsk->sighand->siglock must be held by the caller.
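
In the RT branch above, expired-timer processing is pushed out of the tick and into a per-CPU posixcputmr/N kthread; tasks with pending expiries are chained through tsk->posix_timer_list, and the chain is terminated by an entry that points to itself rather than to NULL, so "already queued" can be tested as a non-NULL pointer. Below is a self-contained sketch of that self-terminated intrusive list; the names are illustrative, not the kernel structures.

    /* Userspace sketch: an intrusive singly linked list whose tail points to
     * itself instead of NULL, as posix_timer_list does above. */
    #include <stddef.h>
    #include <stdio.h>

    struct work_item {
        const char *name;
        struct work_item *next;   /* NULL: not queued; self: end of list */
    };

    /* Push onto the per-CPU style list head; returns the new head. */
    static struct work_item *push(struct work_item *head, struct work_item *it)
    {
        it->next = head ? head : it;   /* empty list: terminate with self */
        return it;
    }

    /* Drain the list the same way posix_cpu_timers_thread() does. */
    static void drain(struct work_item *head)
    {
        struct work_item *it = head;

        while (it) {
            struct work_item *next = it->next;  /* save before clearing */

            printf("processing %s\n", it->name);
            it->next = NULL;                    /* mark as unqueued */

            if (next == it)                     /* self-pointer: last entry */
                break;
            it = next;
        }
    }

    int main(void)
    {
        struct work_item a = { "task-a", NULL }, b = { "task-b", NULL };
        struct work_item *head = NULL;

        head = push(head, &a);
        head = push(head, &b);
        drain(head);
        return 0;
    }
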
  22403. diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
  22404. index 31ea01f42e1f..0f5d7eae61f0 100644
  22405. --- a/kernel/time/posix-timers.c
  22406. +++ b/kernel/time/posix-timers.c
  22407. @@ -499,6 +499,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
  22408. static struct pid *good_sigevent(sigevent_t * event)
  22409. {
  22410. struct task_struct *rtn = current->group_leader;
  22411. + int sig = event->sigev_signo;
  22412. if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
  22413. (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
  22414. @@ -507,7 +508,8 @@ static struct pid *good_sigevent(sigevent_t * event)
  22415. return NULL;
  22416. if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
  22417. - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
  22418. + (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) ||
  22419. + sig_kernel_coredump(sig)))
  22420. return NULL;
  22421. return task_pid(rtn);
  22422. @@ -819,6 +821,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
  22423. return overrun;
  22424. }
  22425. +/*
  22426. + * Protected by RCU!
  22427. + */
  22428. +static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
  22429. +{
  22430. +#ifdef CONFIG_PREEMPT_RT_FULL
  22431. + if (kc->timer_set == common_timer_set)
  22432. + hrtimer_wait_for_timer(&timr->it.real.timer);
  22433. + else
  22434. + /* FIXME: Whacky hack for posix-cpu-timers */
  22435. + schedule_timeout(1);
  22436. +#endif
  22437. +}
  22438. +
  22439. /* Set a POSIX.1b interval timer. */
  22440. /* timr->it_lock is taken. */
  22441. static int
  22442. @@ -896,6 +912,7 @@ retry:
  22443. if (!timr)
  22444. return -EINVAL;
  22445. + rcu_read_lock();
  22446. kc = clockid_to_kclock(timr->it_clock);
  22447. if (WARN_ON_ONCE(!kc || !kc->timer_set))
  22448. error = -EINVAL;
  22449. @@ -904,9 +921,12 @@ retry:
  22450. unlock_timer(timr, flag);
  22451. if (error == TIMER_RETRY) {
  22452. + timer_wait_for_callback(kc, timr);
  22453. rtn = NULL; // We already got the old time...
  22454. + rcu_read_unlock();
  22455. goto retry;
  22456. }
  22457. + rcu_read_unlock();
  22458. if (old_setting && !error &&
  22459. copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
  22460. @@ -944,10 +964,15 @@ retry_delete:
  22461. if (!timer)
  22462. return -EINVAL;
  22463. + rcu_read_lock();
  22464. if (timer_delete_hook(timer) == TIMER_RETRY) {
  22465. unlock_timer(timer, flags);
  22466. + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
  22467. + timer);
  22468. + rcu_read_unlock();
  22469. goto retry_delete;
  22470. }
  22471. + rcu_read_unlock();
  22472. spin_lock(&current->sighand->siglock);
  22473. list_del(&timer->list);
  22474. @@ -973,8 +998,18 @@ static void itimer_delete(struct k_itimer *timer)
  22475. retry_delete:
  22476. spin_lock_irqsave(&timer->it_lock, flags);
  22477. + /* On RT we can race with a deletion */
  22478. + if (!timer->it_signal) {
  22479. + unlock_timer(timer, flags);
  22480. + return;
  22481. + }
  22482. +
  22483. if (timer_delete_hook(timer) == TIMER_RETRY) {
  22484. + rcu_read_lock();
  22485. unlock_timer(timer, flags);
  22486. + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
  22487. + timer);
  22488. + rcu_read_unlock();
  22489. goto retry_delete;
  22490. }
  22491. list_del(&timer->list);
  22492. diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
  22493. index 6aac4beedbbe..943c03395e46 100644
  22494. --- a/kernel/time/tick-broadcast-hrtimer.c
  22495. +++ b/kernel/time/tick-broadcast-hrtimer.c
  22496. @@ -109,5 +109,6 @@ void tick_setup_hrtimer_broadcast(void)
  22497. {
  22498. hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  22499. bctimer.function = bc_handler;
  22500. + bctimer.irqsafe = true;
  22501. clockevents_register_device(&ce_broadcast_hrtimer);
  22502. }
  22503. diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
  22504. index 3ae6afa1eb98..14a10917c8a3 100644
  22505. --- a/kernel/time/tick-common.c
  22506. +++ b/kernel/time/tick-common.c
  22507. @@ -78,13 +78,15 @@ int tick_is_oneshot_available(void)
  22508. static void tick_periodic(int cpu)
  22509. {
  22510. if (tick_do_timer_cpu == cpu) {
  22511. - write_seqlock(&jiffies_lock);
  22512. + raw_spin_lock(&jiffies_lock);
  22513. + write_seqcount_begin(&jiffies_seq);
  22514. /* Keep track of the next tick event */
  22515. tick_next_period = ktime_add(tick_next_period, tick_period);
  22516. do_timer(1);
  22517. - write_sequnlock(&jiffies_lock);
  22518. + write_seqcount_end(&jiffies_seq);
  22519. + raw_spin_unlock(&jiffies_lock);
  22520. update_wall_time();
  22521. }
  22522. @@ -146,9 +148,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
  22523. ktime_t next;
  22524. do {
  22525. - seq = read_seqbegin(&jiffies_lock);
  22526. + seq = read_seqcount_begin(&jiffies_seq);
  22527. next = tick_next_period;
  22528. - } while (read_seqretry(&jiffies_lock, seq));
  22529. + } while (read_seqcount_retry(&jiffies_seq, seq));
  22530. clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
  22531. diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
  22532. index 914259128145..b3841ba00c69 100644
  22533. --- a/kernel/time/tick-sched.c
  22534. +++ b/kernel/time/tick-sched.c
  22535. @@ -62,7 +62,8 @@ static void tick_do_update_jiffies64(ktime_t now)
  22536. return;
  22537. /* Reevalute with jiffies_lock held */
  22538. - write_seqlock(&jiffies_lock);
  22539. + raw_spin_lock(&jiffies_lock);
  22540. + write_seqcount_begin(&jiffies_seq);
  22541. delta = ktime_sub(now, last_jiffies_update);
  22542. if (delta.tv64 >= tick_period.tv64) {
  22543. @@ -85,10 +86,12 @@ static void tick_do_update_jiffies64(ktime_t now)
  22544. /* Keep the tick_next_period variable up to date */
  22545. tick_next_period = ktime_add(last_jiffies_update, tick_period);
  22546. } else {
  22547. - write_sequnlock(&jiffies_lock);
  22548. + write_seqcount_end(&jiffies_seq);
  22549. + raw_spin_unlock(&jiffies_lock);
  22550. return;
  22551. }
  22552. - write_sequnlock(&jiffies_lock);
  22553. + write_seqcount_end(&jiffies_seq);
  22554. + raw_spin_unlock(&jiffies_lock);
  22555. update_wall_time();
  22556. }
  22557. @@ -99,12 +102,14 @@ static ktime_t tick_init_jiffy_update(void)
  22558. {
  22559. ktime_t period;
  22560. - write_seqlock(&jiffies_lock);
  22561. + raw_spin_lock(&jiffies_lock);
  22562. + write_seqcount_begin(&jiffies_seq);
  22563. /* Did we start the jiffies update yet ? */
  22564. if (last_jiffies_update.tv64 == 0)
  22565. last_jiffies_update = tick_next_period;
  22566. period = last_jiffies_update;
  22567. - write_sequnlock(&jiffies_lock);
  22568. + write_seqcount_end(&jiffies_seq);
  22569. + raw_spin_unlock(&jiffies_lock);
  22570. return period;
  22571. }
  22572. @@ -176,6 +181,11 @@ static bool can_stop_full_tick(void)
  22573. return false;
  22574. }
  22575. + if (!arch_irq_work_has_interrupt()) {
  22576. + trace_tick_stop(0, "missing irq work interrupt\n");
  22577. + return false;
  22578. + }
  22579. +
  22580. /* sched_clock_tick() needs us? */
  22581. #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
  22582. /*
  22583. @@ -222,6 +232,7 @@ static void nohz_full_kick_work_func(struct irq_work *work)
  22584. static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
  22585. .func = nohz_full_kick_work_func,
  22586. + .flags = IRQ_WORK_HARD_IRQ,
  22587. };
  22588. /*
  22589. @@ -578,10 +589,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
  22590. /* Read jiffies and the time when jiffies were updated last */
  22591. do {
  22592. - seq = read_seqbegin(&jiffies_lock);
  22593. + seq = read_seqcount_begin(&jiffies_seq);
  22594. last_update = last_jiffies_update;
  22595. last_jiffies = jiffies;
  22596. - } while (read_seqretry(&jiffies_lock, seq));
  22597. + } while (read_seqcount_retry(&jiffies_seq, seq));
  22598. if (rcu_needs_cpu(&rcu_delta_jiffies) ||
  22599. arch_needs_cpu() || irq_work_needs_cpu()) {
  22600. @@ -759,14 +770,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
  22601. return false;
  22602. if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
  22603. - static int ratelimit;
  22604. -
  22605. - if (ratelimit < 10 &&
  22606. - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
  22607. - pr_warn("NOHZ: local_softirq_pending %02x\n",
  22608. - (unsigned int) local_softirq_pending());
  22609. - ratelimit++;
  22610. - }
  22611. + softirq_check_pending_idle();
  22612. return false;
  22613. }
  22614. @@ -1154,6 +1158,7 @@ void tick_setup_sched_timer(void)
  22615. * Emulate tick processing via per-CPU hrtimers:
  22616. */
  22617. hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  22618. + ts->sched_timer.irqsafe = 1;
  22619. ts->sched_timer.function = tick_sched_timer;
  22620. /* Get the next period (per cpu) */
  22621. diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
  22622. index d9f112bd42a7..7713b181ccfa 100644
  22623. --- a/kernel/time/timekeeping.c
  22624. +++ b/kernel/time/timekeeping.c
  22625. @@ -2087,8 +2087,10 @@ EXPORT_SYMBOL(hardpps);
  22626. */
  22627. void xtime_update(unsigned long ticks)
  22628. {
  22629. - write_seqlock(&jiffies_lock);
  22630. + raw_spin_lock(&jiffies_lock);
  22631. + write_seqcount_begin(&jiffies_seq);
  22632. do_timer(ticks);
  22633. - write_sequnlock(&jiffies_lock);
  22634. + write_seqcount_end(&jiffies_seq);
  22635. + raw_spin_unlock(&jiffies_lock);
  22636. update_wall_time();
  22637. }
  22638. diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
  22639. index ead8794b9a4e..d7a9120a9f52 100644
  22640. --- a/kernel/time/timekeeping.h
  22641. +++ b/kernel/time/timekeeping.h
  22642. @@ -22,7 +22,8 @@ extern void timekeeping_resume(void);
  22643. extern void do_timer(unsigned long ticks);
  22644. extern void update_wall_time(void);
  22645. -extern seqlock_t jiffies_lock;
  22646. +extern raw_spinlock_t jiffies_lock;
  22647. +extern seqcount_t jiffies_seq;
  22648. #define CS_NAME_LEN 32
  22649. diff --git a/kernel/time/timer.c b/kernel/time/timer.c
  22650. index 2ece3aa5069c..b1f9e6c5bec4 100644
  22651. --- a/kernel/time/timer.c
  22652. +++ b/kernel/time/timer.c
  22653. @@ -78,6 +78,9 @@ struct tvec_root {
  22654. struct tvec_base {
  22655. spinlock_t lock;
  22656. struct timer_list *running_timer;
  22657. +#ifdef CONFIG_PREEMPT_RT_FULL
  22658. + wait_queue_head_t wait_for_running_timer;
  22659. +#endif
  22660. unsigned long timer_jiffies;
  22661. unsigned long next_timer;
  22662. unsigned long active_timers;
  22663. @@ -768,6 +771,36 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
  22664. }
  22665. }
  22666. +#ifndef CONFIG_PREEMPT_RT_FULL
  22667. +static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
  22668. + struct tvec_base *old,
  22669. + struct tvec_base *new)
  22670. +{
  22671. + /* See the comment in lock_timer_base() */
  22672. + timer_set_base(timer, NULL);
  22673. + spin_unlock(&old->lock);
  22674. + spin_lock(&new->lock);
  22675. + timer_set_base(timer, new);
  22676. + return new;
  22677. +}
  22678. +#else
  22679. +static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
  22680. + struct tvec_base *old,
  22681. + struct tvec_base *new)
  22682. +{
  22683. + /*
  22684. + * We cannot do the above because we might be preempted and
  22685. + * then the preempter would see NULL and loop forever.
  22686. + */
  22687. + if (spin_trylock(&new->lock)) {
  22688. + timer_set_base(timer, new);
  22689. + spin_unlock(&old->lock);
  22690. + return new;
  22691. + }
  22692. + return old;
  22693. +}
  22694. +#endif
  22695. +
  22696. static inline int
  22697. __mod_timer(struct timer_list *timer, unsigned long expires,
  22698. bool pending_only, int pinned)
  22699. @@ -798,14 +831,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
  22700. * handler yet has not finished. This also guarantees that
  22701. * the timer is serialized wrt itself.
  22702. */
  22703. - if (likely(base->running_timer != timer)) {
  22704. - /* See the comment in lock_timer_base() */
  22705. - timer_set_base(timer, NULL);
  22706. - spin_unlock(&base->lock);
  22707. - base = new_base;
  22708. - spin_lock(&base->lock);
  22709. - timer_set_base(timer, base);
  22710. - }
  22711. + if (likely(base->running_timer != timer))
  22712. + base = switch_timer_base(timer, base, new_base);
  22713. }
  22714. timer->expires = expires;
  22715. @@ -979,6 +1006,29 @@ void add_timer_on(struct timer_list *timer, int cpu)
  22716. }
  22717. EXPORT_SYMBOL_GPL(add_timer_on);
  22718. +#ifdef CONFIG_PREEMPT_RT_FULL
  22719. +/*
  22720. + * Wait for a running timer
  22721. + */
  22722. +static void wait_for_running_timer(struct timer_list *timer)
  22723. +{
  22724. + struct tvec_base *base = timer->base;
  22725. +
  22726. + if (base->running_timer == timer)
  22727. + wait_event(base->wait_for_running_timer,
  22728. + base->running_timer != timer);
  22729. +}
  22730. +
  22731. +# define wakeup_timer_waiters(b) wake_up_all(&(b)->wait_for_running_timer)
  22732. +#else
  22733. +static inline void wait_for_running_timer(struct timer_list *timer)
  22734. +{
  22735. + cpu_relax();
  22736. +}
  22737. +
  22738. +# define wakeup_timer_waiters(b) do { } while (0)
  22739. +#endif
  22740. +
  22741. /**
  22742. * del_timer - deactive a timer.
  22743. * @timer: the timer to be deactivated
  22744. @@ -1036,7 +1086,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
  22745. }
  22746. EXPORT_SYMBOL(try_to_del_timer_sync);
  22747. -#ifdef CONFIG_SMP
  22748. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  22749. static DEFINE_PER_CPU(struct tvec_base, __tvec_bases);
  22750. /**
  22751. @@ -1098,7 +1148,7 @@ int del_timer_sync(struct timer_list *timer)
  22752. int ret = try_to_del_timer_sync(timer);
  22753. if (ret >= 0)
  22754. return ret;
  22755. - cpu_relax();
  22756. + wait_for_running_timer(timer);
  22757. }
  22758. }
  22759. EXPORT_SYMBOL(del_timer_sync);
  22760. @@ -1219,16 +1269,18 @@ static inline void __run_timers(struct tvec_base *base)
  22761. if (irqsafe) {
  22762. spin_unlock(&base->lock);
  22763. call_timer_fn(timer, fn, data);
  22764. + base->running_timer = NULL;
  22765. spin_lock(&base->lock);
  22766. } else {
  22767. spin_unlock_irq(&base->lock);
  22768. call_timer_fn(timer, fn, data);
  22769. + base->running_timer = NULL;
  22770. spin_lock_irq(&base->lock);
  22771. }
  22772. }
  22773. }
  22774. - base->running_timer = NULL;
  22775. spin_unlock_irq(&base->lock);
  22776. + wakeup_timer_waiters(base);
  22777. }
  22778. #ifdef CONFIG_NO_HZ_COMMON
  22779. @@ -1367,6 +1419,14 @@ unsigned long get_next_timer_interrupt(unsigned long now)
  22780. if (cpu_is_offline(smp_processor_id()))
  22781. return expires;
  22782. +#ifdef CONFIG_PREEMPT_RT_FULL
  22783. + /*
  22784. + * On PREEMPT_RT we cannot sleep here. As a result we can't take
  22785. + * the base lock to check when the next timer is pending and so
  22786. + * we assume the next jiffy.
  22787. + */
  22788. + return now + 1;
  22789. +#endif
  22790. spin_lock(&base->lock);
  22791. if (base->active_timers) {
  22792. if (time_before_eq(base->next_timer, base->timer_jiffies))
  22793. @@ -1392,13 +1452,13 @@ void update_process_times(int user_tick)
  22794. /* Note: this timer irq context must be accounted for as well. */
  22795. account_process_tick(p, user_tick);
  22796. + scheduler_tick();
  22797. run_local_timers();
  22798. rcu_check_callbacks(user_tick);
  22799. -#ifdef CONFIG_IRQ_WORK
  22800. +#if defined(CONFIG_IRQ_WORK)
  22801. if (in_irq())
  22802. irq_work_tick();
  22803. #endif
  22804. - scheduler_tick();
  22805. run_posix_cpu_timers(p);
  22806. }
  22807. @@ -1411,6 +1471,8 @@ static void run_timer_softirq(struct softirq_action *h)
  22808. hrtimer_run_pending();
  22809. + irq_work_tick_soft();
  22810. +
  22811. if (time_after_eq(jiffies, base->timer_jiffies))
  22812. __run_timers(base);
  22813. }
  22814. @@ -1566,7 +1628,7 @@ static void migrate_timers(int cpu)
  22815. BUG_ON(cpu_online(cpu));
  22816. old_base = per_cpu(tvec_bases, cpu);
  22817. - new_base = get_cpu_var(tvec_bases);
  22818. + new_base = get_local_var(tvec_bases);
  22819. /*
  22820. * The caller is globally serialized and nobody else
  22821. * takes two locks at once, deadlock is not possible.
  22822. @@ -1590,7 +1652,7 @@ static void migrate_timers(int cpu)
  22823. spin_unlock(&old_base->lock);
  22824. spin_unlock_irq(&new_base->lock);
  22825. - put_cpu_var(tvec_bases);
  22826. + put_local_var(tvec_bases);
  22827. }
  22828. static int timer_cpu_notify(struct notifier_block *self,
  22829. @@ -1625,6 +1687,9 @@ static void __init init_timer_cpu(struct tvec_base *base, int cpu)
  22830. base->cpu = cpu;
  22831. per_cpu(tvec_bases, cpu) = base;
  22832. spin_lock_init(&base->lock);
  22833. +#ifdef CONFIG_PREEMPT_RT_FULL
  22834. + init_waitqueue_head(&base->wait_for_running_timer);
  22835. +#endif
  22836. for (j = 0; j < TVN_SIZE; j++) {
  22837. INIT_LIST_HEAD(base->tv5.vec + j);
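
Two RT-specific moves in the timer.c hunks above are worth spelling out: switch_timer_base() migrates a timer only when it can trylock the destination base, because parking the timer on a NULL base would let a preempting task spin forever in lock_timer_base(); and del_timer_sync() now sleeps on a per-base waitqueue until the running callback finishes instead of burning CPU in cpu_relax(). The following is a userspace analogue of the second pattern, with a pthread condition variable standing in for the waitqueue; it is illustrative rather than kernel code, and the two threads in main() may run in either order.

    /* Userspace sketch: wait for a running callback to finish instead of
     * busy-spinning, mirroring wait_for_running_timer() above. */
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t base_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  callback_done = PTHREAD_COND_INITIALIZER;
    static const void *running_timer;   /* which timer's callback is running */

    static void run_timer(const void *timer, void (*fn)(void))
    {
        pthread_mutex_lock(&base_lock);
        running_timer = timer;
        pthread_mutex_unlock(&base_lock);

        fn();                            /* callback runs without the lock */

        pthread_mutex_lock(&base_lock);
        running_timer = NULL;
        pthread_cond_broadcast(&callback_done);   /* wakeup_timer_waiters() */
        pthread_mutex_unlock(&base_lock);
    }

    /* del_timer_sync()-style wait: sleep, don't spin, while "timer" runs. */
    static void wait_for_running_timer(const void *timer)
    {
        pthread_mutex_lock(&base_lock);
        while (running_timer == timer)
            pthread_cond_wait(&callback_done, &base_lock);
        pthread_mutex_unlock(&base_lock);
    }

    static void my_callback(void) { printf("timer callback ran\n"); }

    static void *deleter(void *timer)
    {
        wait_for_running_timer(timer);
        printf("timer is quiescent, safe to free\n");
        return NULL;
    }

    int main(void)
    {
        int timer;                       /* any address serves as a timer id */
        pthread_t tid;

        pthread_create(&tid, NULL, deleter, &timer);
        run_timer(&timer, my_callback);
        pthread_join(tid, NULL);
        return 0;
    }
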
  22838. diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
  22839. index 3b9a48ae153a..ab3a277a3c20 100644
  22840. --- a/kernel/trace/Kconfig
  22841. +++ b/kernel/trace/Kconfig
  22842. @@ -187,6 +187,24 @@ config IRQSOFF_TRACER
  22843. enabled. This option and the preempt-off timing option can be
  22844. used together or separately.)
  22845. +config INTERRUPT_OFF_HIST
  22846. + bool "Interrupts-off Latency Histogram"
  22847. + depends on IRQSOFF_TRACER
  22848. + help
  22849. + This option generates continuously updated histograms (one per cpu)
  22850. + of the duration of time periods with interrupts disabled. The
  22851. + histograms are disabled by default. To enable them, write a non-zero
  22852. + number to
  22853. +
  22854. + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
  22855. +
  22856. + If PREEMPT_OFF_HIST is also selected, additional histograms (one
  22857. + per cpu) are generated that accumulate the duration of time periods
  22858. + when both interrupts and preemption are disabled. The histogram data
  22859. + will be located in the debug file system at
  22860. +
  22861. + /sys/kernel/debug/tracing/latency_hist/irqsoff
  22862. +
  22863. config PREEMPT_TRACER
  22864. bool "Preemption-off Latency Tracer"
  22865. default n
  22866. @@ -211,6 +229,24 @@ config PREEMPT_TRACER
  22867. enabled. This option and the irqs-off timing option can be
  22868. used together or separately.)
  22869. +config PREEMPT_OFF_HIST
  22870. + bool "Preemption-off Latency Histogram"
  22871. + depends on PREEMPT_TRACER
  22872. + help
  22873. + This option generates continuously updated histograms (one per cpu)
  22874. + of the duration of time periods with preemption disabled. The
  22875. + histograms are disabled by default. To enable them, write a non-zero
  22876. + number to
  22877. +
  22878. + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
  22879. +
  22880. + If INTERRUPT_OFF_HIST is also selected, additional histograms (one
  22881. + per cpu) are generated that accumulate the duration of time periods
  22882. + when both interrupts and preemption are disabled. The histogram data
  22883. + will be located in the debug file system at
  22884. +
  22885. + /sys/kernel/debug/tracing/latency_hist/preemptoff
  22886. +
  22887. config SCHED_TRACER
  22888. bool "Scheduling Latency Tracer"
  22889. select GENERIC_TRACER
  22890. @@ -221,6 +257,74 @@ config SCHED_TRACER
  22891. This tracer tracks the latency of the highest priority task
  22892. to be scheduled in, starting from the point it has woken up.
  22893. +config WAKEUP_LATENCY_HIST
  22894. + bool "Scheduling Latency Histogram"
  22895. + depends on SCHED_TRACER
  22896. + help
  22897. + This option generates continuously updated histograms (one per cpu)
  22898. + of the scheduling latency of the highest priority task.
  22899. + The histograms are disabled by default. To enable them, write a
  22900. + non-zero number to
  22901. +
  22902. + /sys/kernel/debug/tracing/latency_hist/enable/wakeup
  22903. +
  22904. + Two different algorithms are used, one to determine the latency of
  22905. + processes that exclusively use the highest priority of the system and
  22906. + another one to determine the latency of processes that share the
  22907. + highest system priority with other processes. The former is used to
  22908. + improve hardware and system software, the latter to optimize the
  22909. + priority design of a given system. The histogram data will be
  22910. + located in the debug file system at
  22911. +
  22912. + /sys/kernel/debug/tracing/latency_hist/wakeup
  22913. +
  22914. + and
  22915. +
  22916. + /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio
  22917. +
  22918. + If both Scheduling Latency Histogram and Missed Timer Offsets
  22919. + Histogram are selected, additional histogram data will be collected
  22920. + that contain, in addition to the wakeup latency, the timer latency, in
  22921. + case the wakeup was triggered by an expired timer. These histograms
  22922. + are available in the
  22923. +
  22924. + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
  22925. +
  22926. + directory. They reflect the apparent interrupt and scheduling latency
22927. + and are best suited to determine the worst-case latency of a given
  22928. + system. To enable these histograms, write a non-zero number to
  22929. +
  22930. + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
  22931. +
  22932. +config MISSED_TIMER_OFFSETS_HIST
  22933. + depends on HIGH_RES_TIMERS
  22934. + select GENERIC_TRACER
  22935. + bool "Missed Timer Offsets Histogram"
  22936. + help
  22937. + Generate a histogram of missed timer offsets in microseconds. The
  22938. + histograms are disabled by default. To enable them, write a non-zero
  22939. + number to
  22940. +
  22941. + /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets
  22942. +
  22943. + The histogram data will be located in the debug file system at
  22944. +
  22945. + /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets
  22946. +
  22947. + If both Scheduling Latency Histogram and Missed Timer Offsets
  22948. + Histogram are selected, additional histogram data will be collected
  22949. + that contain, in addition to the wakeup latency, the timer latency, in
  22950. + case the wakeup was triggered by an expired timer. These histograms
  22951. + are available in the
  22952. +
  22953. + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
  22954. +
  22955. + directory. They reflect the apparent interrupt and scheduling latency
22956. + and are best suited to determine the worst-case latency of a given
  22957. + system. To enable these histograms, write a non-zero number to
  22958. +
  22959. + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
  22960. +
  22961. config ENABLE_DEFAULT_TRACERS
  22962. bool "Trace process context switches and events"
  22963. depends on !GENERIC_TRACER
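
The help texts above all follow the same control scheme: write a non-zero number to a file under /sys/kernel/debug/tracing/latency_hist/enable/ to switch a histogram on, then read the per-CPU files in the matching directory. Here is a small sketch of that interaction from user space; it assumes debugfs is mounted at /sys/kernel/debug, the relevant config options are enabled, and that the per-CPU file is named CPU0 (the name follows latency_hist.c later in this patch and is an assumption here, so check your tree).

    /* Userspace sketch: enable the wakeup latency histogram and dump one
     * CPU's data, using the debugfs paths named in the help texts above. */
    #include <stdio.h>
    #include <stdlib.h>

    #define ENABLE_FILE "/sys/kernel/debug/tracing/latency_hist/enable/wakeup"
    #define HIST_FILE   "/sys/kernel/debug/tracing/latency_hist/wakeup/CPU0"

    int main(void)
    {
        FILE *f;
        char line[256];

        f = fopen(ENABLE_FILE, "w");
        if (!f) {
            perror(ENABLE_FILE);
            return EXIT_FAILURE;
        }
        fputs("1\n", f);                 /* any non-zero number enables it */
        fclose(f);

        f = fopen(HIST_FILE, "r");
        if (!f) {
            perror(HIST_FILE);
            return EXIT_FAILURE;
        }
        while (fgets(line, sizeof(line), f))
            fputs(line, stdout);         /* "#usecs  samples" style output */
        fclose(f);
        return 0;
    }
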
  22964. diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
  22965. index 9b1044e936a6..3bbaea06824a 100644
  22966. --- a/kernel/trace/Makefile
  22967. +++ b/kernel/trace/Makefile
  22968. @@ -36,6 +36,10 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
  22969. obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
  22970. obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
  22971. obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
  22972. +obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o
  22973. +obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o
  22974. +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o
  22975. +obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o
  22976. obj-$(CONFIG_NOP_TRACER) += trace_nop.o
  22977. obj-$(CONFIG_STACK_TRACER) += trace_stack.o
  22978. obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
  22979. diff --git a/kernel/trace/latency_hist.c b/kernel/trace/latency_hist.c
  22980. new file mode 100644
  22981. index 000000000000..b6c1d14b71c4
  22982. --- /dev/null
  22983. +++ b/kernel/trace/latency_hist.c
  22984. @@ -0,0 +1,1178 @@
  22985. +/*
  22986. + * kernel/trace/latency_hist.c
  22987. + *
  22988. + * Add support for histograms of preemption-off latency and
22989. + * interrupt-off latency and wakeup latency; it depends on
  22990. + * Real-Time Preemption Support.
  22991. + *
  22992. + * Copyright (C) 2005 MontaVista Software, Inc.
  22993. + * Yi Yang <yyang@ch.mvista.com>
  22994. + *
  22995. + * Converted to work with the new latency tracer.
  22996. + * Copyright (C) 2008 Red Hat, Inc.
  22997. + * Steven Rostedt <srostedt@redhat.com>
  22998. + *
  22999. + */
  23000. +#include <linux/module.h>
  23001. +#include <linux/debugfs.h>
  23002. +#include <linux/seq_file.h>
  23003. +#include <linux/percpu.h>
  23004. +#include <linux/kallsyms.h>
  23005. +#include <linux/uaccess.h>
  23006. +#include <linux/sched.h>
  23007. +#include <linux/sched/rt.h>
  23008. +#include <linux/slab.h>
  23009. +#include <linux/atomic.h>
  23010. +#include <asm/div64.h>
  23011. +
  23012. +#include "trace.h"
  23013. +#include <trace/events/sched.h>
  23014. +
  23015. +#define NSECS_PER_USECS 1000L
  23016. +
  23017. +#define CREATE_TRACE_POINTS
  23018. +#include <trace/events/hist.h>
  23019. +
  23020. +enum {
  23021. + IRQSOFF_LATENCY = 0,
  23022. + PREEMPTOFF_LATENCY,
  23023. + PREEMPTIRQSOFF_LATENCY,
  23024. + WAKEUP_LATENCY,
  23025. + WAKEUP_LATENCY_SHAREDPRIO,
  23026. + MISSED_TIMER_OFFSETS,
  23027. + TIMERANDWAKEUP_LATENCY,
  23028. + MAX_LATENCY_TYPE,
  23029. +};
  23030. +
  23031. +#define MAX_ENTRY_NUM 10240
  23032. +
  23033. +struct hist_data {
  23034. + atomic_t hist_mode; /* 0 log, 1 don't log */
  23035. + long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */
  23036. + long min_lat;
  23037. + long max_lat;
  23038. + unsigned long long below_hist_bound_samples;
  23039. + unsigned long long above_hist_bound_samples;
  23040. + long long accumulate_lat;
  23041. + unsigned long long total_samples;
  23042. + unsigned long long hist_array[MAX_ENTRY_NUM];
  23043. +};
  23044. +
  23045. +struct enable_data {
  23046. + int latency_type;
  23047. + int enabled;
  23048. +};
  23049. +
  23050. +static char *latency_hist_dir_root = "latency_hist";
  23051. +
  23052. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  23053. +static DEFINE_PER_CPU(struct hist_data, irqsoff_hist);
  23054. +static char *irqsoff_hist_dir = "irqsoff";
  23055. +static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start);
  23056. +static DEFINE_PER_CPU(int, hist_irqsoff_counting);
  23057. +#endif
  23058. +
  23059. +#ifdef CONFIG_PREEMPT_OFF_HIST
  23060. +static DEFINE_PER_CPU(struct hist_data, preemptoff_hist);
  23061. +static char *preemptoff_hist_dir = "preemptoff";
  23062. +static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start);
  23063. +static DEFINE_PER_CPU(int, hist_preemptoff_counting);
  23064. +#endif
  23065. +
  23066. +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
  23067. +static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist);
  23068. +static char *preemptirqsoff_hist_dir = "preemptirqsoff";
  23069. +static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start);
  23070. +static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting);
  23071. +#endif
  23072. +
  23073. +#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST)
  23074. +static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start);
  23075. +static struct enable_data preemptirqsoff_enabled_data = {
  23076. + .latency_type = PREEMPTIRQSOFF_LATENCY,
  23077. + .enabled = 0,
  23078. +};
  23079. +#endif
  23080. +
  23081. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23082. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23083. +struct maxlatproc_data {
  23084. + char comm[FIELD_SIZEOF(struct task_struct, comm)];
  23085. + char current_comm[FIELD_SIZEOF(struct task_struct, comm)];
  23086. + int pid;
  23087. + int current_pid;
  23088. + int prio;
  23089. + int current_prio;
  23090. + long latency;
  23091. + long timeroffset;
  23092. + cycle_t timestamp;
  23093. +};
  23094. +#endif
  23095. +
  23096. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23097. +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist);
  23098. +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio);
  23099. +static char *wakeup_latency_hist_dir = "wakeup";
  23100. +static char *wakeup_latency_hist_dir_sharedprio = "sharedprio";
  23101. +static notrace void probe_wakeup_latency_hist_start(void *v,
  23102. + struct task_struct *p);
  23103. +static notrace void probe_wakeup_latency_hist_stop(void *v,
  23104. + struct task_struct *prev, struct task_struct *next);
  23105. +static notrace void probe_sched_migrate_task(void *,
  23106. + struct task_struct *task, int cpu);
  23107. +static struct enable_data wakeup_latency_enabled_data = {
  23108. + .latency_type = WAKEUP_LATENCY,
  23109. + .enabled = 0,
  23110. +};
  23111. +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc);
  23112. +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio);
  23113. +static DEFINE_PER_CPU(struct task_struct *, wakeup_task);
  23114. +static DEFINE_PER_CPU(int, wakeup_sharedprio);
  23115. +static unsigned long wakeup_pid;
  23116. +#endif
  23117. +
  23118. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23119. +static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets);
  23120. +static char *missed_timer_offsets_dir = "missed_timer_offsets";
  23121. +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
  23122. + long long offset, struct task_struct *curr, struct task_struct *task);
  23123. +static struct enable_data missed_timer_offsets_enabled_data = {
  23124. + .latency_type = MISSED_TIMER_OFFSETS,
  23125. + .enabled = 0,
  23126. +};
  23127. +static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc);
  23128. +static unsigned long missed_timer_offsets_pid;
  23129. +#endif
  23130. +
  23131. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  23132. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23133. +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
  23134. +static char *timerandwakeup_latency_hist_dir = "timerandwakeup";
  23135. +static struct enable_data timerandwakeup_enabled_data = {
  23136. + .latency_type = TIMERANDWAKEUP_LATENCY,
  23137. + .enabled = 0,
  23138. +};
  23139. +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
  23140. +#endif
  23141. +
  23142. +void notrace latency_hist(int latency_type, int cpu, long latency,
  23143. + long timeroffset, cycle_t stop,
  23144. + struct task_struct *p)
  23145. +{
  23146. + struct hist_data *my_hist;
  23147. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23148. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23149. + struct maxlatproc_data *mp = NULL;
  23150. +#endif
  23151. +
  23152. + if (!cpu_possible(cpu) || latency_type < 0 ||
  23153. + latency_type >= MAX_LATENCY_TYPE)
  23154. + return;
  23155. +
  23156. + switch (latency_type) {
  23157. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  23158. + case IRQSOFF_LATENCY:
  23159. + my_hist = &per_cpu(irqsoff_hist, cpu);
  23160. + break;
  23161. +#endif
  23162. +#ifdef CONFIG_PREEMPT_OFF_HIST
  23163. + case PREEMPTOFF_LATENCY:
  23164. + my_hist = &per_cpu(preemptoff_hist, cpu);
  23165. + break;
  23166. +#endif
  23167. +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
  23168. + case PREEMPTIRQSOFF_LATENCY:
  23169. + my_hist = &per_cpu(preemptirqsoff_hist, cpu);
  23170. + break;
  23171. +#endif
  23172. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23173. + case WAKEUP_LATENCY:
  23174. + my_hist = &per_cpu(wakeup_latency_hist, cpu);
  23175. + mp = &per_cpu(wakeup_maxlatproc, cpu);
  23176. + break;
  23177. + case WAKEUP_LATENCY_SHAREDPRIO:
  23178. + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
  23179. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
  23180. + break;
  23181. +#endif
  23182. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23183. + case MISSED_TIMER_OFFSETS:
  23184. + my_hist = &per_cpu(missed_timer_offsets, cpu);
  23185. + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
  23186. + break;
  23187. +#endif
  23188. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  23189. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23190. + case TIMERANDWAKEUP_LATENCY:
  23191. + my_hist = &per_cpu(timerandwakeup_latency_hist, cpu);
  23192. + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
  23193. + break;
  23194. +#endif
  23195. +
  23196. + default:
  23197. + return;
  23198. + }
  23199. +
  23200. + latency += my_hist->offset;
  23201. +
  23202. + if (atomic_read(&my_hist->hist_mode) == 0)
  23203. + return;
  23204. +
  23205. + if (latency < 0 || latency >= MAX_ENTRY_NUM) {
  23206. + if (latency < 0)
  23207. + my_hist->below_hist_bound_samples++;
  23208. + else
  23209. + my_hist->above_hist_bound_samples++;
  23210. + } else
  23211. + my_hist->hist_array[latency]++;
  23212. +
  23213. + if (unlikely(latency > my_hist->max_lat ||
  23214. + my_hist->min_lat == LONG_MAX)) {
  23215. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23216. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23217. + if (latency_type == WAKEUP_LATENCY ||
  23218. + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
  23219. + latency_type == MISSED_TIMER_OFFSETS ||
  23220. + latency_type == TIMERANDWAKEUP_LATENCY) {
  23221. + strncpy(mp->comm, p->comm, sizeof(mp->comm));
  23222. + strncpy(mp->current_comm, current->comm,
  23223. + sizeof(mp->current_comm));
  23224. + mp->pid = task_pid_nr(p);
  23225. + mp->current_pid = task_pid_nr(current);
  23226. + mp->prio = p->prio;
  23227. + mp->current_prio = current->prio;
  23228. + mp->latency = latency;
  23229. + mp->timeroffset = timeroffset;
  23230. + mp->timestamp = stop;
  23231. + }
  23232. +#endif
  23233. + my_hist->max_lat = latency;
  23234. + }
  23235. + if (unlikely(latency < my_hist->min_lat))
  23236. + my_hist->min_lat = latency;
  23237. + my_hist->total_samples++;
  23238. + my_hist->accumulate_lat += latency;
  23239. +}
  23240. +
  23241. +static void *l_start(struct seq_file *m, loff_t *pos)
  23242. +{
  23243. + loff_t *index_ptr = NULL;
  23244. + loff_t index = *pos;
  23245. + struct hist_data *my_hist = m->private;
  23246. +
  23247. + if (index == 0) {
  23248. + char minstr[32], avgstr[32], maxstr[32];
  23249. +
  23250. + atomic_dec(&my_hist->hist_mode);
  23251. +
  23252. + if (likely(my_hist->total_samples)) {
  23253. + long avg = (long) div64_s64(my_hist->accumulate_lat,
  23254. + my_hist->total_samples);
  23255. + snprintf(minstr, sizeof(minstr), "%ld",
  23256. + my_hist->min_lat - my_hist->offset);
  23257. + snprintf(avgstr, sizeof(avgstr), "%ld",
  23258. + avg - my_hist->offset);
  23259. + snprintf(maxstr, sizeof(maxstr), "%ld",
  23260. + my_hist->max_lat - my_hist->offset);
  23261. + } else {
  23262. + strcpy(minstr, "<undef>");
  23263. + strcpy(avgstr, minstr);
  23264. + strcpy(maxstr, minstr);
  23265. + }
  23266. +
  23267. + seq_printf(m, "#Minimum latency: %s microseconds\n"
  23268. + "#Average latency: %s microseconds\n"
  23269. + "#Maximum latency: %s microseconds\n"
  23270. + "#Total samples: %llu\n"
  23271. + "#There are %llu samples lower than %ld"
  23272. + " microseconds.\n"
  23273. + "#There are %llu samples greater or equal"
  23274. + " than %ld microseconds.\n"
  23275. + "#usecs\t%16s\n",
  23276. + minstr, avgstr, maxstr,
  23277. + my_hist->total_samples,
  23278. + my_hist->below_hist_bound_samples,
  23279. + -my_hist->offset,
  23280. + my_hist->above_hist_bound_samples,
  23281. + MAX_ENTRY_NUM - my_hist->offset,
  23282. + "samples");
  23283. + }
  23284. + if (index < MAX_ENTRY_NUM) {
  23285. + index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
  23286. + if (index_ptr)
  23287. + *index_ptr = index;
  23288. + }
  23289. +
  23290. + return index_ptr;
  23291. +}
  23292. +
  23293. +static void *l_next(struct seq_file *m, void *p, loff_t *pos)
  23294. +{
  23295. + loff_t *index_ptr = p;
  23296. + struct hist_data *my_hist = m->private;
  23297. +
  23298. + if (++*pos >= MAX_ENTRY_NUM) {
  23299. + atomic_inc(&my_hist->hist_mode);
  23300. + return NULL;
  23301. + }
  23302. + *index_ptr = *pos;
  23303. + return index_ptr;
  23304. +}
  23305. +
  23306. +static void l_stop(struct seq_file *m, void *p)
  23307. +{
  23308. + kfree(p);
  23309. +}
  23310. +
  23311. +static int l_show(struct seq_file *m, void *p)
  23312. +{
  23313. + int index = *(loff_t *) p;
  23314. + struct hist_data *my_hist = m->private;
  23315. +
  23316. + seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset,
  23317. + my_hist->hist_array[index]);
  23318. + return 0;
  23319. +}
  23320. +
  23321. +static const struct seq_operations latency_hist_seq_op = {
  23322. + .start = l_start,
  23323. + .next = l_next,
  23324. + .stop = l_stop,
  23325. + .show = l_show
  23326. +};
  23327. +
  23328. +static int latency_hist_open(struct inode *inode, struct file *file)
  23329. +{
  23330. + int ret;
  23331. +
  23332. + ret = seq_open(file, &latency_hist_seq_op);
  23333. + if (!ret) {
  23334. + struct seq_file *seq = file->private_data;
  23335. + seq->private = inode->i_private;
  23336. + }
  23337. + return ret;
  23338. +}
  23339. +
  23340. +static const struct file_operations latency_hist_fops = {
  23341. + .open = latency_hist_open,
  23342. + .read = seq_read,
  23343. + .llseek = seq_lseek,
  23344. + .release = seq_release,
  23345. +};
  23346. +
  23347. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23348. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23349. +static void clear_maxlatprocdata(struct maxlatproc_data *mp)
  23350. +{
  23351. + mp->comm[0] = mp->current_comm[0] = '\0';
  23352. + mp->prio = mp->current_prio = mp->pid = mp->current_pid =
  23353. + mp->latency = mp->timeroffset = -1;
  23354. + mp->timestamp = 0;
  23355. +}
  23356. +#endif
  23357. +
  23358. +static void hist_reset(struct hist_data *hist)
  23359. +{
  23360. + atomic_dec(&hist->hist_mode);
  23361. +
  23362. + memset(hist->hist_array, 0, sizeof(hist->hist_array));
  23363. + hist->below_hist_bound_samples = 0ULL;
  23364. + hist->above_hist_bound_samples = 0ULL;
  23365. + hist->min_lat = LONG_MAX;
  23366. + hist->max_lat = LONG_MIN;
  23367. + hist->total_samples = 0ULL;
  23368. + hist->accumulate_lat = 0LL;
  23369. +
  23370. + atomic_inc(&hist->hist_mode);
  23371. +}
  23372. +
  23373. +static ssize_t
  23374. +latency_hist_reset(struct file *file, const char __user *a,
  23375. + size_t size, loff_t *off)
  23376. +{
  23377. + int cpu;
  23378. + struct hist_data *hist = NULL;
  23379. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23380. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23381. + struct maxlatproc_data *mp = NULL;
  23382. +#endif
  23383. + off_t latency_type = (off_t) file->private_data;
  23384. +
  23385. + for_each_online_cpu(cpu) {
  23386. +
  23387. + switch (latency_type) {
  23388. +#ifdef CONFIG_PREEMPT_OFF_HIST
  23389. + case PREEMPTOFF_LATENCY:
  23390. + hist = &per_cpu(preemptoff_hist, cpu);
  23391. + break;
  23392. +#endif
  23393. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  23394. + case IRQSOFF_LATENCY:
  23395. + hist = &per_cpu(irqsoff_hist, cpu);
  23396. + break;
  23397. +#endif
  23398. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  23399. + case PREEMPTIRQSOFF_LATENCY:
  23400. + hist = &per_cpu(preemptirqsoff_hist, cpu);
  23401. + break;
  23402. +#endif
  23403. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23404. + case WAKEUP_LATENCY:
  23405. + hist = &per_cpu(wakeup_latency_hist, cpu);
  23406. + mp = &per_cpu(wakeup_maxlatproc, cpu);
  23407. + break;
  23408. + case WAKEUP_LATENCY_SHAREDPRIO:
  23409. + hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
  23410. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
  23411. + break;
  23412. +#endif
  23413. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23414. + case MISSED_TIMER_OFFSETS:
  23415. + hist = &per_cpu(missed_timer_offsets, cpu);
  23416. + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
  23417. + break;
  23418. +#endif
  23419. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  23420. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23421. + case TIMERANDWAKEUP_LATENCY:
  23422. + hist = &per_cpu(timerandwakeup_latency_hist, cpu);
  23423. + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
  23424. + break;
  23425. +#endif
  23426. + }
  23427. +
  23428. + hist_reset(hist);
  23429. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23430. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23431. + if (latency_type == WAKEUP_LATENCY ||
  23432. + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
  23433. + latency_type == MISSED_TIMER_OFFSETS ||
  23434. + latency_type == TIMERANDWAKEUP_LATENCY)
  23435. + clear_maxlatprocdata(mp);
  23436. +#endif
  23437. + }
  23438. +
  23439. + return size;
  23440. +}
  23441. +
  23442. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23443. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23444. +static ssize_t
  23445. +show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  23446. +{
  23447. + char buf[64];
  23448. + int r;
  23449. + unsigned long *this_pid = file->private_data;
  23450. +
  23451. + r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid);
  23452. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  23453. +}
  23454. +
  23455. +static ssize_t do_pid(struct file *file, const char __user *ubuf,
  23456. + size_t cnt, loff_t *ppos)
  23457. +{
  23458. + char buf[64];
  23459. + unsigned long pid;
  23460. + unsigned long *this_pid = file->private_data;
  23461. +
  23462. + if (cnt >= sizeof(buf))
  23463. + return -EINVAL;
  23464. +
  23465. + if (copy_from_user(&buf, ubuf, cnt))
  23466. + return -EFAULT;
  23467. +
  23468. + buf[cnt] = '\0';
  23469. +
  23470. + if (kstrtoul(buf, 10, &pid))
  23471. + return -EINVAL;
  23472. +
  23473. + *this_pid = pid;
  23474. +
  23475. + return cnt;
  23476. +}
  23477. +#endif
  23478. +
  23479. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23480. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23481. +static ssize_t
  23482. +show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  23483. +{
  23484. + int r;
  23485. + struct maxlatproc_data *mp = file->private_data;
  23486. + int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
  23487. + unsigned long long t;
  23488. + unsigned long usecs, secs;
  23489. + char *buf;
  23490. +
  23491. + if (mp->pid == -1 || mp->current_pid == -1) {
  23492. + buf = "(none)\n";
  23493. + return simple_read_from_buffer(ubuf, cnt, ppos, buf,
  23494. + strlen(buf));
  23495. + }
  23496. +
  23497. + buf = kmalloc(strmaxlen, GFP_KERNEL);
  23498. + if (buf == NULL)
  23499. + return -ENOMEM;
  23500. +
  23501. + t = ns2usecs(mp->timestamp);
  23502. + usecs = do_div(t, USEC_PER_SEC);
  23503. + secs = (unsigned long) t;
  23504. + r = snprintf(buf, strmaxlen,
  23505. + "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
  23506. + MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
  23507. + mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
  23508. + secs, usecs);
  23509. + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  23510. + kfree(buf);
  23511. + return r;
  23512. +}
  23513. +#endif
  23514. +
  23515. +static ssize_t
  23516. +show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  23517. +{
  23518. + char buf[64];
  23519. + struct enable_data *ed = file->private_data;
  23520. + int r;
  23521. +
  23522. + r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled);
  23523. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  23524. +}
  23525. +
  23526. +static ssize_t
  23527. +do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos)
  23528. +{
  23529. + char buf[64];
  23530. + long enable;
  23531. + struct enable_data *ed = file->private_data;
  23532. +
  23533. + if (cnt >= sizeof(buf))
  23534. + return -EINVAL;
  23535. +
  23536. + if (copy_from_user(&buf, ubuf, cnt))
  23537. + return -EFAULT;
  23538. +
  23539. + buf[cnt] = 0;
  23540. +
  23541. + if (kstrtoul(buf, 10, &enable))
  23542. + return -EINVAL;
  23543. +
  23544. + if ((enable && ed->enabled) || (!enable && !ed->enabled))
  23545. + return cnt;
  23546. +
  23547. + if (enable) {
  23548. + int ret;
  23549. +
  23550. + switch (ed->latency_type) {
  23551. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  23552. + case PREEMPTIRQSOFF_LATENCY:
  23553. + ret = register_trace_preemptirqsoff_hist(
  23554. + probe_preemptirqsoff_hist, NULL);
  23555. + if (ret) {
  23556. + pr_info("wakeup trace: Couldn't assign "
  23557. + "probe_preemptirqsoff_hist "
  23558. + "to trace_preemptirqsoff_hist\n");
  23559. + return ret;
  23560. + }
  23561. + break;
  23562. +#endif
  23563. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23564. + case WAKEUP_LATENCY:
  23565. + ret = register_trace_sched_wakeup(
  23566. + probe_wakeup_latency_hist_start, NULL);
  23567. + if (ret) {
  23568. + pr_info("wakeup trace: Couldn't assign "
  23569. + "probe_wakeup_latency_hist_start "
  23570. + "to trace_sched_wakeup\n");
  23571. + return ret;
  23572. + }
  23573. + ret = register_trace_sched_wakeup_new(
  23574. + probe_wakeup_latency_hist_start, NULL);
  23575. + if (ret) {
  23576. + pr_info("wakeup trace: Couldn't assign "
  23577. + "probe_wakeup_latency_hist_start "
  23578. + "to trace_sched_wakeup_new\n");
  23579. + unregister_trace_sched_wakeup(
  23580. + probe_wakeup_latency_hist_start, NULL);
  23581. + return ret;
  23582. + }
  23583. + ret = register_trace_sched_switch(
  23584. + probe_wakeup_latency_hist_stop, NULL);
  23585. + if (ret) {
  23586. + pr_info("wakeup trace: Couldn't assign "
  23587. + "probe_wakeup_latency_hist_stop "
  23588. + "to trace_sched_switch\n");
  23589. + unregister_trace_sched_wakeup(
  23590. + probe_wakeup_latency_hist_start, NULL);
  23591. + unregister_trace_sched_wakeup_new(
  23592. + probe_wakeup_latency_hist_start, NULL);
  23593. + return ret;
  23594. + }
  23595. + ret = register_trace_sched_migrate_task(
  23596. + probe_sched_migrate_task, NULL);
  23597. + if (ret) {
  23598. + pr_info("wakeup trace: Couldn't assign "
  23599. + "probe_sched_migrate_task "
  23600. + "to trace_sched_migrate_task\n");
  23601. + unregister_trace_sched_wakeup(
  23602. + probe_wakeup_latency_hist_start, NULL);
  23603. + unregister_trace_sched_wakeup_new(
  23604. + probe_wakeup_latency_hist_start, NULL);
  23605. + unregister_trace_sched_switch(
  23606. + probe_wakeup_latency_hist_stop, NULL);
  23607. + return ret;
  23608. + }
  23609. + break;
  23610. +#endif
  23611. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23612. + case MISSED_TIMER_OFFSETS:
  23613. + ret = register_trace_hrtimer_interrupt(
  23614. + probe_hrtimer_interrupt, NULL);
  23615. + if (ret) {
  23616. + pr_info("wakeup trace: Couldn't assign "
  23617. + "probe_hrtimer_interrupt "
  23618. + "to trace_hrtimer_interrupt\n");
  23619. + return ret;
  23620. + }
  23621. + break;
  23622. +#endif
  23623. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  23624. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23625. + case TIMERANDWAKEUP_LATENCY:
  23626. + if (!wakeup_latency_enabled_data.enabled ||
  23627. + !missed_timer_offsets_enabled_data.enabled)
  23628. + return -EINVAL;
  23629. + break;
  23630. +#endif
  23631. + default:
  23632. + break;
  23633. + }
  23634. + } else {
  23635. + switch (ed->latency_type) {
  23636. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  23637. + case PREEMPTIRQSOFF_LATENCY:
  23638. + {
  23639. + int cpu;
  23640. +
  23641. + unregister_trace_preemptirqsoff_hist(
  23642. + probe_preemptirqsoff_hist, NULL);
  23643. + for_each_online_cpu(cpu) {
  23644. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  23645. + per_cpu(hist_irqsoff_counting,
  23646. + cpu) = 0;
  23647. +#endif
  23648. +#ifdef CONFIG_PREEMPT_OFF_HIST
  23649. + per_cpu(hist_preemptoff_counting,
  23650. + cpu) = 0;
  23651. +#endif
  23652. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  23653. + per_cpu(hist_preemptirqsoff_counting,
  23654. + cpu) = 0;
  23655. +#endif
  23656. + }
  23657. + }
  23658. + break;
  23659. +#endif
  23660. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23661. + case WAKEUP_LATENCY:
  23662. + {
  23663. + int cpu;
  23664. +
  23665. + unregister_trace_sched_wakeup(
  23666. + probe_wakeup_latency_hist_start, NULL);
  23667. + unregister_trace_sched_wakeup_new(
  23668. + probe_wakeup_latency_hist_start, NULL);
  23669. + unregister_trace_sched_switch(
  23670. + probe_wakeup_latency_hist_stop, NULL);
  23671. + unregister_trace_sched_migrate_task(
  23672. + probe_sched_migrate_task, NULL);
  23673. +
  23674. + for_each_online_cpu(cpu) {
  23675. + per_cpu(wakeup_task, cpu) = NULL;
  23676. + per_cpu(wakeup_sharedprio, cpu) = 0;
  23677. + }
  23678. + }
  23679. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23680. + timerandwakeup_enabled_data.enabled = 0;
  23681. +#endif
  23682. + break;
  23683. +#endif
  23684. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23685. + case MISSED_TIMER_OFFSETS:
  23686. + unregister_trace_hrtimer_interrupt(
  23687. + probe_hrtimer_interrupt, NULL);
  23688. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23689. + timerandwakeup_enabled_data.enabled = 0;
  23690. +#endif
  23691. + break;
  23692. +#endif
  23693. + default:
  23694. + break;
  23695. + }
  23696. + }
  23697. + ed->enabled = enable;
  23698. + return cnt;
  23699. +}
  23700. +
  23701. +static const struct file_operations latency_hist_reset_fops = {
  23702. + .open = tracing_open_generic,
  23703. + .write = latency_hist_reset,
  23704. +};
  23705. +
  23706. +static const struct file_operations enable_fops = {
  23707. + .open = tracing_open_generic,
  23708. + .read = show_enable,
  23709. + .write = do_enable,
  23710. +};
  23711. +
  23712. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23713. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23714. +static const struct file_operations pid_fops = {
  23715. + .open = tracing_open_generic,
  23716. + .read = show_pid,
  23717. + .write = do_pid,
  23718. +};
  23719. +
  23720. +static const struct file_operations maxlatproc_fops = {
  23721. + .open = tracing_open_generic,
  23722. + .read = show_maxlatproc,
  23723. +};
  23724. +#endif
  23725. +
  23726. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  23727. +static notrace void probe_preemptirqsoff_hist(void *v, int reason,
  23728. + int starthist)
  23729. +{
  23730. + int cpu = raw_smp_processor_id();
  23731. + int time_set = 0;
  23732. +
  23733. + if (starthist) {
  23734. + cycle_t uninitialized_var(start);
  23735. +
  23736. + if (!preempt_count() && !irqs_disabled())
  23737. + return;
  23738. +
  23739. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  23740. + if ((reason == IRQS_OFF || reason == TRACE_START) &&
  23741. + !per_cpu(hist_irqsoff_counting, cpu)) {
  23742. + per_cpu(hist_irqsoff_counting, cpu) = 1;
  23743. + start = ftrace_now(cpu);
  23744. + time_set++;
  23745. + per_cpu(hist_irqsoff_start, cpu) = start;
  23746. + }
  23747. +#endif
  23748. +
  23749. +#ifdef CONFIG_PREEMPT_OFF_HIST
  23750. + if ((reason == PREEMPT_OFF || reason == TRACE_START) &&
  23751. + !per_cpu(hist_preemptoff_counting, cpu)) {
  23752. + per_cpu(hist_preemptoff_counting, cpu) = 1;
  23753. + if (!(time_set++))
  23754. + start = ftrace_now(cpu);
  23755. + per_cpu(hist_preemptoff_start, cpu) = start;
  23756. + }
  23757. +#endif
  23758. +
  23759. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  23760. + if (per_cpu(hist_irqsoff_counting, cpu) &&
  23761. + per_cpu(hist_preemptoff_counting, cpu) &&
  23762. + !per_cpu(hist_preemptirqsoff_counting, cpu)) {
  23763. + per_cpu(hist_preemptirqsoff_counting, cpu) = 1;
  23764. + if (!time_set)
  23765. + start = ftrace_now(cpu);
  23766. + per_cpu(hist_preemptirqsoff_start, cpu) = start;
  23767. + }
  23768. +#endif
  23769. + } else {
  23770. + cycle_t uninitialized_var(stop);
  23771. +
  23772. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  23773. + if ((reason == IRQS_ON || reason == TRACE_STOP) &&
  23774. + per_cpu(hist_irqsoff_counting, cpu)) {
  23775. + cycle_t start = per_cpu(hist_irqsoff_start, cpu);
  23776. +
  23777. + stop = ftrace_now(cpu);
  23778. + time_set++;
  23779. + if (start) {
  23780. + long latency = ((long) (stop - start)) /
  23781. + NSECS_PER_USECS;
  23782. +
  23783. + latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
  23784. + stop, NULL);
  23785. + }
  23786. + per_cpu(hist_irqsoff_counting, cpu) = 0;
  23787. + }
  23788. +#endif
  23789. +
  23790. +#ifdef CONFIG_PREEMPT_OFF_HIST
  23791. + if ((reason == PREEMPT_ON || reason == TRACE_STOP) &&
  23792. + per_cpu(hist_preemptoff_counting, cpu)) {
  23793. + cycle_t start = per_cpu(hist_preemptoff_start, cpu);
  23794. +
  23795. + if (!(time_set++))
  23796. + stop = ftrace_now(cpu);
  23797. + if (start) {
  23798. + long latency = ((long) (stop - start)) /
  23799. + NSECS_PER_USECS;
  23800. +
  23801. + latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
  23802. + 0, stop, NULL);
  23803. + }
  23804. + per_cpu(hist_preemptoff_counting, cpu) = 0;
  23805. + }
  23806. +#endif
  23807. +
  23808. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  23809. + if ((!per_cpu(hist_irqsoff_counting, cpu) ||
  23810. + !per_cpu(hist_preemptoff_counting, cpu)) &&
  23811. + per_cpu(hist_preemptirqsoff_counting, cpu)) {
  23812. + cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu);
  23813. +
  23814. + if (!time_set)
  23815. + stop = ftrace_now(cpu);
  23816. + if (start) {
  23817. + long latency = ((long) (stop - start)) /
  23818. + NSECS_PER_USECS;
  23819. +
  23820. + latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
  23821. + latency, 0, stop, NULL);
  23822. + }
  23823. + per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
  23824. + }
  23825. +#endif
  23826. + }
  23827. +}
  23828. +#endif
  23829. +
  23830. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23831. +static DEFINE_RAW_SPINLOCK(wakeup_lock);
  23832. +static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
  23833. + int cpu)
  23834. +{
  23835. + int old_cpu = task_cpu(task);
  23836. +
  23837. + if (cpu != old_cpu) {
  23838. + unsigned long flags;
  23839. + struct task_struct *cpu_wakeup_task;
  23840. +
  23841. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  23842. +
  23843. + cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
  23844. + if (task == cpu_wakeup_task) {
  23845. + put_task_struct(cpu_wakeup_task);
  23846. + per_cpu(wakeup_task, old_cpu) = NULL;
  23847. + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
  23848. + get_task_struct(cpu_wakeup_task);
  23849. + }
  23850. +
  23851. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  23852. + }
  23853. +}
  23854. +
  23855. +static notrace void probe_wakeup_latency_hist_start(void *v,
  23856. + struct task_struct *p)
  23857. +{
  23858. + unsigned long flags;
  23859. + struct task_struct *curr = current;
  23860. + int cpu = task_cpu(p);
  23861. + struct task_struct *cpu_wakeup_task;
  23862. +
  23863. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  23864. +
  23865. + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
  23866. +
  23867. + if (wakeup_pid) {
  23868. + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
  23869. + p->prio == curr->prio)
  23870. + per_cpu(wakeup_sharedprio, cpu) = 1;
  23871. + if (likely(wakeup_pid != task_pid_nr(p)))
  23872. + goto out;
  23873. + } else {
  23874. + if (likely(!rt_task(p)) ||
  23875. + (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) ||
  23876. + p->prio > curr->prio)
  23877. + goto out;
  23878. + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
  23879. + p->prio == curr->prio)
  23880. + per_cpu(wakeup_sharedprio, cpu) = 1;
  23881. + }
  23882. +
  23883. + if (cpu_wakeup_task)
  23884. + put_task_struct(cpu_wakeup_task);
  23885. + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p;
  23886. + get_task_struct(cpu_wakeup_task);
  23887. + cpu_wakeup_task->preempt_timestamp_hist =
  23888. + ftrace_now(raw_smp_processor_id());
  23889. +out:
  23890. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  23891. +}
  23892. +
  23893. +static notrace void probe_wakeup_latency_hist_stop(void *v,
  23894. + struct task_struct *prev, struct task_struct *next)
  23895. +{
  23896. + unsigned long flags;
  23897. + int cpu = task_cpu(next);
  23898. + long latency;
  23899. + cycle_t stop;
  23900. + struct task_struct *cpu_wakeup_task;
  23901. +
  23902. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  23903. +
  23904. + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
  23905. +
  23906. + if (cpu_wakeup_task == NULL)
  23907. + goto out;
  23908. +
  23909. + /* Already running? */
  23910. + if (unlikely(current == cpu_wakeup_task))
  23911. + goto out_reset;
  23912. +
  23913. + if (next != cpu_wakeup_task) {
  23914. + if (next->prio < cpu_wakeup_task->prio)
  23915. + goto out_reset;
  23916. +
  23917. + if (next->prio == cpu_wakeup_task->prio)
  23918. + per_cpu(wakeup_sharedprio, cpu) = 1;
  23919. +
  23920. + goto out;
  23921. + }
  23922. +
  23923. + if (current->prio == cpu_wakeup_task->prio)
  23924. + per_cpu(wakeup_sharedprio, cpu) = 1;
  23925. +
  23926. + /*
  23927. + * The task we are waiting for is about to be switched to.
  23928. + * Calculate latency and store it in histogram.
  23929. + */
  23930. + stop = ftrace_now(raw_smp_processor_id());
  23931. +
  23932. + latency = ((long) (stop - next->preempt_timestamp_hist)) /
  23933. + NSECS_PER_USECS;
  23934. +
  23935. + if (per_cpu(wakeup_sharedprio, cpu)) {
  23936. + latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
  23937. + next);
  23938. + per_cpu(wakeup_sharedprio, cpu) = 0;
  23939. + } else {
  23940. + latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
  23941. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23942. + if (timerandwakeup_enabled_data.enabled) {
  23943. + latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
  23944. + next->timer_offset + latency, next->timer_offset,
  23945. + stop, next);
  23946. + }
  23947. +#endif
  23948. + }
  23949. +
  23950. +out_reset:
  23951. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23952. + next->timer_offset = 0;
  23953. +#endif
  23954. + put_task_struct(cpu_wakeup_task);
  23955. + per_cpu(wakeup_task, cpu) = NULL;
  23956. +out:
  23957. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  23958. +}
  23959. +#endif
  23960. +
  23961. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23962. +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
  23963. + long long latency_ns, struct task_struct *curr,
  23964. + struct task_struct *task)
  23965. +{
  23966. + if (latency_ns <= 0 && task != NULL && rt_task(task) &&
  23967. + (task->prio < curr->prio ||
  23968. + (task->prio == curr->prio &&
  23969. + !cpumask_test_cpu(cpu, &task->cpus_allowed)))) {
  23970. + long latency;
  23971. + cycle_t now;
  23972. +
  23973. + if (missed_timer_offsets_pid) {
  23974. + if (likely(missed_timer_offsets_pid !=
  23975. + task_pid_nr(task)))
  23976. + return;
  23977. + }
  23978. +
  23979. + now = ftrace_now(cpu);
  23980. + latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
  23981. + latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
  23982. + task);
  23983. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23984. + task->timer_offset = latency;
  23985. +#endif
  23986. + }
  23987. +}
  23988. +#endif
  23989. +
  23990. +static __init int latency_hist_init(void)
  23991. +{
  23992. + struct dentry *latency_hist_root = NULL;
  23993. + struct dentry *dentry;
  23994. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23995. + struct dentry *dentry_sharedprio;
  23996. +#endif
  23997. + struct dentry *entry;
  23998. + struct dentry *enable_root;
  23999. + int i = 0;
  24000. + struct hist_data *my_hist;
  24001. + char name[64];
  24002. + char *cpufmt = "CPU%d";
  24003. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  24004. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  24005. + char *cpufmt_maxlatproc = "max_latency-CPU%d";
  24006. + struct maxlatproc_data *mp = NULL;
  24007. +#endif
  24008. +
  24009. + dentry = tracing_init_dentry();
  24010. + latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry);
  24011. + enable_root = debugfs_create_dir("enable", latency_hist_root);
  24012. +
  24013. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  24014. + dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root);
  24015. + for_each_possible_cpu(i) {
  24016. + sprintf(name, cpufmt, i);
  24017. + entry = debugfs_create_file(name, 0444, dentry,
  24018. + &per_cpu(irqsoff_hist, i), &latency_hist_fops);
  24019. + my_hist = &per_cpu(irqsoff_hist, i);
  24020. + atomic_set(&my_hist->hist_mode, 1);
  24021. + my_hist->min_lat = LONG_MAX;
  24022. + }
  24023. + entry = debugfs_create_file("reset", 0644, dentry,
  24024. + (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops);
  24025. +#endif
  24026. +
  24027. +#ifdef CONFIG_PREEMPT_OFF_HIST
  24028. + dentry = debugfs_create_dir(preemptoff_hist_dir,
  24029. + latency_hist_root);
  24030. + for_each_possible_cpu(i) {
  24031. + sprintf(name, cpufmt, i);
  24032. + entry = debugfs_create_file(name, 0444, dentry,
  24033. + &per_cpu(preemptoff_hist, i), &latency_hist_fops);
  24034. + my_hist = &per_cpu(preemptoff_hist, i);
  24035. + atomic_set(&my_hist->hist_mode, 1);
  24036. + my_hist->min_lat = LONG_MAX;
  24037. + }
  24038. + entry = debugfs_create_file("reset", 0644, dentry,
  24039. + (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops);
  24040. +#endif
  24041. +
  24042. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  24043. + dentry = debugfs_create_dir(preemptirqsoff_hist_dir,
  24044. + latency_hist_root);
  24045. + for_each_possible_cpu(i) {
  24046. + sprintf(name, cpufmt, i);
  24047. + entry = debugfs_create_file(name, 0444, dentry,
  24048. + &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops);
  24049. + my_hist = &per_cpu(preemptirqsoff_hist, i);
  24050. + atomic_set(&my_hist->hist_mode, 1);
  24051. + my_hist->min_lat = LONG_MAX;
  24052. + }
  24053. + entry = debugfs_create_file("reset", 0644, dentry,
  24054. + (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops);
  24055. +#endif
  24056. +
  24057. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  24058. + entry = debugfs_create_file("preemptirqsoff", 0644,
  24059. + enable_root, (void *)&preemptirqsoff_enabled_data,
  24060. + &enable_fops);
  24061. +#endif
  24062. +
  24063. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  24064. + dentry = debugfs_create_dir(wakeup_latency_hist_dir,
  24065. + latency_hist_root);
  24066. + dentry_sharedprio = debugfs_create_dir(
  24067. + wakeup_latency_hist_dir_sharedprio, dentry);
  24068. + for_each_possible_cpu(i) {
  24069. + sprintf(name, cpufmt, i);
  24070. +
  24071. + entry = debugfs_create_file(name, 0444, dentry,
  24072. + &per_cpu(wakeup_latency_hist, i),
  24073. + &latency_hist_fops);
  24074. + my_hist = &per_cpu(wakeup_latency_hist, i);
  24075. + atomic_set(&my_hist->hist_mode, 1);
  24076. + my_hist->min_lat = LONG_MAX;
  24077. +
  24078. + entry = debugfs_create_file(name, 0444, dentry_sharedprio,
  24079. + &per_cpu(wakeup_latency_hist_sharedprio, i),
  24080. + &latency_hist_fops);
  24081. + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i);
  24082. + atomic_set(&my_hist->hist_mode, 1);
  24083. + my_hist->min_lat = LONG_MAX;
  24084. +
  24085. + sprintf(name, cpufmt_maxlatproc, i);
  24086. +
  24087. + mp = &per_cpu(wakeup_maxlatproc, i);
  24088. + entry = debugfs_create_file(name, 0444, dentry, mp,
  24089. + &maxlatproc_fops);
  24090. + clear_maxlatprocdata(mp);
  24091. +
  24092. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, i);
  24093. + entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp,
  24094. + &maxlatproc_fops);
  24095. + clear_maxlatprocdata(mp);
  24096. + }
  24097. + entry = debugfs_create_file("pid", 0644, dentry,
  24098. + (void *)&wakeup_pid, &pid_fops);
  24099. + entry = debugfs_create_file("reset", 0644, dentry,
  24100. + (void *)WAKEUP_LATENCY, &latency_hist_reset_fops);
  24101. + entry = debugfs_create_file("reset", 0644, dentry_sharedprio,
  24102. + (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops);
  24103. + entry = debugfs_create_file("wakeup", 0644,
  24104. + enable_root, (void *)&wakeup_latency_enabled_data,
  24105. + &enable_fops);
  24106. +#endif
  24107. +
  24108. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  24109. + dentry = debugfs_create_dir(missed_timer_offsets_dir,
  24110. + latency_hist_root);
  24111. + for_each_possible_cpu(i) {
  24112. + sprintf(name, cpufmt, i);
  24113. + entry = debugfs_create_file(name, 0444, dentry,
  24114. + &per_cpu(missed_timer_offsets, i), &latency_hist_fops);
  24115. + my_hist = &per_cpu(missed_timer_offsets, i);
  24116. + atomic_set(&my_hist->hist_mode, 1);
  24117. + my_hist->min_lat = LONG_MAX;
  24118. +
  24119. + sprintf(name, cpufmt_maxlatproc, i);
  24120. + mp = &per_cpu(missed_timer_offsets_maxlatproc, i);
  24121. + entry = debugfs_create_file(name, 0444, dentry, mp,
  24122. + &maxlatproc_fops);
  24123. + clear_maxlatprocdata(mp);
  24124. + }
  24125. + entry = debugfs_create_file("pid", 0644, dentry,
  24126. + (void *)&missed_timer_offsets_pid, &pid_fops);
  24127. + entry = debugfs_create_file("reset", 0644, dentry,
  24128. + (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops);
  24129. + entry = debugfs_create_file("missed_timer_offsets", 0644,
  24130. + enable_root, (void *)&missed_timer_offsets_enabled_data,
  24131. + &enable_fops);
  24132. +#endif
  24133. +
  24134. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  24135. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  24136. + dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
  24137. + latency_hist_root);
  24138. + for_each_possible_cpu(i) {
  24139. + sprintf(name, cpufmt, i);
  24140. + entry = debugfs_create_file(name, 0444, dentry,
  24141. + &per_cpu(timerandwakeup_latency_hist, i),
  24142. + &latency_hist_fops);
  24143. + my_hist = &per_cpu(timerandwakeup_latency_hist, i);
  24144. + atomic_set(&my_hist->hist_mode, 1);
  24145. + my_hist->min_lat = LONG_MAX;
  24146. +
  24147. + sprintf(name, cpufmt_maxlatproc, i);
  24148. + mp = &per_cpu(timerandwakeup_maxlatproc, i);
  24149. + entry = debugfs_create_file(name, 0444, dentry, mp,
  24150. + &maxlatproc_fops);
  24151. + clear_maxlatprocdata(mp);
  24152. + }
  24153. + entry = debugfs_create_file("reset", 0644, dentry,
  24154. + (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops);
  24155. + entry = debugfs_create_file("timerandwakeup", 0644,
  24156. + enable_root, (void *)&timerandwakeup_enabled_data,
  24157. + &enable_fops);
  24158. +#endif
  24159. + return 0;
  24160. +}
  24161. +
  24162. +device_initcall(latency_hist_init);
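The debugfs layout created by latency_hist_init() can be exercised from user space once debugfs is mounted. The sketch below is illustrative only and is not part of the patch: the debugfs mount point and the histogram directory name are assumptions, while the "enable/wakeup", "reset", "pid", "CPU%d" and "max_latency-CPU%d" file names are taken from the code above.

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f;

	/* enable wakeup-latency histograms (file created under enable_root above) */
	f = fopen("/sys/kernel/debug/latency_hist/enable/wakeup", "w");
	if (f) {
		fputs("1\n", f);
		fclose(f);
	}

	/* dump the per-CPU histogram for CPU0 (histogram directory name assumed) */
	f = fopen("/sys/kernel/debug/latency_hist/wakeup/CPU0", "r");
	if (f) {
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
	}
	return 0;
}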
  24163. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
  24164. index 61ea7e8cdde5..af8b5e5469bf 100644
  24165. --- a/kernel/trace/trace.c
  24166. +++ b/kernel/trace/trace.c
  24167. @@ -1630,6 +1630,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
  24168. struct task_struct *tsk = current;
  24169. entry->preempt_count = pc & 0xff;
  24170. + entry->preempt_lazy_count = preempt_lazy_count();
  24171. entry->pid = (tsk) ? tsk->pid : 0;
  24172. entry->flags =
  24173. #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
  24174. @@ -1639,8 +1640,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
  24175. #endif
  24176. ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
  24177. ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
  24178. - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
  24179. + (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) |
  24180. + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) |
  24181. (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
  24182. +
  24183. + entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
  24184. }
  24185. EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
  24186. @@ -2558,14 +2562,17 @@ get_total_entries(struct trace_buffer *buf,
  24187. static void print_lat_help_header(struct seq_file *m)
  24188. {
  24189. - seq_puts(m, "# _------=> CPU# \n"
  24190. - "# / _-----=> irqs-off \n"
  24191. - "# | / _----=> need-resched \n"
  24192. - "# || / _---=> hardirq/softirq \n"
  24193. - "# ||| / _--=> preempt-depth \n"
  24194. - "# |||| / delay \n"
  24195. - "# cmd pid ||||| time | caller \n"
  24196. - "# \\ / ||||| \\ | / \n");
  24197. + seq_puts(m, "# _--------=> CPU# \n"
  24198. + "# / _-------=> irqs-off \n"
  24199. + "# | / _------=> need-resched \n"
  24200. + "# || / _-----=> need-resched_lazy \n"
  24201. + "# ||| / _----=> hardirq/softirq \n"
  24202. + "# |||| / _---=> preempt-depth \n"
  24203. + "# ||||| / _--=> preempt-lazy-depth\n"
  24204. + "# |||||| / _-=> migrate-disable \n"
  24205. + "# ||||||| / delay \n"
  24206. + "# cmd pid |||||||| time | caller \n"
  24207. + "# \\ / |||||||| \\ | / \n");
  24208. }
  24209. static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
  24210. @@ -2591,11 +2598,14 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file
  24211. print_event_info(buf, m);
  24212. seq_puts(m, "# _-----=> irqs-off\n"
  24213. "# / _----=> need-resched\n"
  24214. - "# | / _---=> hardirq/softirq\n"
  24215. - "# || / _--=> preempt-depth\n"
  24216. - "# ||| / delay\n"
  24217. - "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"
  24218. - "# | | | |||| | |\n");
  24219. + "# |/ _-----=> need-resched_lazy\n"
  24220. + "# || / _---=> hardirq/softirq\n"
  24221. + "# ||| / _--=> preempt-depth\n"
  24222. + "# |||| /_--=> preempt-lazy-depth\n"
  24223. + "# ||||| _-=> migrate-disable \n"
  24224. + "# ||||| / delay\n"
  24225. + "# TASK-PID CPU# |||||| TIMESTAMP FUNCTION\n"
  24226. + "# | | | |||||| | |\n");
  24227. }
  24228. void
  24229. diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
  24230. index 921691c5cb04..c0f3c568cac8 100644
  24231. --- a/kernel/trace/trace.h
  24232. +++ b/kernel/trace/trace.h
  24233. @@ -120,6 +120,7 @@ struct kretprobe_trace_entry_head {
  24234. * NEED_RESCHED - reschedule is requested
  24235. * HARDIRQ - inside an interrupt handler
  24236. * SOFTIRQ - inside a softirq handler
  24237. + * NEED_RESCHED_LAZY - lazy reschedule is requested
  24238. */
  24239. enum trace_flag_type {
  24240. TRACE_FLAG_IRQS_OFF = 0x01,
  24241. @@ -128,6 +129,7 @@ enum trace_flag_type {
  24242. TRACE_FLAG_HARDIRQ = 0x08,
  24243. TRACE_FLAG_SOFTIRQ = 0x10,
  24244. TRACE_FLAG_PREEMPT_RESCHED = 0x20,
  24245. + TRACE_FLAG_NEED_RESCHED_LAZY = 0x40,
  24246. };
  24247. #define TRACE_BUF_SIZE 1024
  24248. diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
  24249. index 6459f77e2c72..b83d6a4d3912 100644
  24250. --- a/kernel/trace/trace_events.c
  24251. +++ b/kernel/trace/trace_events.c
  24252. @@ -162,6 +162,8 @@ static int trace_define_common_fields(void)
  24253. __common_field(unsigned char, flags);
  24254. __common_field(unsigned char, preempt_count);
  24255. __common_field(int, pid);
  24256. + __common_field(unsigned short, migrate_disable);
  24257. + __common_field(unsigned short, padding);
  24258. return ret;
  24259. }
  24260. @@ -198,6 +200,14 @@ void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer,
  24261. local_save_flags(fbuffer->flags);
  24262. fbuffer->pc = preempt_count();
  24263. + /*
  24264. + * If CONFIG_PREEMPT is enabled, then the tracepoint itself disables
  24265. + * preemption (adding one to the preempt_count). Since we are
  24266. + * interested in the preempt_count at the time the tracepoint was
  24267. + * hit, we need to subtract one to offset the increment.
  24268. + */
  24269. + if (IS_ENABLED(CONFIG_PREEMPT))
  24270. + fbuffer->pc--;
  24271. fbuffer->ftrace_file = ftrace_file;
  24272. fbuffer->event =
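As the comment in the hunk above notes, the decrement compensates for the preemption disabling done by the tracepoint machinery itself when CONFIG_PREEMPT is set. A simplified, hypothetical sketch of that effect (example_probe and example_tracepoint_call are illustration-only names, not kernel functions):

#include <linux/kconfig.h>
#include <linux/preempt.h>
#include <linux/printk.h>

static void example_probe(void *data)
{
	int pc = preempt_count();

	/* undo the increment added by the wrapper below, as the patch does */
	if (IS_ENABLED(CONFIG_PREEMPT))
		pc--;
	pr_debug("preempt_count at the call site: %d\n", pc);
}

static void example_tracepoint_call(void)
{
	/* roughly what the tracepoint wrapper does around its probes */
	preempt_disable_notrace();
	example_probe(NULL);
	preempt_enable_notrace();
}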
  24273. diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
  24274. index 8523ea345f2b..0f2d3e3545e8 100644
  24275. --- a/kernel/trace/trace_irqsoff.c
  24276. +++ b/kernel/trace/trace_irqsoff.c
  24277. @@ -13,6 +13,7 @@
  24278. #include <linux/uaccess.h>
  24279. #include <linux/module.h>
  24280. #include <linux/ftrace.h>
  24281. +#include <trace/events/hist.h>
  24282. #include "trace.h"
  24283. @@ -433,11 +434,13 @@ void start_critical_timings(void)
  24284. {
  24285. if (preempt_trace() || irq_trace())
  24286. start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  24287. + trace_preemptirqsoff_hist_rcuidle(TRACE_START, 1);
  24288. }
  24289. EXPORT_SYMBOL_GPL(start_critical_timings);
  24290. void stop_critical_timings(void)
  24291. {
  24292. + trace_preemptirqsoff_hist_rcuidle(TRACE_STOP, 0);
  24293. if (preempt_trace() || irq_trace())
  24294. stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  24295. }
  24296. @@ -447,6 +450,7 @@ EXPORT_SYMBOL_GPL(stop_critical_timings);
  24297. #ifdef CONFIG_PROVE_LOCKING
  24298. void time_hardirqs_on(unsigned long a0, unsigned long a1)
  24299. {
  24300. + trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0);
  24301. if (!preempt_trace() && irq_trace())
  24302. stop_critical_timing(a0, a1);
  24303. }
  24304. @@ -455,6 +459,7 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
  24305. {
  24306. if (!preempt_trace() && irq_trace())
  24307. start_critical_timing(a0, a1);
  24308. + trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1);
  24309. }
  24310. #else /* !CONFIG_PROVE_LOCKING */
  24311. @@ -480,6 +485,7 @@ inline void print_irqtrace_events(struct task_struct *curr)
  24312. */
  24313. void trace_hardirqs_on(void)
  24314. {
  24315. + trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0);
  24316. if (!preempt_trace() && irq_trace())
  24317. stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  24318. }
  24319. @@ -489,11 +495,13 @@ void trace_hardirqs_off(void)
  24320. {
  24321. if (!preempt_trace() && irq_trace())
  24322. start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  24323. + trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1);
  24324. }
  24325. EXPORT_SYMBOL(trace_hardirqs_off);
  24326. __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
  24327. {
  24328. + trace_preemptirqsoff_hist(IRQS_ON, 0);
  24329. if (!preempt_trace() && irq_trace())
  24330. stop_critical_timing(CALLER_ADDR0, caller_addr);
  24331. }
  24332. @@ -503,6 +511,7 @@ __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
  24333. {
  24334. if (!preempt_trace() && irq_trace())
  24335. start_critical_timing(CALLER_ADDR0, caller_addr);
  24336. + trace_preemptirqsoff_hist(IRQS_OFF, 1);
  24337. }
  24338. EXPORT_SYMBOL(trace_hardirqs_off_caller);
  24339. @@ -512,12 +521,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
  24340. #ifdef CONFIG_PREEMPT_TRACER
  24341. void trace_preempt_on(unsigned long a0, unsigned long a1)
  24342. {
  24343. + trace_preemptirqsoff_hist(PREEMPT_ON, 0);
  24344. if (preempt_trace() && !irq_trace())
  24345. stop_critical_timing(a0, a1);
  24346. }
  24347. void trace_preempt_off(unsigned long a0, unsigned long a1)
  24348. {
  24349. + trace_preemptirqsoff_hist(PREEMPT_ON, 1);
  24350. if (preempt_trace() && !irq_trace())
  24351. start_critical_timing(a0, a1);
  24352. }
  24353. diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
  24354. index 25a086bcb700..c86bed27213f 100644
  24355. --- a/kernel/trace/trace_output.c
  24356. +++ b/kernel/trace/trace_output.c
  24357. @@ -430,6 +430,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
  24358. {
  24359. char hardsoft_irq;
  24360. char need_resched;
  24361. + char need_resched_lazy;
  24362. char irqs_off;
  24363. int hardirq;
  24364. int softirq;
  24365. @@ -457,6 +458,8 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
  24366. need_resched = '.';
  24367. break;
  24368. }
  24369. + need_resched_lazy =
  24370. + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
  24371. hardsoft_irq =
  24372. (hardirq && softirq) ? 'H' :
  24373. @@ -464,14 +467,25 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
  24374. softirq ? 's' :
  24375. '.';
  24376. - trace_seq_printf(s, "%c%c%c",
  24377. - irqs_off, need_resched, hardsoft_irq);
  24378. + trace_seq_printf(s, "%c%c%c%c",
  24379. + irqs_off, need_resched, need_resched_lazy,
  24380. + hardsoft_irq);
  24381. if (entry->preempt_count)
  24382. trace_seq_printf(s, "%x", entry->preempt_count);
  24383. else
  24384. trace_seq_putc(s, '.');
  24385. + if (entry->preempt_lazy_count)
  24386. + trace_seq_printf(s, "%x", entry->preempt_lazy_count);
  24387. + else
  24388. + trace_seq_putc(s, '.');
  24389. +
  24390. + if (entry->migrate_disable)
  24391. + trace_seq_printf(s, "%x", entry->migrate_disable);
  24392. + else
  24393. + trace_seq_putc(s, '.');
  24394. +
  24395. return !trace_seq_has_overflowed(s);
  24396. }
  24397. diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
  24398. index 419ca37e72c9..f270088e9929 100644
  24399. --- a/kernel/trace/trace_sched_switch.c
  24400. +++ b/kernel/trace/trace_sched_switch.c
  24401. @@ -26,7 +26,7 @@ probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *n
  24402. }
  24403. static void
  24404. -probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success)
  24405. +probe_sched_wakeup(void *ignore, struct task_struct *wakee)
  24406. {
  24407. if (unlikely(!sched_ref))
  24408. return;
  24409. diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
  24410. index d6e1003724e9..79a2a5f7fc82 100644
  24411. --- a/kernel/trace/trace_sched_wakeup.c
  24412. +++ b/kernel/trace/trace_sched_wakeup.c
  24413. @@ -514,7 +514,7 @@ static void wakeup_reset(struct trace_array *tr)
  24414. }
  24415. static void
  24416. -probe_wakeup(void *ignore, struct task_struct *p, int success)
  24417. +probe_wakeup(void *ignore, struct task_struct *p)
  24418. {
  24419. struct trace_array_cpu *data;
  24420. int cpu = smp_processor_id();
  24421. diff --git a/kernel/user.c b/kernel/user.c
  24422. index b069ccbfb0b0..1a2e88e98b5e 100644
  24423. --- a/kernel/user.c
  24424. +++ b/kernel/user.c
  24425. @@ -161,11 +161,11 @@ void free_uid(struct user_struct *up)
  24426. if (!up)
  24427. return;
  24428. - local_irq_save(flags);
  24429. + local_irq_save_nort(flags);
  24430. if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
  24431. free_user(up, flags);
  24432. else
  24433. - local_irq_restore(flags);
  24434. + local_irq_restore_nort(flags);
  24435. }
  24436. struct user_struct *alloc_uid(kuid_t uid)
  24437. diff --git a/kernel/watchdog.c b/kernel/watchdog.c
  24438. index f89ea713213f..37dd3a5bf53f 100644
  24439. --- a/kernel/watchdog.c
  24440. +++ b/kernel/watchdog.c
  24441. @@ -262,6 +262,8 @@ static int is_softlockup(unsigned long touch_ts)
  24442. #ifdef CONFIG_HARDLOCKUP_DETECTOR
  24443. +static DEFINE_RAW_SPINLOCK(watchdog_output_lock);
  24444. +
  24445. static struct perf_event_attr wd_hw_attr = {
  24446. .type = PERF_TYPE_HARDWARE,
  24447. .config = PERF_COUNT_HW_CPU_CYCLES,
  24448. @@ -295,13 +297,21 @@ static void watchdog_overflow_callback(struct perf_event *event,
  24449. /* only print hardlockups once */
  24450. if (__this_cpu_read(hard_watchdog_warn) == true)
  24451. return;
  24452. + /*
  24453. + * If early-printk is enabled then make sure we do not
  24454. + * lock up in printk() and kill console logging:
  24455. + */
  24456. + printk_kill();
  24457. - if (hardlockup_panic)
  24458. + if (hardlockup_panic) {
  24459. panic("Watchdog detected hard LOCKUP on cpu %d",
  24460. this_cpu);
  24461. - else
  24462. + } else {
  24463. + raw_spin_lock(&watchdog_output_lock);
  24464. WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
  24465. this_cpu);
  24466. + raw_spin_unlock(&watchdog_output_lock);
  24467. + }
  24468. __this_cpu_write(hard_watchdog_warn, true);
  24469. return;
  24470. @@ -444,6 +454,7 @@ static void watchdog_enable(unsigned int cpu)
  24471. /* kick off the timer for the hardlockup detector */
  24472. hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  24473. hrtimer->function = watchdog_timer_fn;
  24474. + hrtimer->irqsafe = 1;
  24475. /* Enable the perf event */
  24476. watchdog_nmi_enable(cpu);
  24477. diff --git a/kernel/workqueue.c b/kernel/workqueue.c
  24478. index d0efe9295a0e..80f3bb082136 100644
  24479. --- a/kernel/workqueue.c
  24480. +++ b/kernel/workqueue.c
  24481. @@ -48,6 +48,8 @@
  24482. #include <linux/nodemask.h>
  24483. #include <linux/moduleparam.h>
  24484. #include <linux/uaccess.h>
  24485. +#include <linux/locallock.h>
  24486. +#include <linux/delay.h>
  24487. #include "workqueue_internal.h"
  24488. @@ -121,11 +123,16 @@ enum {
  24489. * cpu or grabbing pool->lock is enough for read access. If
  24490. * POOL_DISASSOCIATED is set, it's identical to L.
  24491. *
  24492. + * On RT we need the extra protection via rt_lock_idle_list() for
  24493. + * the list manipulations against read access from
  24494. + * wq_worker_sleeping(). All other places are nicely serialized via
  24495. + * pool->lock.
  24496. + *
  24497. * A: pool->attach_mutex protected.
  24498. *
  24499. * PL: wq_pool_mutex protected.
  24500. *
  24501. - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
  24502. + * PR: wq_pool_mutex protected for writes. RCU protected for reads.
  24503. *
  24504. * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
  24505. *
  24506. @@ -134,7 +141,7 @@ enum {
  24507. *
  24508. * WQ: wq->mutex protected.
  24509. *
  24510. - * WR: wq->mutex protected for writes. Sched-RCU protected for reads.
  24511. + * WR: wq->mutex protected for writes. RCU protected for reads.
  24512. *
  24513. * MD: wq_mayday_lock protected.
  24514. */
  24515. @@ -183,7 +190,7 @@ struct worker_pool {
  24516. atomic_t nr_running ____cacheline_aligned_in_smp;
  24517. /*
  24518. - * Destruction of pool is sched-RCU protected to allow dereferences
  24519. + * Destruction of pool is RCU protected to allow dereferences
  24520. * from get_work_pool().
  24521. */
  24522. struct rcu_head rcu;
  24523. @@ -212,7 +219,7 @@ struct pool_workqueue {
  24524. /*
  24525. * Release of unbound pwq is punted to system_wq. See put_pwq()
  24526. * and pwq_unbound_release_workfn() for details. pool_workqueue
  24527. - * itself is also sched-RCU protected so that the first pwq can be
  24528. + * itself is also RCU protected so that the first pwq can be
  24529. * determined without grabbing wq->mutex.
  24530. */
  24531. struct work_struct unbound_release_work;
  24532. @@ -334,6 +341,8 @@ EXPORT_SYMBOL_GPL(system_power_efficient_wq);
  24533. struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
  24534. EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
  24535. +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock);
  24536. +
  24537. static int worker_thread(void *__worker);
  24538. static void copy_workqueue_attrs(struct workqueue_attrs *to,
  24539. const struct workqueue_attrs *from);
  24540. @@ -343,14 +352,14 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  24541. #include <trace/events/workqueue.h>
  24542. #define assert_rcu_or_pool_mutex() \
  24543. - rcu_lockdep_assert(rcu_read_lock_sched_held() || \
  24544. + rcu_lockdep_assert(rcu_read_lock_held() || \
  24545. lockdep_is_held(&wq_pool_mutex), \
  24546. - "sched RCU or wq_pool_mutex should be held")
  24547. + "RCU or wq_pool_mutex should be held")
  24548. #define assert_rcu_or_wq_mutex(wq) \
  24549. - rcu_lockdep_assert(rcu_read_lock_sched_held() || \
  24550. + rcu_lockdep_assert(rcu_read_lock_held() || \
  24551. lockdep_is_held(&wq->mutex), \
  24552. - "sched RCU or wq->mutex should be held")
  24553. + "RCU or wq->mutex should be held")
  24554. #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
  24555. rcu_lockdep_assert(rcu_read_lock_sched_held() || \
  24556. @@ -368,7 +377,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  24557. * @pool: iteration cursor
  24558. * @pi: integer used for iteration
  24559. *
  24560. - * This must be called either with wq_pool_mutex held or sched RCU read
  24561. + * This must be called either with wq_pool_mutex held or RCU read
  24562. * locked. If the pool needs to be used beyond the locking in effect, the
  24563. * caller is responsible for guaranteeing that the pool stays online.
  24564. *
  24565. @@ -400,7 +409,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  24566. * @pwq: iteration cursor
  24567. * @wq: the target workqueue
  24568. *
  24569. - * This must be called either with wq->mutex held or sched RCU read locked.
  24570. + * This must be called either with wq->mutex held or RCU read locked.
  24571. * If the pwq needs to be used beyond the locking in effect, the caller is
  24572. * responsible for guaranteeing that the pwq stays online.
  24573. *
  24574. @@ -412,6 +421,31 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  24575. if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
  24576. else
  24577. +#ifdef CONFIG_PREEMPT_RT_BASE
  24578. +static inline void rt_lock_idle_list(struct worker_pool *pool)
  24579. +{
  24580. + preempt_disable();
  24581. +}
  24582. +static inline void rt_unlock_idle_list(struct worker_pool *pool)
  24583. +{
  24584. + preempt_enable();
  24585. +}
  24586. +static inline void sched_lock_idle_list(struct worker_pool *pool) { }
  24587. +static inline void sched_unlock_idle_list(struct worker_pool *pool) { }
  24588. +#else
  24589. +static inline void rt_lock_idle_list(struct worker_pool *pool) { }
  24590. +static inline void rt_unlock_idle_list(struct worker_pool *pool) { }
  24591. +static inline void sched_lock_idle_list(struct worker_pool *pool)
  24592. +{
  24593. + spin_lock_irq(&pool->lock);
  24594. +}
  24595. +static inline void sched_unlock_idle_list(struct worker_pool *pool)
  24596. +{
  24597. + spin_unlock_irq(&pool->lock);
  24598. +}
  24599. +#endif
  24600. +
  24601. +
  24602. #ifdef CONFIG_DEBUG_OBJECTS_WORK
  24603. static struct debug_obj_descr work_debug_descr;
  24604. @@ -562,8 +596,7 @@ static int worker_pool_assign_id(struct worker_pool *pool)
  24605. * @wq: the target workqueue
  24606. * @node: the node ID
  24607. *
  24608. - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
  24609. - * read locked.
  24610. + * This must be called with any of wq_pool_mutex, wq->mutex or RCU read locked.
  24611. * If the pwq needs to be used beyond the locking in effect, the caller is
  24612. * responsible for guaranteeing that the pwq stays online.
  24613. *
  24614. @@ -706,8 +739,8 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work)
  24615. * @work: the work item of interest
  24616. *
  24617. * Pools are created and destroyed under wq_pool_mutex, and allows read
  24618. - * access under sched-RCU read lock. As such, this function should be
  24619. - * called under wq_pool_mutex or with preemption disabled.
  24620. + * access under RCU read lock. As such, this function should be
  24621. + * called under wq_pool_mutex or inside of a rcu_read_lock() region.
  24622. *
  24623. * All fields of the returned pool are accessible as long as the above
  24624. * mentioned locking is in effect. If the returned pool needs to be used
  24625. @@ -844,51 +877,44 @@ static struct worker *first_idle_worker(struct worker_pool *pool)
  24626. */
  24627. static void wake_up_worker(struct worker_pool *pool)
  24628. {
  24629. - struct worker *worker = first_idle_worker(pool);
  24630. + struct worker *worker;
  24631. +
  24632. + rt_lock_idle_list(pool);
  24633. +
  24634. + worker = first_idle_worker(pool);
  24635. if (likely(worker))
  24636. wake_up_process(worker->task);
  24637. +
  24638. + rt_unlock_idle_list(pool);
  24639. }
  24640. /**
  24641. - * wq_worker_waking_up - a worker is waking up
  24642. - * @task: task waking up
  24643. - * @cpu: CPU @task is waking up to
  24644. - *
  24645. - * This function is called during try_to_wake_up() when a worker is
  24646. - * being awoken.
  24647. + * wq_worker_running - a worker is running again
  24648. + * @task: task returning from sleep
  24649. *
  24650. - * CONTEXT:
  24651. - * spin_lock_irq(rq->lock)
  24652. + * This function is called when a worker returns from schedule()
  24653. */
  24654. -void wq_worker_waking_up(struct task_struct *task, int cpu)
  24655. +void wq_worker_running(struct task_struct *task)
  24656. {
  24657. struct worker *worker = kthread_data(task);
  24658. - if (!(worker->flags & WORKER_NOT_RUNNING)) {
  24659. - WARN_ON_ONCE(worker->pool->cpu != cpu);
  24660. + if (!worker->sleeping)
  24661. + return;
  24662. + if (!(worker->flags & WORKER_NOT_RUNNING))
  24663. atomic_inc(&worker->pool->nr_running);
  24664. - }
  24665. + worker->sleeping = 0;
  24666. }
  24667. /**
  24668. * wq_worker_sleeping - a worker is going to sleep
  24669. * @task: task going to sleep
  24670. - * @cpu: CPU in question, must be the current CPU number
  24671. - *
  24672. - * This function is called during schedule() when a busy worker is
  24673. - * going to sleep. Worker on the same cpu can be woken up by
  24674. - * returning pointer to its task.
  24675. - *
  24676. - * CONTEXT:
  24677. - * spin_lock_irq(rq->lock)
  24678. - *
  24679. - * Return:
  24680. - * Worker task on @cpu to wake up, %NULL if none.
  24681. + * This function is called from schedule() when a busy worker is
  24682. + * going to sleep.
  24683. */
  24684. -struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
  24685. +void wq_worker_sleeping(struct task_struct *task)
  24686. {
  24687. - struct worker *worker = kthread_data(task), *to_wakeup = NULL;
  24688. + struct worker *worker = kthread_data(task);
  24689. struct worker_pool *pool;
  24690. /*
  24691. @@ -897,29 +923,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
  24692. * checking NOT_RUNNING.
  24693. */
  24694. if (worker->flags & WORKER_NOT_RUNNING)
  24695. - return NULL;
  24696. + return;
  24697. pool = worker->pool;
  24698. - /* this can only happen on the local cpu */
  24699. - if (WARN_ON_ONCE(cpu != raw_smp_processor_id() || pool->cpu != cpu))
  24700. - return NULL;
  24701. + if (WARN_ON_ONCE(worker->sleeping))
  24702. + return;
  24703. +
  24704. + worker->sleeping = 1;
  24705. /*
  24706. * The counterpart of the following dec_and_test, implied mb,
  24707. * worklist not empty test sequence is in insert_work().
  24708. * Please read comment there.
  24709. - *
  24710. - * NOT_RUNNING is clear. This means that we're bound to and
  24711. - * running on the local cpu w/ rq lock held and preemption
  24712. - * disabled, which in turn means that none else could be
  24713. - * manipulating idle_list, so dereferencing idle_list without pool
  24714. - * lock is safe.
  24715. */
  24716. if (atomic_dec_and_test(&pool->nr_running) &&
  24717. - !list_empty(&pool->worklist))
  24718. - to_wakeup = first_idle_worker(pool);
  24719. - return to_wakeup ? to_wakeup->task : NULL;
  24720. + !list_empty(&pool->worklist)) {
  24721. + sched_lock_idle_list(pool);
  24722. + wake_up_worker(pool);
  24723. + sched_unlock_idle_list(pool);
  24724. + }
  24725. }
  24726. /**
  24727. @@ -1113,12 +1136,12 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
  24728. {
  24729. if (pwq) {
  24730. /*
  24731. - * As both pwqs and pools are sched-RCU protected, the
  24732. + * As both pwqs and pools are RCU protected, the
  24733. * following lock operations are safe.
  24734. */
  24735. - spin_lock_irq(&pwq->pool->lock);
  24736. + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock);
  24737. put_pwq(pwq);
  24738. - spin_unlock_irq(&pwq->pool->lock);
  24739. + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock);
  24740. }
  24741. }
  24742. @@ -1220,7 +1243,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
  24743. struct worker_pool *pool;
  24744. struct pool_workqueue *pwq;
  24745. - local_irq_save(*flags);
  24746. + local_lock_irqsave(pendingb_lock, *flags);
  24747. /* try to steal the timer if it exists */
  24748. if (is_dwork) {
  24749. @@ -1239,6 +1262,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
  24750. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
  24751. return 0;
  24752. + rcu_read_lock();
  24753. /*
  24754. * The queueing is in progress, or it is already queued. Try to
  24755. * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
  24756. @@ -1277,14 +1301,16 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
  24757. set_work_pool_and_keep_pending(work, pool->id);
  24758. spin_unlock(&pool->lock);
  24759. + rcu_read_unlock();
  24760. return 1;
  24761. }
  24762. spin_unlock(&pool->lock);
  24763. fail:
  24764. - local_irq_restore(*flags);
  24765. + rcu_read_unlock();
  24766. + local_unlock_irqrestore(pendingb_lock, *flags);
  24767. if (work_is_canceling(work))
  24768. return -ENOENT;
  24769. - cpu_relax();
  24770. + cpu_chill();
  24771. return -EAGAIN;
  24772. }
  24773. @@ -1353,7 +1379,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
  24774. * queued or lose PENDING. Grabbing PENDING and queueing should
  24775. * happen with IRQ disabled.
  24776. */
  24777. - WARN_ON_ONCE(!irqs_disabled());
  24778. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  24779. debug_work_activate(work);
  24780. @@ -1361,6 +1387,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
  24781. if (unlikely(wq->flags & __WQ_DRAINING) &&
  24782. WARN_ON_ONCE(!is_chained_work(wq)))
  24783. return;
  24784. +
  24785. + rcu_read_lock();
  24786. retry:
  24787. if (req_cpu == WORK_CPU_UNBOUND)
  24788. cpu = raw_smp_processor_id();
  24789. @@ -1417,10 +1445,8 @@ retry:
  24790. /* pwq determined, queue */
  24791. trace_workqueue_queue_work(req_cpu, pwq, work);
  24792. - if (WARN_ON(!list_empty(&work->entry))) {
  24793. - spin_unlock(&pwq->pool->lock);
  24794. - return;
  24795. - }
  24796. + if (WARN_ON(!list_empty(&work->entry)))
  24797. + goto out;
  24798. pwq->nr_in_flight[pwq->work_color]++;
  24799. work_flags = work_color_to_flags(pwq->work_color);
  24800. @@ -1436,7 +1462,9 @@ retry:
  24801. insert_work(pwq, work, worklist, work_flags);
  24802. +out:
  24803. spin_unlock(&pwq->pool->lock);
  24804. + rcu_read_unlock();
  24805. }
  24806. /**
  24807. @@ -1456,14 +1484,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
  24808. bool ret = false;
  24809. unsigned long flags;
  24810. - local_irq_save(flags);
  24811. + local_lock_irqsave(pendingb_lock,flags);
  24812. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
  24813. __queue_work(cpu, wq, work);
  24814. ret = true;
  24815. }
  24816. - local_irq_restore(flags);
  24817. + local_unlock_irqrestore(pendingb_lock, flags);
  24818. return ret;
  24819. }
  24820. EXPORT_SYMBOL(queue_work_on);
  24821. @@ -1530,14 +1558,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
  24822. unsigned long flags;
  24823. /* read the comment in __queue_work() */
  24824. - local_irq_save(flags);
  24825. + local_lock_irqsave(pendingb_lock, flags);
  24826. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
  24827. __queue_delayed_work(cpu, wq, dwork, delay);
  24828. ret = true;
  24829. }
  24830. - local_irq_restore(flags);
  24831. + local_unlock_irqrestore(pendingb_lock, flags);
  24832. return ret;
  24833. }
  24834. EXPORT_SYMBOL(queue_delayed_work_on);
  24835. @@ -1572,7 +1600,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
  24836. if (likely(ret >= 0)) {
  24837. __queue_delayed_work(cpu, wq, dwork, delay);
  24838. - local_irq_restore(flags);
  24839. + local_unlock_irqrestore(pendingb_lock, flags);
  24840. }
  24841. /* -ENOENT from try_to_grab_pending() becomes %true */
  24842. @@ -1605,7 +1633,9 @@ static void worker_enter_idle(struct worker *worker)
  24843. worker->last_active = jiffies;
  24844. /* idle_list is LIFO */
  24845. + rt_lock_idle_list(pool);
  24846. list_add(&worker->entry, &pool->idle_list);
  24847. + rt_unlock_idle_list(pool);
  24848. if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
  24849. mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
  24850. @@ -1638,7 +1668,9 @@ static void worker_leave_idle(struct worker *worker)
  24851. return;
  24852. worker_clr_flags(worker, WORKER_IDLE);
  24853. pool->nr_idle--;
  24854. + rt_lock_idle_list(pool);
  24855. list_del_init(&worker->entry);
  24856. + rt_unlock_idle_list(pool);
  24857. }
  24858. static struct worker *alloc_worker(int node)
  24859. @@ -1806,7 +1838,9 @@ static void destroy_worker(struct worker *worker)
  24860. pool->nr_workers--;
  24861. pool->nr_idle--;
  24862. + rt_lock_idle_list(pool);
  24863. list_del_init(&worker->entry);
  24864. + rt_unlock_idle_list(pool);
  24865. worker->flags |= WORKER_DIE;
  24866. wake_up_process(worker->task);
  24867. }
  24868. @@ -2723,14 +2757,14 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
  24869. might_sleep();
  24870. - local_irq_disable();
  24871. + rcu_read_lock();
  24872. pool = get_work_pool(work);
  24873. if (!pool) {
  24874. - local_irq_enable();
  24875. + rcu_read_unlock();
  24876. return false;
  24877. }
  24878. - spin_lock(&pool->lock);
  24879. + spin_lock_irq(&pool->lock);
  24880. /* see the comment in try_to_grab_pending() with the same code */
  24881. pwq = get_work_pwq(work);
  24882. if (pwq) {
  24883. @@ -2757,10 +2791,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
  24884. else
  24885. lock_map_acquire_read(&pwq->wq->lockdep_map);
  24886. lock_map_release(&pwq->wq->lockdep_map);
  24887. -
  24888. + rcu_read_unlock();
  24889. return true;
  24890. already_gone:
  24891. spin_unlock_irq(&pool->lock);
  24892. + rcu_read_unlock();
  24893. return false;
  24894. }
  24895. @@ -2847,7 +2882,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
  24896. /* tell other tasks trying to grab @work to back off */
  24897. mark_work_canceling(work);
  24898. - local_irq_restore(flags);
  24899. + local_unlock_irqrestore(pendingb_lock, flags);
  24900. flush_work(work);
  24901. clear_work_data(work);
  24902. @@ -2902,10 +2937,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync);
  24903. */
  24904. bool flush_delayed_work(struct delayed_work *dwork)
  24905. {
  24906. - local_irq_disable();
  24907. + local_lock_irq(pendingb_lock);
  24908. if (del_timer_sync(&dwork->timer))
  24909. __queue_work(dwork->cpu, dwork->wq, &dwork->work);
  24910. - local_irq_enable();
  24911. + local_unlock_irq(pendingb_lock);
  24912. return flush_work(&dwork->work);
  24913. }
  24914. EXPORT_SYMBOL(flush_delayed_work);
  24915. @@ -2940,7 +2975,7 @@ bool cancel_delayed_work(struct delayed_work *dwork)
  24916. set_work_pool_and_clear_pending(&dwork->work,
  24917. get_work_pool_id(&dwork->work));
  24918. - local_irq_restore(flags);
  24919. + local_unlock_irqrestore(pendingb_lock, flags);
  24920. return ret;
  24921. }
  24922. EXPORT_SYMBOL(cancel_delayed_work);
  24923. @@ -3198,7 +3233,7 @@ static void rcu_free_pool(struct rcu_head *rcu)
  24924. * put_unbound_pool - put a worker_pool
  24925. * @pool: worker_pool to put
  24926. *
  24927. - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU
  24928. + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
  24929. * safe manner. get_unbound_pool() calls this function on its failure path
  24930. * and this function should be able to release pools which went through,
  24931. * successfully or not, init_worker_pool().
  24932. @@ -3252,8 +3287,8 @@ static void put_unbound_pool(struct worker_pool *pool)
  24933. del_timer_sync(&pool->idle_timer);
  24934. del_timer_sync(&pool->mayday_timer);
  24935. - /* sched-RCU protected to allow dereferences from get_work_pool() */
  24936. - call_rcu_sched(&pool->rcu, rcu_free_pool);
  24937. + /* RCU protected to allow dereferences from get_work_pool() */
  24938. + call_rcu(&pool->rcu, rcu_free_pool);
  24939. }
  24940. /**
  24941. @@ -3358,14 +3393,14 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
  24942. put_unbound_pool(pool);
  24943. mutex_unlock(&wq_pool_mutex);
  24944. - call_rcu_sched(&pwq->rcu, rcu_free_pwq);
  24945. + call_rcu(&pwq->rcu, rcu_free_pwq);
  24946. /*
  24947. * If we're the last pwq going away, @wq is already dead and no one
  24948. * is gonna access it anymore. Schedule RCU free.
  24949. */
  24950. if (is_last)
  24951. - call_rcu_sched(&wq->rcu, rcu_free_wq);
  24952. + call_rcu(&wq->rcu, rcu_free_wq);
  24953. }
  24954. /**
  24955. @@ -4003,7 +4038,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
  24956. * The base ref is never dropped on per-cpu pwqs. Directly
  24957. * schedule RCU free.
  24958. */
  24959. - call_rcu_sched(&wq->rcu, rcu_free_wq);
  24960. + call_rcu(&wq->rcu, rcu_free_wq);
  24961. } else {
  24962. /*
  24963. * We're the sole accessor of @wq at this point. Directly
  24964. @@ -4096,7 +4131,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
  24965. struct pool_workqueue *pwq;
  24966. bool ret;
  24967. - rcu_read_lock_sched();
  24968. + rcu_read_lock();
  24969. + preempt_disable();
  24970. if (cpu == WORK_CPU_UNBOUND)
  24971. cpu = smp_processor_id();
  24972. @@ -4107,7 +4143,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
  24973. pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
  24974. ret = !list_empty(&pwq->delayed_works);
  24975. - rcu_read_unlock_sched();
  24976. + preempt_enable();
  24977. + rcu_read_unlock();
  24978. return ret;
  24979. }
  24980. @@ -4133,15 +4170,15 @@ unsigned int work_busy(struct work_struct *work)
  24981. if (work_pending(work))
  24982. ret |= WORK_BUSY_PENDING;
  24983. - local_irq_save(flags);
  24984. + rcu_read_lock();
  24985. pool = get_work_pool(work);
  24986. if (pool) {
  24987. - spin_lock(&pool->lock);
  24988. + spin_lock_irqsave(&pool->lock, flags);
  24989. if (find_worker_executing_work(pool, work))
  24990. ret |= WORK_BUSY_RUNNING;
  24991. - spin_unlock(&pool->lock);
  24992. + spin_unlock_irqrestore(&pool->lock, flags);
  24993. }
  24994. - local_irq_restore(flags);
  24995. + rcu_read_unlock();
  24996. return ret;
  24997. }
  24998. @@ -4330,7 +4367,7 @@ void show_workqueue_state(void)
  24999. unsigned long flags;
  25000. int pi;
  25001. - rcu_read_lock_sched();
  25002. + rcu_read_lock();
  25003. pr_info("Showing busy workqueues and worker pools:\n");
  25004. @@ -4381,7 +4418,7 @@ void show_workqueue_state(void)
  25005. spin_unlock_irqrestore(&pool->lock, flags);
  25006. }
  25007. - rcu_read_unlock_sched();
  25008. + rcu_read_unlock();
  25009. }
  25010. /*
  25011. @@ -4742,16 +4779,16 @@ bool freeze_workqueues_busy(void)
  25012. * nr_active is monotonically decreasing. It's safe
  25013. * to peek without lock.
  25014. */
  25015. - rcu_read_lock_sched();
  25016. + rcu_read_lock();
  25017. for_each_pwq(pwq, wq) {
  25018. WARN_ON_ONCE(pwq->nr_active < 0);
  25019. if (pwq->nr_active) {
  25020. busy = true;
  25021. - rcu_read_unlock_sched();
  25022. + rcu_read_unlock();
  25023. goto out_unlock;
  25024. }
  25025. }
  25026. - rcu_read_unlock_sched();
  25027. + rcu_read_unlock();
  25028. }
  25029. out_unlock:
  25030. mutex_unlock(&wq_pool_mutex);
  25031. @@ -4865,7 +4902,8 @@ static ssize_t wq_pool_ids_show(struct device *dev,
  25032. const char *delim = "";
  25033. int node, written = 0;
  25034. - rcu_read_lock_sched();
  25035. + get_online_cpus();
  25036. + rcu_read_lock();
  25037. for_each_node(node) {
  25038. written += scnprintf(buf + written, PAGE_SIZE - written,
  25039. "%s%d:%d", delim, node,
  25040. @@ -4873,7 +4911,8 @@ static ssize_t wq_pool_ids_show(struct device *dev,
  25041. delim = " ";
  25042. }
  25043. written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
  25044. - rcu_read_unlock_sched();
  25045. + rcu_read_unlock();
  25046. + put_online_cpus();
  25047. return written;
  25048. }
  25049. diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
  25050. index 45215870ac6c..f000c4d6917e 100644
  25051. --- a/kernel/workqueue_internal.h
  25052. +++ b/kernel/workqueue_internal.h
  25053. @@ -43,6 +43,7 @@ struct worker {
  25054. unsigned long last_active; /* L: last active timestamp */
  25055. unsigned int flags; /* X: flags */
  25056. int id; /* I: worker id */
  25057. + int sleeping; /* None */
  25058. /*
  25059. * Opaque string set with work_set_desc(). Printed out with task
  25060. @@ -68,7 +69,7 @@ static inline struct worker *current_wq_worker(void)
  25061. * Scheduler hooks for concurrency managed workqueue. Only to be used from
  25062. * sched/core.c and workqueue.c.
  25063. */
  25064. -void wq_worker_waking_up(struct task_struct *task, int cpu);
  25065. -struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu);
  25066. +void wq_worker_running(struct task_struct *task);
  25067. +void wq_worker_sleeping(struct task_struct *task);
  25068. #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
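For reference, a minimal sketch of the RCU usage the workqueue hunks above converge on: a plain rcu_read_lock()/rcu_read_unlock() read side paired with call_rcu() instead of the sched-RCU flavour, so readers stay preemptible on -rt. The struct and function names below are illustrative only, not taken from workqueue.c.

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct cfg {
        int value;
        struct rcu_head rcu;
};

static struct cfg __rcu *cur_cfg;

static int cfg_read_value(void)
{
        struct cfg *c;
        int v = -1;

        rcu_read_lock();                        /* preemptible read side on -rt */
        c = rcu_dereference(cur_cfg);
        if (c)
                v = c->value;
        rcu_read_unlock();

        return v;
}

static void cfg_free_rcu(struct rcu_head *head)
{
        kfree(container_of(head, struct cfg, rcu));
}

static void cfg_replace(struct cfg *newc)       /* caller serializes updaters */
{
        struct cfg *old = rcu_dereference_protected(cur_cfg, 1);

        rcu_assign_pointer(cur_cfg, newc);
        if (old)
                call_rcu(&old->rcu, cfg_free_rcu);      /* was call_rcu_sched() */
}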
  25069. diff --git a/lib/Kconfig b/lib/Kconfig
  25070. index 601965a948e8..8689649d5038 100644
  25071. --- a/lib/Kconfig
  25072. +++ b/lib/Kconfig
  25073. @@ -391,6 +391,7 @@ config CHECK_SIGNATURE
  25074. config CPUMASK_OFFSTACK
  25075. bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
  25076. + depends on !PREEMPT_RT_FULL
  25077. help
  25078. Use dynamic allocation for cpumask_var_t, instead of putting
  25079. them on the stack. This is a bit more expensive, but avoids
  25080. diff --git a/lib/debugobjects.c b/lib/debugobjects.c
  25081. index 547f7f923dbc..8fcdbc2fc6d0 100644
  25082. --- a/lib/debugobjects.c
  25083. +++ b/lib/debugobjects.c
  25084. @@ -309,7 +309,10 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
  25085. struct debug_obj *obj;
  25086. unsigned long flags;
  25087. - fill_pool();
  25088. +#ifdef CONFIG_PREEMPT_RT_FULL
  25089. + if (preempt_count() == 0 && !irqs_disabled())
  25090. +#endif
  25091. + fill_pool();
  25092. db = get_bucket((unsigned long) addr);
  25093. diff --git a/lib/dump_stack.c b/lib/dump_stack.c
  25094. index c30d07e99dba..6f2484330b50 100644
  25095. --- a/lib/dump_stack.c
  25096. +++ b/lib/dump_stack.c
  25097. @@ -8,6 +8,7 @@
  25098. #include <linux/sched.h>
  25099. #include <linux/smp.h>
  25100. #include <linux/atomic.h>
  25101. +#include <linux/locallock.h>
  25102. static void __dump_stack(void)
  25103. {
  25104. diff --git a/lib/idr.c b/lib/idr.c
  25105. index 5335c43adf46..d0681a357e69 100644
  25106. --- a/lib/idr.c
  25107. +++ b/lib/idr.c
  25108. @@ -30,6 +30,7 @@
  25109. #include <linux/idr.h>
  25110. #include <linux/spinlock.h>
  25111. #include <linux/percpu.h>
  25112. +#include <linux/locallock.h>
  25113. #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1)
  25114. #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT)
  25115. @@ -366,6 +367,35 @@ static void idr_fill_slot(struct idr *idr, void *ptr, int id,
  25116. idr_mark_full(pa, id);
  25117. }
  25118. +#ifdef CONFIG_PREEMPT_RT_FULL
  25119. +static DEFINE_LOCAL_IRQ_LOCK(idr_lock);
  25120. +
  25121. +static inline void idr_preload_lock(void)
  25122. +{
  25123. + local_lock(idr_lock);
  25124. +}
  25125. +
  25126. +static inline void idr_preload_unlock(void)
  25127. +{
  25128. + local_unlock(idr_lock);
  25129. +}
  25130. +
  25131. +void idr_preload_end(void)
  25132. +{
  25133. + idr_preload_unlock();
  25134. +}
  25135. +EXPORT_SYMBOL(idr_preload_end);
  25136. +#else
  25137. +static inline void idr_preload_lock(void)
  25138. +{
  25139. + preempt_disable();
  25140. +}
  25141. +
  25142. +static inline void idr_preload_unlock(void)
  25143. +{
  25144. + preempt_enable();
  25145. +}
  25146. +#endif
  25147. /**
  25148. * idr_preload - preload for idr_alloc()
  25149. @@ -401,7 +431,7 @@ void idr_preload(gfp_t gfp_mask)
  25150. WARN_ON_ONCE(in_interrupt());
  25151. might_sleep_if(gfp_mask & __GFP_WAIT);
  25152. - preempt_disable();
  25153. + idr_preload_lock();
  25154. /*
  25155. * idr_alloc() is likely to succeed w/o full idr_layer buffer and
  25156. @@ -413,9 +443,9 @@ void idr_preload(gfp_t gfp_mask)
  25157. while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) {
  25158. struct idr_layer *new;
  25159. - preempt_enable();
  25160. + idr_preload_unlock();
  25161. new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
  25162. - preempt_disable();
  25163. + idr_preload_lock();
  25164. if (!new)
  25165. break;
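A caller-side sketch of the idr_preload()/idr_preload_end() pairing that the wrappers above serve: on mainline the section is merely preempt-disabled, on -rt it holds the new idr_lock local lock instead. example_idr, example_lock and example_assign_id are hypothetical names used for illustration.

#include <linux/idr.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>

static DEFINE_IDR(example_idr);
static DEFINE_SPINLOCK(example_lock);

int example_assign_id(void *object)
{
        int id;

        idr_preload(GFP_KERNEL);        /* may sleep, fills the per-CPU buffer */
        spin_lock(&example_lock);
        id = idr_alloc(&example_idr, object, 1, 0, GFP_NOWAIT);
        spin_unlock(&example_lock);
        idr_preload_end();              /* preempt_enable() or local_unlock() */

        return id;                      /* >= 1 on success, negative errno on failure */
}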
  25166. diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
  25167. index 872a15a2a637..b93a6103fa4d 100644
  25168. --- a/lib/locking-selftest.c
  25169. +++ b/lib/locking-selftest.c
  25170. @@ -590,6 +590,8 @@ GENERATE_TESTCASE(init_held_rsem)
  25171. #include "locking-selftest-spin-hardirq.h"
  25172. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin)
  25173. +#ifndef CONFIG_PREEMPT_RT_FULL
  25174. +
  25175. #include "locking-selftest-rlock-hardirq.h"
  25176. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
  25177. @@ -605,9 +607,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock)
  25178. #include "locking-selftest-wlock-softirq.h"
  25179. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
  25180. +#endif
  25181. +
  25182. #undef E1
  25183. #undef E2
  25184. +#ifndef CONFIG_PREEMPT_RT_FULL
  25185. /*
  25186. * Enabling hardirqs with a softirq-safe lock held:
  25187. */
  25188. @@ -640,6 +645,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
  25189. #undef E1
  25190. #undef E2
  25191. +#endif
  25192. +
  25193. /*
  25194. * Enabling irqs with an irq-safe lock held:
  25195. */
  25196. @@ -663,6 +670,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
  25197. #include "locking-selftest-spin-hardirq.h"
  25198. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin)
  25199. +#ifndef CONFIG_PREEMPT_RT_FULL
  25200. +
  25201. #include "locking-selftest-rlock-hardirq.h"
  25202. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
  25203. @@ -678,6 +687,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock)
  25204. #include "locking-selftest-wlock-softirq.h"
  25205. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
  25206. +#endif
  25207. +
  25208. #undef E1
  25209. #undef E2
  25210. @@ -709,6 +720,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
  25211. #include "locking-selftest-spin-hardirq.h"
  25212. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin)
  25213. +#ifndef CONFIG_PREEMPT_RT_FULL
  25214. +
  25215. #include "locking-selftest-rlock-hardirq.h"
  25216. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
  25217. @@ -724,6 +737,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock)
  25218. #include "locking-selftest-wlock-softirq.h"
  25219. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
  25220. +#endif
  25221. +
  25222. #undef E1
  25223. #undef E2
  25224. #undef E3
  25225. @@ -757,6 +772,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
  25226. #include "locking-selftest-spin-hardirq.h"
  25227. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin)
  25228. +#ifndef CONFIG_PREEMPT_RT_FULL
  25229. +
  25230. #include "locking-selftest-rlock-hardirq.h"
  25231. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
  25232. @@ -772,10 +789,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock)
  25233. #include "locking-selftest-wlock-softirq.h"
  25234. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
  25235. +#endif
  25236. +
  25237. #undef E1
  25238. #undef E2
  25239. #undef E3
  25240. +#ifndef CONFIG_PREEMPT_RT_FULL
  25241. +
  25242. /*
  25243. * read-lock / write-lock irq inversion.
  25244. *
  25245. @@ -838,6 +859,10 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock)
  25246. #undef E2
  25247. #undef E3
  25248. +#endif
  25249. +
  25250. +#ifndef CONFIG_PREEMPT_RT_FULL
  25251. +
  25252. /*
  25253. * read-lock / write-lock recursion that is actually safe.
  25254. */
  25255. @@ -876,6 +901,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
  25256. #undef E2
  25257. #undef E3
  25258. +#endif
  25259. +
  25260. /*
  25261. * read-lock / write-lock recursion that is unsafe.
  25262. */
  25263. @@ -1858,6 +1885,7 @@ void locking_selftest(void)
  25264. printk(" --------------------------------------------------------------------------\n");
  25265. +#ifndef CONFIG_PREEMPT_RT_FULL
  25266. /*
  25267. * irq-context testcases:
  25268. */
  25269. @@ -1870,6 +1898,28 @@ void locking_selftest(void)
  25270. DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
  25271. // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
  25272. +#else
  25273. + /* On -rt, we only do hardirq context test for raw spinlock */
  25274. + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12);
  25275. + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21);
  25276. +
  25277. + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12);
  25278. + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21);
  25279. +
  25280. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123);
  25281. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132);
  25282. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213);
  25283. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231);
  25284. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312);
  25285. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321);
  25286. +
  25287. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123);
  25288. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132);
  25289. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213);
  25290. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231);
  25291. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312);
  25292. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321);
  25293. +#endif
  25294. ww_tests();
  25295. diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
  25296. index f75715131f20..b1529f40865d 100644
  25297. --- a/lib/percpu_ida.c
  25298. +++ b/lib/percpu_ida.c
  25299. @@ -26,6 +26,9 @@
  25300. #include <linux/string.h>
  25301. #include <linux/spinlock.h>
  25302. #include <linux/percpu_ida.h>
  25303. +#include <linux/locallock.h>
  25304. +
  25305. +static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock);
  25306. struct percpu_ida_cpu {
  25307. /*
  25308. @@ -148,13 +151,13 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
  25309. unsigned long flags;
  25310. int tag;
  25311. - local_irq_save(flags);
  25312. + local_lock_irqsave(irq_off_lock, flags);
  25313. tags = this_cpu_ptr(pool->tag_cpu);
  25314. /* Fastpath */
  25315. tag = alloc_local_tag(tags);
  25316. if (likely(tag >= 0)) {
  25317. - local_irq_restore(flags);
  25318. + local_unlock_irqrestore(irq_off_lock, flags);
  25319. return tag;
  25320. }
  25321. @@ -173,6 +176,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
  25322. if (!tags->nr_free)
  25323. alloc_global_tags(pool, tags);
  25324. +
  25325. if (!tags->nr_free)
  25326. steal_tags(pool, tags);
  25327. @@ -184,7 +188,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
  25328. }
  25329. spin_unlock(&pool->lock);
  25330. - local_irq_restore(flags);
  25331. + local_unlock_irqrestore(irq_off_lock, flags);
  25332. if (tag >= 0 || state == TASK_RUNNING)
  25333. break;
  25334. @@ -196,7 +200,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
  25335. schedule();
  25336. - local_irq_save(flags);
  25337. + local_lock_irqsave(irq_off_lock, flags);
  25338. tags = this_cpu_ptr(pool->tag_cpu);
  25339. }
  25340. if (state != TASK_RUNNING)
  25341. @@ -221,7 +225,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
  25342. BUG_ON(tag >= pool->nr_tags);
  25343. - local_irq_save(flags);
  25344. + local_lock_irqsave(irq_off_lock, flags);
  25345. tags = this_cpu_ptr(pool->tag_cpu);
  25346. spin_lock(&tags->lock);
  25347. @@ -253,7 +257,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
  25348. spin_unlock(&pool->lock);
  25349. }
  25350. - local_irq_restore(flags);
  25351. + local_unlock_irqrestore(irq_off_lock, flags);
  25352. }
  25353. EXPORT_SYMBOL_GPL(percpu_ida_free);
  25354. @@ -345,7 +349,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
  25355. struct percpu_ida_cpu *remote;
  25356. unsigned cpu, i, err = 0;
  25357. - local_irq_save(flags);
  25358. + local_lock_irqsave(irq_off_lock, flags);
  25359. for_each_possible_cpu(cpu) {
  25360. remote = per_cpu_ptr(pool->tag_cpu, cpu);
  25361. spin_lock(&remote->lock);
  25362. @@ -367,7 +371,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
  25363. }
  25364. spin_unlock(&pool->lock);
  25365. out:
  25366. - local_irq_restore(flags);
  25367. + local_unlock_irqrestore(irq_off_lock, flags);
  25368. return err;
  25369. }
  25370. EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
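The percpu_ida conversion above is one instance of the generic locallock substitution. A minimal sketch of the pattern, assuming the <linux/locallock.h> API introduced earlier in this patch; my_lock and my_counter are illustrative names.

#include <linux/locallock.h>
#include <linux/percpu.h>

static DEFINE_LOCAL_IRQ_LOCK(my_lock);
static DEFINE_PER_CPU(unsigned long, my_counter);

static void my_counter_inc(void)
{
        unsigned long flags;

        /* !RT: compiles to local_irq_save(); RT: per-CPU sleeping lock, IRQs stay on */
        local_lock_irqsave(my_lock, flags);
        __this_cpu_inc(my_counter);
        local_unlock_irqrestore(my_lock, flags);
}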
  25371. diff --git a/lib/radix-tree.c b/lib/radix-tree.c
  25372. index 8399002aa0f0..19713243e698 100644
  25373. --- a/lib/radix-tree.c
  25374. +++ b/lib/radix-tree.c
  25375. @@ -195,12 +195,13 @@ radix_tree_node_alloc(struct radix_tree_root *root)
  25376. * succeed in getting a node here (and never reach
  25377. * kmem_cache_alloc)
  25378. */
  25379. - rtp = this_cpu_ptr(&radix_tree_preloads);
  25380. + rtp = &get_cpu_var(radix_tree_preloads);
  25381. if (rtp->nr) {
  25382. ret = rtp->nodes[rtp->nr - 1];
  25383. rtp->nodes[rtp->nr - 1] = NULL;
  25384. rtp->nr--;
  25385. }
  25386. + put_cpu_var(radix_tree_preloads);
  25387. /*
  25388. * Update the allocation stack trace as this is more useful
  25389. * for debugging.
  25390. @@ -240,6 +241,7 @@ radix_tree_node_free(struct radix_tree_node *node)
  25391. call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
  25392. }
  25393. +#ifndef CONFIG_PREEMPT_RT_FULL
  25394. /*
  25395. * Load up this CPU's radix_tree_node buffer with sufficient objects to
  25396. * ensure that the addition of a single element in the tree cannot fail. On
  25397. @@ -305,6 +307,7 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
  25398. return 0;
  25399. }
  25400. EXPORT_SYMBOL(radix_tree_maybe_preload);
  25401. +#endif
  25402. /*
  25403. * Return the maximum key which can be store into a
  25404. diff --git a/lib/scatterlist.c b/lib/scatterlist.c
  25405. index c9f2e8c6ccc9..f6d1f8899dca 100644
  25406. --- a/lib/scatterlist.c
  25407. +++ b/lib/scatterlist.c
  25408. @@ -592,7 +592,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
  25409. flush_kernel_dcache_page(miter->page);
  25410. if (miter->__flags & SG_MITER_ATOMIC) {
  25411. - WARN_ON_ONCE(preemptible());
  25412. + WARN_ON_ONCE(!pagefault_disabled());
  25413. kunmap_atomic(miter->addr);
  25414. } else
  25415. kunmap(miter->page);
  25416. @@ -637,7 +637,7 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
  25417. if (!sg_miter_skip(&miter, skip))
  25418. return false;
  25419. - local_irq_save(flags);
  25420. + local_irq_save_nort(flags);
  25421. while (sg_miter_next(&miter) && offset < buflen) {
  25422. unsigned int len;
  25423. @@ -654,7 +654,7 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
  25424. sg_miter_stop(&miter);
  25425. - local_irq_restore(flags);
  25426. + local_irq_restore_nort(flags);
  25427. return offset;
  25428. }
  25429. diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
  25430. index 1afec32de6f2..11fa431046a8 100644
  25431. --- a/lib/smp_processor_id.c
  25432. +++ b/lib/smp_processor_id.c
  25433. @@ -39,8 +39,9 @@ notrace static unsigned int check_preemption_disabled(const char *what1,
  25434. if (!printk_ratelimit())
  25435. goto out_enable;
  25436. - printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n",
  25437. - what1, what2, preempt_count() - 1, current->comm, current->pid);
  25438. + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x %08x] code: %s/%d\n",
  25439. + what1, what2, preempt_count() - 1, __migrate_disabled(current),
  25440. + current->comm, current->pid);
  25441. print_symbol("caller is %s\n", (long)__builtin_return_address(0));
  25442. dump_stack();
  25443. diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
  25444. index fe9a32591c24..3a5f2b366d84 100644
  25445. --- a/lib/strnlen_user.c
  25446. +++ b/lib/strnlen_user.c
  25447. @@ -85,7 +85,8 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
  25448. * @str: The string to measure.
  25449. * @count: Maximum count (including NUL character)
  25450. *
  25451. - * Context: User context only. This function may sleep.
  25452. + * Context: User context only. This function may sleep if pagefaults are
  25453. + * enabled.
  25454. *
  25455. * Get the size of a NUL-terminated string in user space.
  25456. *
  25457. @@ -121,7 +122,8 @@ EXPORT_SYMBOL(strnlen_user);
  25458. * strlen_user: - Get the size of a user string INCLUDING final NUL.
  25459. * @str: The string to measure.
  25460. *
  25461. - * Context: User context only. This function may sleep.
  25462. + * Context: User context only. This function may sleep if pagefaults are
  25463. + * enabled.
  25464. *
  25465. * Get the size of a NUL-terminated string in user space.
  25466. *
  25467. diff --git a/mm/Kconfig b/mm/Kconfig
  25468. index 390214da4546..0cc45370563a 100644
  25469. --- a/mm/Kconfig
  25470. +++ b/mm/Kconfig
  25471. @@ -409,7 +409,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
  25472. config TRANSPARENT_HUGEPAGE
  25473. bool "Transparent Hugepage Support"
  25474. - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
  25475. + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
  25476. select COMPACTION
  25477. help
  25478. Transparent Hugepages allows the kernel to use huge pages and
  25479. diff --git a/mm/compaction.c b/mm/compaction.c
  25480. index f93ada7403bf..1504b589905e 100644
  25481. --- a/mm/compaction.c
  25482. +++ b/mm/compaction.c
  25483. @@ -1423,10 +1423,12 @@ check_drain:
  25484. cc->migrate_pfn & ~((1UL << cc->order) - 1);
  25485. if (last_migrated_pfn < current_block_start) {
  25486. - cpu = get_cpu();
  25487. + cpu = get_cpu_light();
  25488. + local_lock_irq(swapvec_lock);
  25489. lru_add_drain_cpu(cpu);
  25490. + local_unlock_irq(swapvec_lock);
  25491. drain_local_pages(zone);
  25492. - put_cpu();
  25493. + put_cpu_light();
  25494. /* No more flushing until we migrate again */
  25495. last_migrated_pfn = 0;
  25496. }
  25497. diff --git a/mm/filemap.c b/mm/filemap.c
  25498. index 1ffef05f1c1f..7d4fa2bf6ac2 100644
  25499. --- a/mm/filemap.c
  25500. +++ b/mm/filemap.c
  25501. @@ -167,7 +167,9 @@ static void page_cache_tree_delete(struct address_space *mapping,
  25502. if (!workingset_node_pages(node) &&
  25503. list_empty(&node->private_list)) {
  25504. node->private_data = mapping;
  25505. - list_lru_add(&workingset_shadow_nodes, &node->private_list);
  25506. + local_lock(workingset_shadow_lock);
  25507. + list_lru_add(&__workingset_shadow_nodes, &node->private_list);
  25508. + local_unlock(workingset_shadow_lock);
  25509. }
  25510. }
  25511. @@ -533,9 +535,12 @@ static int page_cache_tree_insert(struct address_space *mapping,
  25512. * node->private_list is protected by
  25513. * mapping->tree_lock.
  25514. */
  25515. - if (!list_empty(&node->private_list))
  25516. - list_lru_del(&workingset_shadow_nodes,
  25517. + if (!list_empty(&node->private_list)) {
  25518. + local_lock(workingset_shadow_lock);
  25519. + list_lru_del(&__workingset_shadow_nodes,
  25520. &node->private_list);
  25521. + local_unlock(workingset_shadow_lock);
  25522. + }
  25523. }
  25524. return 0;
  25525. }
  25526. diff --git a/mm/highmem.c b/mm/highmem.c
  25527. index 123bcd3ed4f2..16e8cf26d38a 100644
  25528. --- a/mm/highmem.c
  25529. +++ b/mm/highmem.c
  25530. @@ -29,10 +29,11 @@
  25531. #include <linux/kgdb.h>
  25532. #include <asm/tlbflush.h>
  25533. -
  25534. +#ifndef CONFIG_PREEMPT_RT_FULL
  25535. #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
  25536. DEFINE_PER_CPU(int, __kmap_atomic_idx);
  25537. #endif
  25538. +#endif
  25539. /*
  25540. * Virtual_count is not a pure "count".
  25541. @@ -107,8 +108,9 @@ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color)
  25542. unsigned long totalhigh_pages __read_mostly;
  25543. EXPORT_SYMBOL(totalhigh_pages);
  25544. -
  25545. +#ifndef CONFIG_PREEMPT_RT_FULL
  25546. EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
  25547. +#endif
  25548. unsigned int nr_free_highpages (void)
  25549. {
  25550. diff --git a/mm/memcontrol.c b/mm/memcontrol.c
  25551. index 221762e24a68..cefa875a4320 100644
  25552. --- a/mm/memcontrol.c
  25553. +++ b/mm/memcontrol.c
  25554. @@ -66,6 +66,8 @@
  25555. #include <net/sock.h>
  25556. #include <net/ip.h>
  25557. #include <net/tcp_memcontrol.h>
  25558. +#include <linux/locallock.h>
  25559. +
  25560. #include "slab.h"
  25561. #include <asm/uaccess.h>
  25562. @@ -85,6 +87,7 @@ int do_swap_account __read_mostly;
  25563. #define do_swap_account 0
  25564. #endif
  25565. +static DEFINE_LOCAL_IRQ_LOCK(event_lock);
  25566. static const char * const mem_cgroup_stat_names[] = {
  25567. "cache",
  25568. "rss",
  25569. @@ -2124,14 +2127,17 @@ static void drain_local_stock(struct work_struct *dummy)
  25570. */
  25571. static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
  25572. {
  25573. - struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
  25574. + struct memcg_stock_pcp *stock;
  25575. + int cpu = get_cpu_light();
  25576. +
  25577. + stock = &per_cpu(memcg_stock, cpu);
  25578. if (stock->cached != memcg) { /* reset if necessary */
  25579. drain_stock(stock);
  25580. stock->cached = memcg;
  25581. }
  25582. stock->nr_pages += nr_pages;
  25583. - put_cpu_var(memcg_stock);
  25584. + put_cpu_light();
  25585. }
  25586. /*
  25587. @@ -2147,7 +2153,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
  25588. return;
  25589. /* Notify other cpus that system-wide "drain" is running */
  25590. get_online_cpus();
  25591. - curcpu = get_cpu();
  25592. + curcpu = get_cpu_light();
  25593. for_each_online_cpu(cpu) {
  25594. struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
  25595. struct mem_cgroup *memcg;
  25596. @@ -2164,7 +2170,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
  25597. schedule_work_on(cpu, &stock->work);
  25598. }
  25599. }
  25600. - put_cpu();
  25601. + put_cpu_light();
  25602. put_online_cpus();
  25603. mutex_unlock(&percpu_charge_mutex);
  25604. }
  25605. @@ -4803,12 +4809,12 @@ static int mem_cgroup_move_account(struct page *page,
  25606. ret = 0;
  25607. - local_irq_disable();
  25608. + local_lock_irq(event_lock);
  25609. mem_cgroup_charge_statistics(to, page, nr_pages);
  25610. memcg_check_events(to, page);
  25611. mem_cgroup_charge_statistics(from, page, -nr_pages);
  25612. memcg_check_events(from, page);
  25613. - local_irq_enable();
  25614. + local_unlock_irq(event_lock);
  25615. out_unlock:
  25616. unlock_page(page);
  25617. out:
  25618. @@ -5551,10 +5557,10 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
  25619. VM_BUG_ON_PAGE(!PageTransHuge(page), page);
  25620. }
  25621. - local_irq_disable();
  25622. + local_lock_irq(event_lock);
  25623. mem_cgroup_charge_statistics(memcg, page, nr_pages);
  25624. memcg_check_events(memcg, page);
  25625. - local_irq_enable();
  25626. + local_unlock_irq(event_lock);
  25627. if (do_swap_account && PageSwapCache(page)) {
  25628. swp_entry_t entry = { .val = page_private(page) };
  25629. @@ -5610,14 +5616,14 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
  25630. memcg_oom_recover(memcg);
  25631. }
  25632. - local_irq_save(flags);
  25633. + local_lock_irqsave(event_lock, flags);
  25634. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
  25635. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
  25636. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
  25637. __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
  25638. __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
  25639. memcg_check_events(memcg, dummy_page);
  25640. - local_irq_restore(flags);
  25641. + local_unlock_irqrestore(event_lock, flags);
  25642. if (!mem_cgroup_is_root(memcg))
  25643. css_put_many(&memcg->css, nr_pages);
  25644. @@ -5821,6 +5827,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
  25645. {
  25646. struct mem_cgroup *memcg;
  25647. unsigned short oldid;
  25648. + unsigned long flags;
  25649. VM_BUG_ON_PAGE(PageLRU(page), page);
  25650. VM_BUG_ON_PAGE(page_count(page), page);
  25651. @@ -5843,9 +5850,11 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
  25652. if (!mem_cgroup_is_root(memcg))
  25653. page_counter_uncharge(&memcg->memory, 1);
  25654. + local_lock_irqsave(event_lock, flags);
  25655. /* Caller disabled preemption with mapping->tree_lock */
  25656. mem_cgroup_charge_statistics(memcg, page, -1);
  25657. memcg_check_events(memcg, page);
  25658. + local_unlock_irqrestore(event_lock, flags);
  25659. }
  25660. /**
  25661. diff --git a/mm/memory.c b/mm/memory.c
  25662. index 701d9ad45c46..3456e24cce4f 100644
  25663. --- a/mm/memory.c
  25664. +++ b/mm/memory.c
  25665. @@ -3753,7 +3753,7 @@ void print_vma_addr(char *prefix, unsigned long ip)
  25666. }
  25667. #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)
  25668. -void might_fault(void)
  25669. +void __might_fault(const char *file, int line)
  25670. {
  25671. /*
  25672. * Some code (nfs/sunrpc) uses socket ops on kernel memory while
  25673. @@ -3763,21 +3763,15 @@ void might_fault(void)
  25674. */
  25675. if (segment_eq(get_fs(), KERNEL_DS))
  25676. return;
  25677. -
  25678. - /*
  25679. - * it would be nicer only to annotate paths which are not under
  25680. - * pagefault_disable, however that requires a larger audit and
  25681. - * providing helpers like get_user_atomic.
  25682. - */
  25683. - if (in_atomic())
  25684. + if (pagefault_disabled())
  25685. return;
  25686. -
  25687. - __might_sleep(__FILE__, __LINE__, 0);
  25688. -
  25689. + __might_sleep(file, line, 0);
  25690. +#if defined(CONFIG_DEBUG_ATOMIC_SLEEP)
  25691. if (current->mm)
  25692. might_lock_read(&current->mm->mmap_sem);
  25693. +#endif
  25694. }
  25695. -EXPORT_SYMBOL(might_fault);
  25696. +EXPORT_SYMBOL(__might_fault);
  25697. #endif
  25698. #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
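As context for the pagefault_disabled() test above, a sketch of the kind of section it covers: inside pagefault_disable() a user access must not sleep to fault pages in, so __might_fault() has to stay quiet there. peek_user_word() is a hypothetical helper, not kernel code.

#include <linux/uaccess.h>
#include <linux/errno.h>

static int peek_user_word(const int __user *uaddr, int *out)
{
        unsigned long left;

        pagefault_disable();            /* faults now fail instead of sleeping */
        left = __copy_from_user_inatomic(out, uaddr, sizeof(*out));
        pagefault_enable();

        return left ? -EFAULT : 0;
}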
  25699. diff --git a/mm/mmu_context.c b/mm/mmu_context.c
  25700. index f802c2d216a7..b1b6f238e42d 100644
  25701. --- a/mm/mmu_context.c
  25702. +++ b/mm/mmu_context.c
  25703. @@ -23,6 +23,7 @@ void use_mm(struct mm_struct *mm)
  25704. struct task_struct *tsk = current;
  25705. task_lock(tsk);
  25706. + preempt_disable_rt();
  25707. active_mm = tsk->active_mm;
  25708. if (active_mm != mm) {
  25709. atomic_inc(&mm->mm_count);
  25710. @@ -30,6 +31,7 @@ void use_mm(struct mm_struct *mm)
  25711. }
  25712. tsk->mm = mm;
  25713. switch_mm(active_mm, mm, tsk);
  25714. + preempt_enable_rt();
  25715. task_unlock(tsk);
  25716. #ifdef finish_arch_post_lock_switch
  25717. finish_arch_post_lock_switch();
  25718. diff --git a/mm/page_alloc.c b/mm/page_alloc.c
  25719. index f6f6831cec52..c6f829a374c1 100644
  25720. --- a/mm/page_alloc.c
  25721. +++ b/mm/page_alloc.c
  25722. @@ -60,6 +60,7 @@
  25723. #include <linux/page_ext.h>
  25724. #include <linux/hugetlb.h>
  25725. #include <linux/sched/rt.h>
  25726. +#include <linux/locallock.h>
  25727. #include <linux/page_owner.h>
  25728. #include <asm/sections.h>
  25729. @@ -233,6 +234,18 @@ EXPORT_SYMBOL(nr_node_ids);
  25730. EXPORT_SYMBOL(nr_online_nodes);
  25731. #endif
  25732. +static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
  25733. +
  25734. +#ifdef CONFIG_PREEMPT_RT_BASE
  25735. +# define cpu_lock_irqsave(cpu, flags) \
  25736. + local_lock_irqsave_on(pa_lock, flags, cpu)
  25737. +# define cpu_unlock_irqrestore(cpu, flags) \
  25738. + local_unlock_irqrestore_on(pa_lock, flags, cpu)
  25739. +#else
  25740. +# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
  25741. +# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
  25742. +#endif
  25743. +
  25744. int page_group_by_mobility_disabled __read_mostly;
  25745. void set_pageblock_migratetype(struct page *page, int migratetype)
  25746. @@ -701,7 +714,7 @@ static inline int free_pages_check(struct page *page)
  25747. }
  25748. /*
  25749. - * Frees a number of pages from the PCP lists
  25750. + * Frees a number of pages which have been collected from the pcp lists.
  25751. * Assumes all pages on list are in same zone, and of same order.
  25752. * count is the number of pages to free.
  25753. *
  25754. @@ -712,18 +725,51 @@ static inline int free_pages_check(struct page *page)
  25755. * pinned" detection logic.
  25756. */
  25757. static void free_pcppages_bulk(struct zone *zone, int count,
  25758. - struct per_cpu_pages *pcp)
  25759. + struct list_head *list)
  25760. {
  25761. - int migratetype = 0;
  25762. - int batch_free = 0;
  25763. int to_free = count;
  25764. unsigned long nr_scanned;
  25765. + unsigned long flags;
  25766. +
  25767. + spin_lock_irqsave(&zone->lock, flags);
  25768. - spin_lock(&zone->lock);
  25769. nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
  25770. if (nr_scanned)
  25771. __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
  25772. + while (!list_empty(list)) {
  25773. + struct page *page = list_first_entry(list, struct page, lru);
  25774. + int mt; /* migratetype of the to-be-freed page */
  25775. +
  25776. + /* must delete as __free_one_page list manipulates */
  25777. + list_del(&page->lru);
  25778. +
  25779. + mt = get_freepage_migratetype(page);
  25780. + if (unlikely(has_isolate_pageblock(zone)))
  25781. + mt = get_pageblock_migratetype(page);
  25782. +
  25783. + /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
  25784. + __free_one_page(page, page_to_pfn(page), zone, 0, mt);
  25785. + trace_mm_page_pcpu_drain(page, 0, mt);
  25786. + to_free--;
  25787. + }
  25788. + WARN_ON(to_free != 0);
  25789. + spin_unlock_irqrestore(&zone->lock, flags);
  25790. +}
  25791. +
  25792. +/*
  25793. + * Moves a number of pages from the PCP lists to free list which
  25794. + * is freed outside of the locked region.
  25795. + *
  25796. + * Assumes all pages on list are in same zone, and of same order.
  25797. + * count is the number of pages to free.
  25798. + */
  25799. +static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src,
  25800. + struct list_head *dst)
  25801. +{
  25802. + int migratetype = 0;
  25803. + int batch_free = 0;
  25804. +
  25805. while (to_free) {
  25806. struct page *page;
  25807. struct list_head *list;
  25808. @@ -739,7 +785,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
  25809. batch_free++;
  25810. if (++migratetype == MIGRATE_PCPTYPES)
  25811. migratetype = 0;
  25812. - list = &pcp->lists[migratetype];
  25813. + list = &src->lists[migratetype];
  25814. } while (list_empty(list));
  25815. /* This is the only non-empty list. Free them all. */
  25816. @@ -747,21 +793,11 @@ static void free_pcppages_bulk(struct zone *zone, int count,
  25817. batch_free = to_free;
  25818. do {
  25819. - int mt; /* migratetype of the to-be-freed page */
  25820. -
  25821. - page = list_entry(list->prev, struct page, lru);
  25822. - /* must delete as __free_one_page list manipulates */
  25823. + page = list_last_entry(list, struct page, lru);
  25824. list_del(&page->lru);
  25825. - mt = get_freepage_migratetype(page);
  25826. - if (unlikely(has_isolate_pageblock(zone)))
  25827. - mt = get_pageblock_migratetype(page);
  25828. -
  25829. - /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
  25830. - __free_one_page(page, page_to_pfn(page), zone, 0, mt);
  25831. - trace_mm_page_pcpu_drain(page, 0, mt);
  25832. + list_add(&page->lru, dst);
  25833. } while (--to_free && --batch_free && !list_empty(list));
  25834. }
  25835. - spin_unlock(&zone->lock);
  25836. }
  25837. static void free_one_page(struct zone *zone,
  25838. @@ -770,7 +806,9 @@ static void free_one_page(struct zone *zone,
  25839. int migratetype)
  25840. {
  25841. unsigned long nr_scanned;
  25842. - spin_lock(&zone->lock);
  25843. + unsigned long flags;
  25844. +
  25845. + spin_lock_irqsave(&zone->lock, flags);
  25846. nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
  25847. if (nr_scanned)
  25848. __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
  25849. @@ -780,7 +818,7 @@ static void free_one_page(struct zone *zone,
  25850. migratetype = get_pfnblock_migratetype(page, pfn);
  25851. }
  25852. __free_one_page(page, pfn, zone, order, migratetype);
  25853. - spin_unlock(&zone->lock);
  25854. + spin_unlock_irqrestore(&zone->lock, flags);
  25855. }
  25856. static int free_tail_pages_check(struct page *head_page, struct page *page)
  25857. @@ -845,11 +883,11 @@ static void __free_pages_ok(struct page *page, unsigned int order)
  25858. return;
  25859. migratetype = get_pfnblock_migratetype(page, pfn);
  25860. - local_irq_save(flags);
  25861. + local_lock_irqsave(pa_lock, flags);
  25862. __count_vm_events(PGFREE, 1 << order);
  25863. set_freepage_migratetype(page, migratetype);
  25864. free_one_page(page_zone(page), page, pfn, order, migratetype);
  25865. - local_irq_restore(flags);
  25866. + local_unlock_irqrestore(pa_lock, flags);
  25867. }
  25868. void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
  25869. @@ -1396,16 +1434,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
  25870. void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
  25871. {
  25872. unsigned long flags;
  25873. + LIST_HEAD(dst);
  25874. int to_drain, batch;
  25875. - local_irq_save(flags);
  25876. + local_lock_irqsave(pa_lock, flags);
  25877. batch = READ_ONCE(pcp->batch);
  25878. to_drain = min(pcp->count, batch);
  25879. if (to_drain > 0) {
  25880. - free_pcppages_bulk(zone, to_drain, pcp);
  25881. + isolate_pcp_pages(to_drain, pcp, &dst);
  25882. pcp->count -= to_drain;
  25883. }
  25884. - local_irq_restore(flags);
  25885. + local_unlock_irqrestore(pa_lock, flags);
  25886. + free_pcppages_bulk(zone, to_drain, &dst);
  25887. }
  25888. #endif
  25889. @@ -1421,16 +1461,21 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
  25890. unsigned long flags;
  25891. struct per_cpu_pageset *pset;
  25892. struct per_cpu_pages *pcp;
  25893. + LIST_HEAD(dst);
  25894. + int count;
  25895. - local_irq_save(flags);
  25896. + cpu_lock_irqsave(cpu, flags);
  25897. pset = per_cpu_ptr(zone->pageset, cpu);
  25898. pcp = &pset->pcp;
  25899. - if (pcp->count) {
  25900. - free_pcppages_bulk(zone, pcp->count, pcp);
  25901. + count = pcp->count;
  25902. + if (count) {
  25903. + isolate_pcp_pages(count, pcp, &dst);
  25904. pcp->count = 0;
  25905. }
  25906. - local_irq_restore(flags);
  25907. + cpu_unlock_irqrestore(cpu, flags);
  25908. + if (count)
  25909. + free_pcppages_bulk(zone, count, &dst);
  25910. }
  25911. /*
  25912. @@ -1516,8 +1561,17 @@ void drain_all_pages(struct zone *zone)
  25913. else
  25914. cpumask_clear_cpu(cpu, &cpus_with_pcps);
  25915. }
  25916. +#ifndef CONFIG_PREEMPT_RT_BASE
  25917. on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
  25918. zone, 1);
  25919. +#else
  25920. + for_each_cpu(cpu, &cpus_with_pcps) {
  25921. + if (zone)
  25922. + drain_pages_zone(cpu, zone);
  25923. + else
  25924. + drain_pages(cpu);
  25925. + }
  25926. +#endif
  25927. }
  25928. #ifdef CONFIG_HIBERNATION
  25929. @@ -1573,7 +1627,7 @@ void free_hot_cold_page(struct page *page, bool cold)
  25930. migratetype = get_pfnblock_migratetype(page, pfn);
  25931. set_freepage_migratetype(page, migratetype);
  25932. - local_irq_save(flags);
  25933. + local_lock_irqsave(pa_lock, flags);
  25934. __count_vm_event(PGFREE);
  25935. /*
  25936. @@ -1599,12 +1653,17 @@ void free_hot_cold_page(struct page *page, bool cold)
  25937. pcp->count++;
  25938. if (pcp->count >= pcp->high) {
  25939. unsigned long batch = READ_ONCE(pcp->batch);
  25940. - free_pcppages_bulk(zone, batch, pcp);
  25941. + LIST_HEAD(dst);
  25942. +
  25943. + isolate_pcp_pages(batch, pcp, &dst);
  25944. pcp->count -= batch;
  25945. + local_unlock_irqrestore(pa_lock, flags);
  25946. + free_pcppages_bulk(zone, batch, &dst);
  25947. + return;
  25948. }
  25949. out:
  25950. - local_irq_restore(flags);
  25951. + local_unlock_irqrestore(pa_lock, flags);
  25952. }
  25953. /*
  25954. @@ -1735,7 +1794,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
  25955. struct per_cpu_pages *pcp;
  25956. struct list_head *list;
  25957. - local_irq_save(flags);
  25958. + local_lock_irqsave(pa_lock, flags);
  25959. pcp = &this_cpu_ptr(zone->pageset)->pcp;
  25960. list = &pcp->lists[migratetype];
  25961. if (list_empty(list)) {
  25962. @@ -1767,13 +1826,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
  25963. */
  25964. WARN_ON_ONCE(order > 1);
  25965. }
  25966. - spin_lock_irqsave(&zone->lock, flags);
  25967. + local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
  25968. page = __rmqueue(zone, order, migratetype);
  25969. - spin_unlock(&zone->lock);
  25970. - if (!page)
  25971. + if (!page) {
  25972. + spin_unlock(&zone->lock);
  25973. goto failed;
  25974. + }
  25975. __mod_zone_freepage_state(zone, -(1 << order),
  25976. get_freepage_migratetype(page));
  25977. + spin_unlock(&zone->lock);
  25978. }
  25979. __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
  25980. @@ -1783,13 +1844,13 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
  25981. __count_zone_vm_events(PGALLOC, zone, 1 << order);
  25982. zone_statistics(preferred_zone, zone, gfp_flags);
  25983. - local_irq_restore(flags);
  25984. + local_unlock_irqrestore(pa_lock, flags);
  25985. VM_BUG_ON_PAGE(bad_range(zone, page), page);
  25986. return page;
  25987. failed:
  25988. - local_irq_restore(flags);
  25989. + local_unlock_irqrestore(pa_lock, flags);
  25990. return NULL;
  25991. }
  25992. @@ -5680,6 +5741,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
  25993. void __init page_alloc_init(void)
  25994. {
  25995. hotcpu_notifier(page_alloc_cpu_notify, 0);
  25996. + local_irq_lock_init(pa_lock);
  25997. }
  25998. /*
  25999. @@ -6575,7 +6637,7 @@ void zone_pcp_reset(struct zone *zone)
  26000. struct per_cpu_pageset *pset;
  26001. /* avoid races with drain_pages() */
  26002. - local_irq_save(flags);
  26003. + local_lock_irqsave(pa_lock, flags);
  26004. if (zone->pageset != &boot_pageset) {
  26005. for_each_online_cpu(cpu) {
  26006. pset = per_cpu_ptr(zone->pageset, cpu);
  26007. @@ -6584,7 +6646,7 @@ void zone_pcp_reset(struct zone *zone)
  26008. free_percpu(zone->pageset);
  26009. zone->pageset = &boot_pageset;
  26010. }
  26011. - local_irq_restore(flags);
  26012. + local_unlock_irqrestore(pa_lock, flags);
  26013. }
  26014. #ifdef CONFIG_MEMORY_HOTREMOVE
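The free_pcppages_bulk()/isolate_pcp_pages() split above follows a general shape: detach the work onto a private list while the local lock is held, then do the heavy freeing after dropping it so the locked section stays short on -rt. A minimal sketch with illustrative names (struct item, pool_lock, drain_pool), not page-allocator code:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/locallock.h>

struct item {
        struct list_head lru;
};

static DEFINE_LOCAL_IRQ_LOCK(pool_lock);
static LIST_HEAD(pool);

static void free_collected(struct list_head *dst)
{
        while (!list_empty(dst)) {
                struct item *it = list_first_entry(dst, struct item, lru);

                list_del(&it->lru);
                kfree(it);                      /* heavy work, lock already dropped */
        }
}

static void drain_pool(void)
{
        unsigned long flags;
        LIST_HEAD(dst);

        local_lock_irqsave(pool_lock, flags);
        list_splice_init(&pool, &dst);          /* cheap under the lock */
        local_unlock_irqrestore(pool_lock, flags);

        free_collected(&dst);
}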
  26015. diff --git a/mm/slab.h b/mm/slab.h
  26016. index 4c3ac12dd644..0c9bda0eb0c1 100644
  26017. --- a/mm/slab.h
  26018. +++ b/mm/slab.h
  26019. @@ -330,7 +330,11 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
  26020. * The slab lists for all objects.
  26021. */
  26022. struct kmem_cache_node {
  26023. +#ifdef CONFIG_SLUB
  26024. + raw_spinlock_t list_lock;
  26025. +#else
  26026. spinlock_t list_lock;
  26027. +#endif
  26028. #ifdef CONFIG_SLAB
  26029. struct list_head slabs_partial; /* partial list first, better asm code */
  26030. diff --git a/mm/slub.c b/mm/slub.c
  26031. index 08342c523a85..905e283d7829 100644
  26032. --- a/mm/slub.c
  26033. +++ b/mm/slub.c
  26034. @@ -1069,7 +1069,7 @@ static noinline struct kmem_cache_node *free_debug_processing(
  26035. {
  26036. struct kmem_cache_node *n = get_node(s, page_to_nid(page));
  26037. - spin_lock_irqsave(&n->list_lock, *flags);
  26038. + raw_spin_lock_irqsave(&n->list_lock, *flags);
  26039. slab_lock(page);
  26040. if (!check_slab(s, page))
  26041. @@ -1116,7 +1116,7 @@ out:
  26042. fail:
  26043. slab_unlock(page);
  26044. - spin_unlock_irqrestore(&n->list_lock, *flags);
  26045. + raw_spin_unlock_irqrestore(&n->list_lock, *flags);
  26046. slab_fix(s, "Object at 0x%p not freed", object);
  26047. return NULL;
  26048. }
  26049. @@ -1242,6 +1242,12 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node,
  26050. #endif /* CONFIG_SLUB_DEBUG */
  26051. +struct slub_free_list {
  26052. + raw_spinlock_t lock;
  26053. + struct list_head list;
  26054. +};
  26055. +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
  26056. +
  26057. /*
  26058. * Hooks for other subsystems that check memory allocations. In a typical
  26059. * production configuration these hooks all should produce no code at all.
  26060. @@ -1306,6 +1312,17 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
  26061. kasan_slab_free(s, x);
  26062. }
  26063. +static void setup_object(struct kmem_cache *s, struct page *page,
  26064. + void *object)
  26065. +{
  26066. + setup_object_debug(s, page, object);
  26067. + if (unlikely(s->ctor)) {
  26068. + kasan_unpoison_object_data(s, object);
  26069. + s->ctor(object);
  26070. + kasan_poison_object_data(s, object);
  26071. + }
  26072. +}
  26073. +
  26074. /*
  26075. * Slab allocation and freeing
  26076. */
  26077. @@ -1336,10 +1353,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
  26078. struct page *page;
  26079. struct kmem_cache_order_objects oo = s->oo;
  26080. gfp_t alloc_gfp;
  26081. + void *start, *p;
  26082. + int idx, order;
  26083. + bool enableirqs;
  26084. flags &= gfp_allowed_mask;
  26085. - if (flags & __GFP_WAIT)
  26086. + enableirqs = (flags & __GFP_WAIT) != 0;
  26087. +#ifdef CONFIG_PREEMPT_RT_FULL
  26088. + enableirqs |= system_state == SYSTEM_RUNNING;
  26089. +#endif
  26090. + if (enableirqs)
  26091. local_irq_enable();
  26092. flags |= s->allocflags;
  26093. @@ -1359,13 +1383,13 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
  26094. * Try a lower order alloc if possible
  26095. */
  26096. page = alloc_slab_page(s, alloc_gfp, node, oo);
  26097. -
  26098. - if (page)
  26099. - stat(s, ORDER_FALLBACK);
  26100. + if (unlikely(!page))
  26101. + goto out;
  26102. + stat(s, ORDER_FALLBACK);
  26103. }
  26104. - if (kmemcheck_enabled && page
  26105. - && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
  26106. + if (kmemcheck_enabled &&
  26107. + !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
  26108. int pages = 1 << oo_order(oo);
  26109. kmemcheck_alloc_shadow(page, oo_order(oo), alloc_gfp, node);
  26110. @@ -1380,51 +1404,9 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
  26111. kmemcheck_mark_unallocated_pages(page, pages);
  26112. }
  26113. - if (flags & __GFP_WAIT)
  26114. - local_irq_disable();
  26115. - if (!page)
  26116. - return NULL;
  26117. -
  26118. page->objects = oo_objects(oo);
  26119. - mod_zone_page_state(page_zone(page),
  26120. - (s->flags & SLAB_RECLAIM_ACCOUNT) ?
  26121. - NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
  26122. - 1 << oo_order(oo));
  26123. -
  26124. - return page;
  26125. -}
  26126. -
  26127. -static void setup_object(struct kmem_cache *s, struct page *page,
  26128. - void *object)
  26129. -{
  26130. - setup_object_debug(s, page, object);
  26131. - if (unlikely(s->ctor)) {
  26132. - kasan_unpoison_object_data(s, object);
  26133. - s->ctor(object);
  26134. - kasan_poison_object_data(s, object);
  26135. - }
  26136. -}
  26137. -
  26138. -static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
  26139. -{
  26140. - struct page *page;
  26141. - void *start;
  26142. - void *p;
  26143. - int order;
  26144. - int idx;
  26145. -
  26146. - if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
  26147. - pr_emerg("gfp: %u\n", flags & GFP_SLAB_BUG_MASK);
  26148. - BUG();
  26149. - }
  26150. -
  26151. - page = allocate_slab(s,
  26152. - flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
  26153. - if (!page)
  26154. - goto out;
  26155. order = compound_order(page);
  26156. - inc_slabs_node(s, page_to_nid(page), page->objects);
  26157. page->slab_cache = s;
  26158. __SetPageSlab(page);
  26159. if (page_is_pfmemalloc(page))
  26160. @@ -1448,10 +1430,34 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
  26161. page->freelist = start;
  26162. page->inuse = page->objects;
  26163. page->frozen = 1;
  26164. +
  26165. out:
  26166. + if (enableirqs)
  26167. + local_irq_disable();
  26168. + if (!page)
  26169. + return NULL;
  26170. +
  26171. + mod_zone_page_state(page_zone(page),
  26172. + (s->flags & SLAB_RECLAIM_ACCOUNT) ?
  26173. + NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
  26174. + 1 << oo_order(oo));
  26175. +
  26176. + inc_slabs_node(s, page_to_nid(page), page->objects);
  26177. +
  26178. return page;
  26179. }
  26180. +static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
  26181. +{
  26182. + if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
  26183. + pr_emerg("gfp: %u\n", flags & GFP_SLAB_BUG_MASK);
  26184. + BUG();
  26185. + }
  26186. +
  26187. + return allocate_slab(s,
  26188. + flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
  26189. +}
  26190. +
  26191. static void __free_slab(struct kmem_cache *s, struct page *page)
  26192. {
  26193. int order = compound_order(page);
  26194. @@ -1483,6 +1489,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
  26195. memcg_uncharge_slab(s, order);
  26196. }
  26197. +static void free_delayed(struct list_head *h)
  26198. +{
  26199. + while(!list_empty(h)) {
  26200. + struct page *page = list_first_entry(h, struct page, lru);
  26201. +
  26202. + list_del(&page->lru);
  26203. + __free_slab(page->slab_cache, page);
  26204. + }
  26205. +}
  26206. +
  26207. #define need_reserve_slab_rcu \
  26208. (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
  26209. @@ -1517,6 +1533,12 @@ static void free_slab(struct kmem_cache *s, struct page *page)
  26210. }
  26211. call_rcu(head, rcu_free_slab);
  26212. + } else if (irqs_disabled()) {
  26213. + struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
  26214. +
  26215. + raw_spin_lock(&f->lock);
  26216. + list_add(&page->lru, &f->list);
  26217. + raw_spin_unlock(&f->lock);
  26218. } else
  26219. __free_slab(s, page);
  26220. }
  26221. @@ -1630,7 +1652,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
  26222. if (!n || !n->nr_partial)
  26223. return NULL;
  26224. - spin_lock(&n->list_lock);
  26225. + raw_spin_lock(&n->list_lock);
  26226. list_for_each_entry_safe(page, page2, &n->partial, lru) {
  26227. void *t;
  26228. @@ -1655,7 +1677,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
  26229. break;
  26230. }
  26231. - spin_unlock(&n->list_lock);
  26232. + raw_spin_unlock(&n->list_lock);
  26233. return object;
  26234. }
  26235. @@ -1901,7 +1923,7 @@ redo:
  26236. * that acquire_slab() will see a slab page that
  26237. * is frozen
  26238. */
  26239. - spin_lock(&n->list_lock);
  26240. + raw_spin_lock(&n->list_lock);
  26241. }
  26242. } else {
  26243. m = M_FULL;
  26244. @@ -1912,7 +1934,7 @@ redo:
  26245. * slabs from diagnostic functions will not see
  26246. * any frozen slabs.
  26247. */
  26248. - spin_lock(&n->list_lock);
  26249. + raw_spin_lock(&n->list_lock);
  26250. }
  26251. }
  26252. @@ -1947,7 +1969,7 @@ redo:
  26253. goto redo;
  26254. if (lock)
  26255. - spin_unlock(&n->list_lock);
  26256. + raw_spin_unlock(&n->list_lock);
  26257. if (m == M_FREE) {
  26258. stat(s, DEACTIVATE_EMPTY);
  26259. @@ -1979,10 +2001,10 @@ static void unfreeze_partials(struct kmem_cache *s,
  26260. n2 = get_node(s, page_to_nid(page));
  26261. if (n != n2) {
  26262. if (n)
  26263. - spin_unlock(&n->list_lock);
  26264. + raw_spin_unlock(&n->list_lock);
  26265. n = n2;
  26266. - spin_lock(&n->list_lock);
  26267. + raw_spin_lock(&n->list_lock);
  26268. }
  26269. do {
  26270. @@ -2011,7 +2033,7 @@ static void unfreeze_partials(struct kmem_cache *s,
  26271. }
  26272. if (n)
  26273. - spin_unlock(&n->list_lock);
  26274. + raw_spin_unlock(&n->list_lock);
  26275. while (discard_page) {
  26276. page = discard_page;
  26277. @@ -2050,14 +2072,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
  26278. pobjects = oldpage->pobjects;
  26279. pages = oldpage->pages;
  26280. if (drain && pobjects > s->cpu_partial) {
  26281. + struct slub_free_list *f;
  26282. unsigned long flags;
  26283. + LIST_HEAD(tofree);
  26284. /*
  26285. * partial array is full. Move the existing
  26286. * set to the per node partial list.
  26287. */
  26288. local_irq_save(flags);
  26289. unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
  26290. + f = this_cpu_ptr(&slub_free_list);
  26291. + raw_spin_lock(&f->lock);
  26292. + list_splice_init(&f->list, &tofree);
  26293. + raw_spin_unlock(&f->lock);
  26294. local_irq_restore(flags);
  26295. + free_delayed(&tofree);
  26296. oldpage = NULL;
  26297. pobjects = 0;
  26298. pages = 0;
  26299. @@ -2129,7 +2158,22 @@ static bool has_cpu_slab(int cpu, void *info)
  26300. static void flush_all(struct kmem_cache *s)
  26301. {
  26302. + LIST_HEAD(tofree);
  26303. + int cpu;
  26304. +
  26305. on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
  26306. + for_each_online_cpu(cpu) {
  26307. + struct slub_free_list *f;
  26308. +
  26309. + if (!has_cpu_slab(cpu, s))
  26310. + continue;
  26311. +
  26312. + f = &per_cpu(slub_free_list, cpu);
  26313. + raw_spin_lock_irq(&f->lock);
  26314. + list_splice_init(&f->list, &tofree);
  26315. + raw_spin_unlock_irq(&f->lock);
  26316. + free_delayed(&tofree);
  26317. + }
  26318. }
  26319. /*
  26320. @@ -2165,10 +2209,10 @@ static unsigned long count_partial(struct kmem_cache_node *n,
  26321. unsigned long x = 0;
  26322. struct page *page;
  26323. - spin_lock_irqsave(&n->list_lock, flags);
  26324. + raw_spin_lock_irqsave(&n->list_lock, flags);
  26325. list_for_each_entry(page, &n->partial, lru)
  26326. x += get_count(page);
  26327. - spin_unlock_irqrestore(&n->list_lock, flags);
  26328. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  26329. return x;
  26330. }
  26331. #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
  26332. @@ -2305,9 +2349,11 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
  26333. static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  26334. unsigned long addr, struct kmem_cache_cpu *c)
  26335. {
  26336. + struct slub_free_list *f;
  26337. void *freelist;
  26338. struct page *page;
  26339. unsigned long flags;
  26340. + LIST_HEAD(tofree);
  26341. local_irq_save(flags);
  26342. #ifdef CONFIG_PREEMPT
  26343. @@ -2375,7 +2421,13 @@ load_freelist:
  26344. VM_BUG_ON(!c->page->frozen);
  26345. c->freelist = get_freepointer(s, freelist);
  26346. c->tid = next_tid(c->tid);
  26347. +out:
  26348. + f = this_cpu_ptr(&slub_free_list);
  26349. + raw_spin_lock(&f->lock);
  26350. + list_splice_init(&f->list, &tofree);
  26351. + raw_spin_unlock(&f->lock);
  26352. local_irq_restore(flags);
  26353. + free_delayed(&tofree);
  26354. return freelist;
  26355. new_slab:
  26356. @@ -2392,8 +2444,7 @@ new_slab:
  26357. if (unlikely(!freelist)) {
  26358. slab_out_of_memory(s, gfpflags, node);
  26359. - local_irq_restore(flags);
  26360. - return NULL;
  26361. + goto out;
  26362. }
  26363. page = c->page;
  26364. @@ -2408,8 +2459,7 @@ new_slab:
  26365. deactivate_slab(s, page, get_freepointer(s, freelist));
  26366. c->page = NULL;
  26367. c->freelist = NULL;
  26368. - local_irq_restore(flags);
  26369. - return freelist;
  26370. + goto out;
  26371. }
  26372. /*
  26373. @@ -2593,7 +2643,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
  26374. do {
  26375. if (unlikely(n)) {
  26376. - spin_unlock_irqrestore(&n->list_lock, flags);
  26377. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  26378. n = NULL;
  26379. }
  26380. prior = page->freelist;
  26381. @@ -2625,7 +2675,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
  26382. * Otherwise the list_lock will synchronize with
  26383. * other processors updating the list of slabs.
  26384. */
  26385. - spin_lock_irqsave(&n->list_lock, flags);
  26386. + raw_spin_lock_irqsave(&n->list_lock, flags);
  26387. }
  26388. }
  26389. @@ -2667,7 +2717,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
  26390. add_partial(n, page, DEACTIVATE_TO_TAIL);
  26391. stat(s, FREE_ADD_PARTIAL);
  26392. }
  26393. - spin_unlock_irqrestore(&n->list_lock, flags);
  26394. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  26395. return;
  26396. slab_empty:
  26397. @@ -2682,7 +2732,7 @@ slab_empty:
  26398. remove_full(s, n, page);
  26399. }
  26400. - spin_unlock_irqrestore(&n->list_lock, flags);
  26401. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  26402. stat(s, FREE_SLAB);
  26403. discard_slab(s, page);
  26404. }
  26405. @@ -2881,7 +2931,7 @@ static void
  26406. init_kmem_cache_node(struct kmem_cache_node *n)
  26407. {
  26408. n->nr_partial = 0;
  26409. - spin_lock_init(&n->list_lock);
  26410. + raw_spin_lock_init(&n->list_lock);
  26411. INIT_LIST_HEAD(&n->partial);
  26412. #ifdef CONFIG_SLUB_DEBUG
  26413. atomic_long_set(&n->nr_slabs, 0);
  26414. @@ -3463,7 +3513,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
  26415. for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
  26416. INIT_LIST_HEAD(promote + i);
  26417. - spin_lock_irqsave(&n->list_lock, flags);
  26418. + raw_spin_lock_irqsave(&n->list_lock, flags);
  26419. /*
  26420. * Build lists of slabs to discard or promote.
  26421. @@ -3494,7 +3544,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
  26422. for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
  26423. list_splice(promote + i, &n->partial);
  26424. - spin_unlock_irqrestore(&n->list_lock, flags);
  26425. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  26426. /* Release empty slabs */
  26427. list_for_each_entry_safe(page, t, &discard, lru)
  26428. @@ -3670,6 +3720,12 @@ void __init kmem_cache_init(void)
  26429. {
  26430. static __initdata struct kmem_cache boot_kmem_cache,
  26431. boot_kmem_cache_node;
  26432. + int cpu;
  26433. +
  26434. + for_each_possible_cpu(cpu) {
  26435. + raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
  26436. + INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
  26437. + }
  26438. if (debug_guardpage_minorder())
  26439. slub_max_order = 0;
  26440. @@ -3912,7 +3968,7 @@ static int validate_slab_node(struct kmem_cache *s,
  26441. struct page *page;
  26442. unsigned long flags;
  26443. - spin_lock_irqsave(&n->list_lock, flags);
  26444. + raw_spin_lock_irqsave(&n->list_lock, flags);
  26445. list_for_each_entry(page, &n->partial, lru) {
  26446. validate_slab_slab(s, page, map);
  26447. @@ -3934,7 +3990,7 @@ static int validate_slab_node(struct kmem_cache *s,
  26448. s->name, count, atomic_long_read(&n->nr_slabs));
  26449. out:
  26450. - spin_unlock_irqrestore(&n->list_lock, flags);
  26451. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  26452. return count;
  26453. }
  26454. @@ -4122,12 +4178,12 @@ static int list_locations(struct kmem_cache *s, char *buf,
  26455. if (!atomic_long_read(&n->nr_slabs))
  26456. continue;
  26457. - spin_lock_irqsave(&n->list_lock, flags);
  26458. + raw_spin_lock_irqsave(&n->list_lock, flags);
  26459. list_for_each_entry(page, &n->partial, lru)
  26460. process_slab(&t, s, page, alloc, map);
  26461. list_for_each_entry(page, &n->full, lru)
  26462. process_slab(&t, s, page, alloc, map);
  26463. - spin_unlock_irqrestore(&n->list_lock, flags);
  26464. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  26465. }
  26466. for (i = 0; i < t.count; i++) {
  26467. diff --git a/mm/swap.c b/mm/swap.c
  26468. index ab3b9c2dd783..b433019229f8 100644
  26469. --- a/mm/swap.c
  26470. +++ b/mm/swap.c
  26471. @@ -32,6 +32,7 @@
  26472. #include <linux/gfp.h>
  26473. #include <linux/uio.h>
  26474. #include <linux/hugetlb.h>
  26475. +#include <linux/locallock.h>
  26476. #include "internal.h"
  26477. @@ -45,6 +46,9 @@ static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
  26478. static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
  26479. static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
  26480. +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
  26481. +DEFINE_LOCAL_IRQ_LOCK(swapvec_lock);
  26482. +
  26483. /*
  26484. * This path almost never happens for VM activity - pages are normally
  26485. * freed via pagevecs. But it gets used by networking.
  26486. @@ -481,11 +485,11 @@ void rotate_reclaimable_page(struct page *page)
  26487. unsigned long flags;
  26488. page_cache_get(page);
  26489. - local_irq_save(flags);
  26490. + local_lock_irqsave(rotate_lock, flags);
  26491. pvec = this_cpu_ptr(&lru_rotate_pvecs);
  26492. if (!pagevec_add(pvec, page) || PageCompound(page))
  26493. pagevec_move_tail(pvec);
  26494. - local_irq_restore(flags);
  26495. + local_unlock_irqrestore(rotate_lock, flags);
  26496. }
  26497. }
  26498. @@ -536,12 +540,13 @@ static bool need_activate_page_drain(int cpu)
  26499. void activate_page(struct page *page)
  26500. {
  26501. if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  26502. - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
  26503. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  26504. + activate_page_pvecs);
  26505. page_cache_get(page);
  26506. if (!pagevec_add(pvec, page) || PageCompound(page))
  26507. pagevec_lru_move_fn(pvec, __activate_page, NULL);
  26508. - put_cpu_var(activate_page_pvecs);
  26509. + put_locked_var(swapvec_lock, activate_page_pvecs);
  26510. }
  26511. }
  26512. @@ -567,7 +572,7 @@ void activate_page(struct page *page)
  26513. static void __lru_cache_activate_page(struct page *page)
  26514. {
  26515. - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  26516. + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
  26517. int i;
  26518. /*
  26519. @@ -589,7 +594,7 @@ static void __lru_cache_activate_page(struct page *page)
  26520. }
  26521. }
  26522. - put_cpu_var(lru_add_pvec);
  26523. + put_locked_var(swapvec_lock, lru_add_pvec);
  26524. }
  26525. /*
  26526. @@ -628,12 +633,12 @@ EXPORT_SYMBOL(mark_page_accessed);
  26527. static void __lru_cache_add(struct page *page)
  26528. {
  26529. - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  26530. + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
  26531. page_cache_get(page);
  26532. if (!pagevec_add(pvec, page) || PageCompound(page))
  26533. __pagevec_lru_add(pvec);
  26534. - put_cpu_var(lru_add_pvec);
  26535. + put_locked_var(swapvec_lock, lru_add_pvec);
  26536. }
  26537. /**
  26538. @@ -813,9 +818,15 @@ void lru_add_drain_cpu(int cpu)
  26539. unsigned long flags;
  26540. /* No harm done if a racing interrupt already did this */
  26541. - local_irq_save(flags);
  26542. +#ifdef CONFIG_PREEMPT_RT_BASE
  26543. + local_lock_irqsave_on(rotate_lock, flags, cpu);
  26544. + pagevec_move_tail(pvec);
  26545. + local_unlock_irqrestore_on(rotate_lock, flags, cpu);
  26546. +#else
  26547. + local_lock_irqsave(rotate_lock, flags);
  26548. pagevec_move_tail(pvec);
  26549. - local_irq_restore(flags);
  26550. + local_unlock_irqrestore(rotate_lock, flags);
  26551. +#endif
  26552. }
  26553. pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
  26554. @@ -843,26 +854,47 @@ void deactivate_file_page(struct page *page)
  26555. return;
  26556. if (likely(get_page_unless_zero(page))) {
  26557. - struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
  26558. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  26559. + lru_deactivate_file_pvecs);
  26560. if (!pagevec_add(pvec, page) || PageCompound(page))
  26561. pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
  26562. - put_cpu_var(lru_deactivate_file_pvecs);
  26563. + put_locked_var(swapvec_lock, lru_deactivate_file_pvecs);
  26564. }
  26565. }
  26566. void lru_add_drain(void)
  26567. {
  26568. - lru_add_drain_cpu(get_cpu());
  26569. - put_cpu();
  26570. + lru_add_drain_cpu(local_lock_cpu(swapvec_lock));
  26571. + local_unlock_cpu(swapvec_lock);
  26572. }
  26573. +
  26574. +#ifdef CONFIG_PREEMPT_RT_BASE
  26575. +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
  26576. +{
  26577. + local_lock_on(swapvec_lock, cpu);
  26578. + lru_add_drain_cpu(cpu);
  26579. + local_unlock_on(swapvec_lock, cpu);
  26580. +}
  26581. +
  26582. +#else
  26583. +
  26584. static void lru_add_drain_per_cpu(struct work_struct *dummy)
  26585. {
  26586. lru_add_drain();
  26587. }
  26588. static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
  26589. +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
  26590. +{
  26591. + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
  26592. +
  26593. + INIT_WORK(work, lru_add_drain_per_cpu);
  26594. + schedule_work_on(cpu, work);
  26595. + cpumask_set_cpu(cpu, has_work);
  26596. +}
  26597. +#endif
  26598. void lru_add_drain_all(void)
  26599. {
  26600. @@ -875,20 +907,17 @@ void lru_add_drain_all(void)
  26601. cpumask_clear(&has_work);
  26602. for_each_online_cpu(cpu) {
  26603. - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
  26604. -
  26605. if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
  26606. pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
  26607. pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
  26608. - need_activate_page_drain(cpu)) {
  26609. - INIT_WORK(work, lru_add_drain_per_cpu);
  26610. - schedule_work_on(cpu, work);
  26611. - cpumask_set_cpu(cpu, &has_work);
  26612. - }
  26613. + need_activate_page_drain(cpu))
  26614. + remote_lru_add_drain(cpu, &has_work);
  26615. }
  26616. +#ifndef CONFIG_PREEMPT_RT_BASE
  26617. for_each_cpu(cpu, &has_work)
  26618. flush_work(&per_cpu(lru_add_drain_work, cpu));
  26619. +#endif
  26620. put_online_cpus();
  26621. mutex_unlock(&lock);
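The mm/swap.c changes replace get_cpu_var()/put_cpu_var() on the per-CPU pagevecs with local locks: on a non-RT kernel a local lock still just disables interrupts or preemption, while on RT it becomes a per-CPU sleeping lock, so the pagevec sections stay preemptible. A minimal sketch of the pattern using the helpers shown above (the names my_lock, my_pvecs and queue_page() are illustrative):

#include <linux/locallock.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>

static DEFINE_PER_CPU(struct pagevec, my_pvecs);
static DEFINE_LOCAL_IRQ_LOCK(my_lock);

static void queue_page(struct page *page)
{
	/* Pins the per-CPU pagevec: preemption/IRQs off on !RT,
	 * a per-CPU lock on RT. */
	struct pagevec *pvec = &get_locked_var(my_lock, my_pvecs);

	page_cache_get(page);
	if (!pagevec_add(pvec, page) || PageCompound(page))
		__pagevec_lru_add(pvec);

	put_locked_var(my_lock, my_pvecs);
}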
  26622. diff --git a/mm/truncate.c b/mm/truncate.c
  26623. index 66af9031fae8..09598db42681 100644
  26624. --- a/mm/truncate.c
  26625. +++ b/mm/truncate.c
  26626. @@ -56,8 +56,11 @@ static void clear_exceptional_entry(struct address_space *mapping,
  26627. * protected by mapping->tree_lock.
  26628. */
  26629. if (!workingset_node_shadows(node) &&
  26630. - !list_empty(&node->private_list))
  26631. - list_lru_del(&workingset_shadow_nodes, &node->private_list);
  26632. + !list_empty(&node->private_list)) {
  26633. + local_lock(workingset_shadow_lock);
  26634. + list_lru_del(&__workingset_shadow_nodes, &node->private_list);
  26635. + local_unlock(workingset_shadow_lock);
  26636. + }
  26637. __radix_tree_delete_node(&mapping->page_tree, node);
  26638. unlock:
  26639. spin_unlock_irq(&mapping->tree_lock);
  26640. diff --git a/mm/vmalloc.c b/mm/vmalloc.c
  26641. index 2faaa2976447..f87a29f1e718 100644
  26642. --- a/mm/vmalloc.c
  26643. +++ b/mm/vmalloc.c
  26644. @@ -819,7 +819,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
  26645. struct vmap_block *vb;
  26646. struct vmap_area *va;
  26647. unsigned long vb_idx;
  26648. - int node, err;
  26649. + int node, err, cpu;
  26650. void *vaddr;
  26651. node = numa_node_id();
  26652. @@ -862,11 +862,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
  26653. BUG_ON(err);
  26654. radix_tree_preload_end();
  26655. - vbq = &get_cpu_var(vmap_block_queue);
  26656. + cpu = get_cpu_light();
  26657. + vbq = this_cpu_ptr(&vmap_block_queue);
  26658. spin_lock(&vbq->lock);
  26659. list_add_tail_rcu(&vb->free_list, &vbq->free);
  26660. spin_unlock(&vbq->lock);
  26661. - put_cpu_var(vmap_block_queue);
  26662. + put_cpu_light();
  26663. return vaddr;
  26664. }
  26665. @@ -935,6 +936,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
  26666. struct vmap_block *vb;
  26667. void *vaddr = NULL;
  26668. unsigned int order;
  26669. + int cpu;
  26670. BUG_ON(size & ~PAGE_MASK);
  26671. BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
  26672. @@ -949,7 +951,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
  26673. order = get_order(size);
  26674. rcu_read_lock();
  26675. - vbq = &get_cpu_var(vmap_block_queue);
  26676. + cpu = get_cpu_light();
  26677. + vbq = this_cpu_ptr(&vmap_block_queue);
  26678. list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  26679. unsigned long pages_off;
  26680. @@ -972,7 +975,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
  26681. break;
  26682. }
  26683. - put_cpu_var(vmap_block_queue);
  26684. + put_cpu_light();
  26685. rcu_read_unlock();
  26686. /* Allocate new block if nothing was found */
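get_cpu_var() implies preempt_disable() for the whole section, which RT avoids; the vmalloc hunks switch to get_cpu_light() plus this_cpu_ptr(), so on RT only migration is disabled and the spinlock taken inside the section may sleep. A minimal sketch of the converted shape (my_queue stands in for vmap_block_queue; enqueue_block() is illustrative):

struct my_block_queue {
	spinlock_t		lock;
	struct list_head	free;
};
static DEFINE_PER_CPU(struct my_block_queue, my_queue);

static void enqueue_block(struct list_head *entry)
{
	struct my_block_queue *q;

	/* get_cpu_light() returns the current CPU like get_cpu(), but on
	 * RT it only disables migration, leaving the task preemptible. */
	get_cpu_light();
	q = this_cpu_ptr(&my_queue);
	spin_lock(&q->lock);
	list_add_tail(entry, &q->free);
	spin_unlock(&q->lock);
	put_cpu_light();
}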
  26687. diff --git a/mm/vmstat.c b/mm/vmstat.c
  26688. index 4f5cd974e11a..86f0e2e3f677 100644
  26689. --- a/mm/vmstat.c
  26690. +++ b/mm/vmstat.c
  26691. @@ -226,6 +226,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
  26692. long x;
  26693. long t;
  26694. + preempt_disable_rt();
  26695. x = delta + __this_cpu_read(*p);
  26696. t = __this_cpu_read(pcp->stat_threshold);
  26697. @@ -235,6 +236,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
  26698. x = 0;
  26699. }
  26700. __this_cpu_write(*p, x);
  26701. + preempt_enable_rt();
  26702. }
  26703. EXPORT_SYMBOL(__mod_zone_page_state);
  26704. @@ -267,6 +269,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
  26705. s8 __percpu *p = pcp->vm_stat_diff + item;
  26706. s8 v, t;
  26707. + preempt_disable_rt();
  26708. v = __this_cpu_inc_return(*p);
  26709. t = __this_cpu_read(pcp->stat_threshold);
  26710. if (unlikely(v > t)) {
  26711. @@ -275,6 +278,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
  26712. zone_page_state_add(v + overstep, zone, item);
  26713. __this_cpu_write(*p, -overstep);
  26714. }
  26715. + preempt_enable_rt();
  26716. }
  26717. void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
  26718. @@ -289,6 +293,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
  26719. s8 __percpu *p = pcp->vm_stat_diff + item;
  26720. s8 v, t;
  26721. + preempt_disable_rt();
  26722. v = __this_cpu_dec_return(*p);
  26723. t = __this_cpu_read(pcp->stat_threshold);
  26724. if (unlikely(v < - t)) {
  26725. @@ -297,6 +302,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
  26726. zone_page_state_add(v - overstep, zone, item);
  26727. __this_cpu_write(*p, overstep);
  26728. }
  26729. + preempt_enable_rt();
  26730. }
  26731. void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
  26732. diff --git a/mm/workingset.c b/mm/workingset.c
  26733. index aa017133744b..263d0194734a 100644
  26734. --- a/mm/workingset.c
  26735. +++ b/mm/workingset.c
  26736. @@ -264,7 +264,8 @@ void workingset_activation(struct page *page)
  26737. * point where they would still be useful.
  26738. */
  26739. -struct list_lru workingset_shadow_nodes;
  26740. +struct list_lru __workingset_shadow_nodes;
  26741. +DEFINE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
  26742. static unsigned long count_shadow_nodes(struct shrinker *shrinker,
  26743. struct shrink_control *sc)
  26744. @@ -274,9 +275,9 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
  26745. unsigned long pages;
  26746. /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
  26747. - local_irq_disable();
  26748. - shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc);
  26749. - local_irq_enable();
  26750. + local_lock_irq(workingset_shadow_lock);
  26751. + shadow_nodes = list_lru_shrink_count(&__workingset_shadow_nodes, sc);
  26752. + local_unlock_irq(workingset_shadow_lock);
  26753. pages = node_present_pages(sc->nid);
  26754. /*
  26755. @@ -363,9 +364,9 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
  26756. spin_unlock(&mapping->tree_lock);
  26757. ret = LRU_REMOVED_RETRY;
  26758. out:
  26759. - local_irq_enable();
  26760. + local_unlock_irq(workingset_shadow_lock);
  26761. cond_resched();
  26762. - local_irq_disable();
  26763. + local_lock_irq(workingset_shadow_lock);
  26764. spin_lock(lru_lock);
  26765. return ret;
  26766. }
  26767. @@ -376,10 +377,10 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
  26768. unsigned long ret;
  26769. /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
  26770. - local_irq_disable();
  26771. - ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc,
  26772. + local_lock_irq(workingset_shadow_lock);
  26773. + ret = list_lru_shrink_walk(&__workingset_shadow_nodes, sc,
  26774. shadow_lru_isolate, NULL);
  26775. - local_irq_enable();
  26776. + local_unlock_irq(workingset_shadow_lock);
  26777. return ret;
  26778. }
  26779. @@ -400,7 +401,7 @@ static int __init workingset_init(void)
  26780. {
  26781. int ret;
  26782. - ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);
  26783. + ret = list_lru_init_key(&__workingset_shadow_nodes, &shadow_nodes_key);
  26784. if (ret)
  26785. goto err;
  26786. ret = register_shrinker(&workingset_shadow_shrinker);
  26787. @@ -408,7 +409,7 @@ static int __init workingset_init(void)
  26788. goto err_list_lru;
  26789. return 0;
  26790. err_list_lru:
  26791. - list_lru_destroy(&workingset_shadow_nodes);
  26792. + list_lru_destroy(&__workingset_shadow_nodes);
  26793. err:
  26794. return ret;
  26795. }
  26796. diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
  26797. index fb1ec10ce449..e819dffd142c 100644
  26798. --- a/mm/zsmalloc.c
  26799. +++ b/mm/zsmalloc.c
  26800. @@ -1289,7 +1289,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
  26801. class = pool->size_class[class_idx];
  26802. off = obj_idx_to_offset(page, obj_idx, class->size);
  26803. - area = &get_cpu_var(zs_map_area);
  26804. + area = per_cpu_ptr(&zs_map_area, get_cpu_light());
  26805. area->vm_mm = mm;
  26806. if (off + class->size <= PAGE_SIZE) {
  26807. /* this object is contained entirely within a page */
  26808. @@ -1342,7 +1342,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
  26809. __zs_unmap_object(area, pages, off, class->size);
  26810. }
  26811. - put_cpu_var(zs_map_area);
  26812. + put_cpu_light();
  26813. unpin_tag(handle);
  26814. }
  26815. EXPORT_SYMBOL_GPL(zs_unmap_object);
  26816. diff --git a/net/core/dev.c b/net/core/dev.c
  26817. index 185a3398c651..78912da59fc1 100644
  26818. --- a/net/core/dev.c
  26819. +++ b/net/core/dev.c
  26820. @@ -184,6 +184,7 @@ static unsigned int napi_gen_id;
  26821. static DEFINE_HASHTABLE(napi_hash, 8);
  26822. static seqcount_t devnet_rename_seq;
  26823. +static DEFINE_MUTEX(devnet_rename_mutex);
  26824. static inline void dev_base_seq_inc(struct net *net)
  26825. {
  26826. @@ -205,14 +206,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
  26827. static inline void rps_lock(struct softnet_data *sd)
  26828. {
  26829. #ifdef CONFIG_RPS
  26830. - spin_lock(&sd->input_pkt_queue.lock);
  26831. + raw_spin_lock(&sd->input_pkt_queue.raw_lock);
  26832. #endif
  26833. }
  26834. static inline void rps_unlock(struct softnet_data *sd)
  26835. {
  26836. #ifdef CONFIG_RPS
  26837. - spin_unlock(&sd->input_pkt_queue.lock);
  26838. + raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
  26839. #endif
  26840. }
  26841. @@ -852,7 +853,8 @@ retry:
  26842. strcpy(name, dev->name);
  26843. rcu_read_unlock();
  26844. if (read_seqcount_retry(&devnet_rename_seq, seq)) {
  26845. - cond_resched();
  26846. + mutex_lock(&devnet_rename_mutex);
  26847. + mutex_unlock(&devnet_rename_mutex);
  26848. goto retry;
  26849. }
  26850. @@ -1121,20 +1123,17 @@ int dev_change_name(struct net_device *dev, const char *newname)
  26851. if (dev->flags & IFF_UP)
  26852. return -EBUSY;
  26853. - write_seqcount_begin(&devnet_rename_seq);
  26854. + mutex_lock(&devnet_rename_mutex);
  26855. + __raw_write_seqcount_begin(&devnet_rename_seq);
  26856. - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
  26857. - write_seqcount_end(&devnet_rename_seq);
  26858. - return 0;
  26859. - }
  26860. + if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
  26861. + goto outunlock;
  26862. memcpy(oldname, dev->name, IFNAMSIZ);
  26863. err = dev_get_valid_name(net, dev, newname);
  26864. - if (err < 0) {
  26865. - write_seqcount_end(&devnet_rename_seq);
  26866. - return err;
  26867. - }
  26868. + if (err < 0)
  26869. + goto outunlock;
  26870. if (oldname[0] && !strchr(oldname, '%'))
  26871. netdev_info(dev, "renamed from %s\n", oldname);
  26872. @@ -1147,11 +1146,12 @@ rollback:
  26873. if (ret) {
  26874. memcpy(dev->name, oldname, IFNAMSIZ);
  26875. dev->name_assign_type = old_assign_type;
  26876. - write_seqcount_end(&devnet_rename_seq);
  26877. - return ret;
  26878. + err = ret;
  26879. + goto outunlock;
  26880. }
  26881. - write_seqcount_end(&devnet_rename_seq);
  26882. + __raw_write_seqcount_end(&devnet_rename_seq);
  26883. + mutex_unlock(&devnet_rename_mutex);
  26884. netdev_adjacent_rename_links(dev, oldname);
  26885. @@ -1172,7 +1172,8 @@ rollback:
  26886. /* err >= 0 after dev_alloc_name() or stores the first errno */
  26887. if (err >= 0) {
  26888. err = ret;
  26889. - write_seqcount_begin(&devnet_rename_seq);
  26890. + mutex_lock(&devnet_rename_mutex);
  26891. + __raw_write_seqcount_begin(&devnet_rename_seq);
  26892. memcpy(dev->name, oldname, IFNAMSIZ);
  26893. memcpy(oldname, newname, IFNAMSIZ);
  26894. dev->name_assign_type = old_assign_type;
  26895. @@ -1185,6 +1186,11 @@ rollback:
  26896. }
  26897. return err;
  26898. +
  26899. +outunlock:
  26900. + __raw_write_seqcount_end(&devnet_rename_seq);
  26901. + mutex_unlock(&devnet_rename_mutex);
  26902. + return err;
  26903. }
  26904. /**
  26905. @@ -2214,6 +2220,7 @@ static inline void __netif_reschedule(struct Qdisc *q)
  26906. sd->output_queue_tailp = &q->next_sched;
  26907. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  26908. local_irq_restore(flags);
  26909. + preempt_check_resched_rt();
  26910. }
  26911. void __netif_schedule(struct Qdisc *q)
  26912. @@ -2295,6 +2302,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
  26913. __this_cpu_write(softnet_data.completion_queue, skb);
  26914. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  26915. local_irq_restore(flags);
  26916. + preempt_check_resched_rt();
  26917. }
  26918. EXPORT_SYMBOL(__dev_kfree_skb_irq);
  26919. @@ -2820,7 +2828,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
  26920. * This permits __QDISC___STATE_RUNNING owner to get the lock more
  26921. * often and dequeue packets faster.
  26922. */
  26923. +#ifdef CONFIG_PREEMPT_RT_FULL
  26924. + contended = true;
  26925. +#else
  26926. contended = qdisc_is_running(q);
  26927. +#endif
  26928. if (unlikely(contended))
  26929. spin_lock(&q->busylock);
  26930. @@ -2880,9 +2892,44 @@ static void skb_update_prio(struct sk_buff *skb)
  26931. #define skb_update_prio(skb)
  26932. #endif
  26933. +#ifdef CONFIG_PREEMPT_RT_FULL
  26934. +
  26935. +static inline int xmit_rec_read(void)
  26936. +{
  26937. + return current->xmit_recursion;
  26938. +}
  26939. +
  26940. +static inline void xmit_rec_inc(void)
  26941. +{
  26942. + current->xmit_recursion++;
  26943. +}
  26944. +
  26945. +static inline void xmit_rec_dec(void)
  26946. +{
  26947. + current->xmit_recursion--;
  26948. +}
  26949. +
  26950. +#else
  26951. +
  26952. DEFINE_PER_CPU(int, xmit_recursion);
  26953. EXPORT_SYMBOL(xmit_recursion);
  26954. +static inline int xmit_rec_read(void)
  26955. +{
  26956. + return __this_cpu_read(xmit_recursion);
  26957. +}
  26958. +
  26959. +static inline void xmit_rec_inc(void)
  26960. +{
  26961. + __this_cpu_inc(xmit_recursion);
  26962. +}
  26963. +
26964. +static inline void xmit_rec_dec(void)
  26965. +{
  26966. + __this_cpu_dec(xmit_recursion);
  26967. +}
  26968. +#endif
  26969. +
  26970. #define RECURSION_LIMIT 10
  26971. /**
  26972. @@ -2984,7 +3031,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
  26973. if (txq->xmit_lock_owner != cpu) {
  26974. - if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
  26975. + if (xmit_rec_read() > RECURSION_LIMIT)
  26976. goto recursion_alert;
  26977. skb = validate_xmit_skb(skb, dev);
  26978. @@ -2994,9 +3041,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
  26979. HARD_TX_LOCK(dev, txq, cpu);
  26980. if (!netif_xmit_stopped(txq)) {
  26981. - __this_cpu_inc(xmit_recursion);
  26982. + xmit_rec_inc();
  26983. skb = dev_hard_start_xmit(skb, dev, txq, &rc);
  26984. - __this_cpu_dec(xmit_recursion);
  26985. + xmit_rec_dec();
  26986. if (dev_xmit_complete(rc)) {
  26987. HARD_TX_UNLOCK(dev, txq);
  26988. goto out;
  26989. @@ -3370,6 +3417,7 @@ drop:
  26990. rps_unlock(sd);
  26991. local_irq_restore(flags);
  26992. + preempt_check_resched_rt();
  26993. atomic_long_inc(&skb->dev->rx_dropped);
  26994. kfree_skb(skb);
  26995. @@ -3388,7 +3436,7 @@ static int netif_rx_internal(struct sk_buff *skb)
  26996. struct rps_dev_flow voidflow, *rflow = &voidflow;
  26997. int cpu;
  26998. - preempt_disable();
  26999. + migrate_disable();
  27000. rcu_read_lock();
  27001. cpu = get_rps_cpu(skb->dev, skb, &rflow);
  27002. @@ -3398,13 +3446,13 @@ static int netif_rx_internal(struct sk_buff *skb)
  27003. ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
  27004. rcu_read_unlock();
  27005. - preempt_enable();
  27006. + migrate_enable();
  27007. } else
  27008. #endif
  27009. {
  27010. unsigned int qtail;
  27011. - ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
  27012. - put_cpu();
  27013. + ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
  27014. + put_cpu_light();
  27015. }
  27016. return ret;
  27017. }
  27018. @@ -3438,16 +3486,44 @@ int netif_rx_ni(struct sk_buff *skb)
  27019. trace_netif_rx_ni_entry(skb);
  27020. - preempt_disable();
  27021. + local_bh_disable();
  27022. err = netif_rx_internal(skb);
  27023. - if (local_softirq_pending())
  27024. - do_softirq();
  27025. - preempt_enable();
  27026. + local_bh_enable();
  27027. return err;
  27028. }
  27029. EXPORT_SYMBOL(netif_rx_ni);
  27030. +#ifdef CONFIG_PREEMPT_RT_FULL
  27031. +/*
  27032. + * RT runs ksoftirqd as a real time thread and the root_lock is a
  27033. + * "sleeping spinlock". If the trylock fails then we can go into an
  27034. + * infinite loop when ksoftirqd preempted the task which actually
  27035. + * holds the lock, because we requeue q and raise NET_TX softirq
  27036. + * causing ksoftirqd to loop forever.
  27037. + *
  27038. + * It's safe to use spin_lock on RT here as softirqs run in thread
  27039. + * context and cannot deadlock against the thread which is holding
  27040. + * root_lock.
  27041. + *
  27042. + * On !RT the trylock might fail, but there we bail out from the
  27043. + * softirq loop after 10 attempts which we can't do on RT. And the
  27044. + * task holding root_lock cannot be preempted, so the only downside of
  27045. + * that trylock is that we need 10 loops to decide that we should have
  27046. + * given up in the first one :)
  27047. + */
  27048. +static inline int take_root_lock(spinlock_t *lock)
  27049. +{
  27050. + spin_lock(lock);
  27051. + return 1;
  27052. +}
  27053. +#else
  27054. +static inline int take_root_lock(spinlock_t *lock)
  27055. +{
  27056. + return spin_trylock(lock);
  27057. +}
  27058. +#endif
  27059. +
  27060. static void net_tx_action(struct softirq_action *h)
  27061. {
  27062. struct softnet_data *sd = this_cpu_ptr(&softnet_data);
  27063. @@ -3489,7 +3565,7 @@ static void net_tx_action(struct softirq_action *h)
  27064. head = head->next_sched;
  27065. root_lock = qdisc_lock(q);
  27066. - if (spin_trylock(root_lock)) {
  27067. + if (take_root_lock(root_lock)) {
  27068. smp_mb__before_atomic();
  27069. clear_bit(__QDISC_STATE_SCHED,
  27070. &q->state);
  27071. @@ -3886,7 +3962,7 @@ static void flush_backlog(void *arg)
  27072. skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
  27073. if (skb->dev == dev) {
  27074. __skb_unlink(skb, &sd->input_pkt_queue);
  27075. - kfree_skb(skb);
  27076. + __skb_queue_tail(&sd->tofree_queue, skb);
  27077. input_queue_head_incr(sd);
  27078. }
  27079. }
  27080. @@ -3895,10 +3971,13 @@ static void flush_backlog(void *arg)
  27081. skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
  27082. if (skb->dev == dev) {
  27083. __skb_unlink(skb, &sd->process_queue);
  27084. - kfree_skb(skb);
  27085. + __skb_queue_tail(&sd->tofree_queue, skb);
  27086. input_queue_head_incr(sd);
  27087. }
  27088. }
  27089. +
  27090. + if (!skb_queue_empty(&sd->tofree_queue))
  27091. + raise_softirq_irqoff(NET_RX_SOFTIRQ);
  27092. }
  27093. static int napi_gro_complete(struct sk_buff *skb)
  27094. @@ -4349,6 +4428,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
  27095. sd->rps_ipi_list = NULL;
  27096. local_irq_enable();
  27097. + preempt_check_resched_rt();
  27098. /* Send pending IPI's to kick RPS processing on remote cpus. */
  27099. while (remsd) {
  27100. @@ -4362,6 +4442,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
  27101. } else
  27102. #endif
  27103. local_irq_enable();
  27104. + preempt_check_resched_rt();
  27105. }
  27106. static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
  27107. @@ -4443,6 +4524,7 @@ void __napi_schedule(struct napi_struct *n)
  27108. local_irq_save(flags);
  27109. ____napi_schedule(this_cpu_ptr(&softnet_data), n);
  27110. local_irq_restore(flags);
  27111. + preempt_check_resched_rt();
  27112. }
  27113. EXPORT_SYMBOL(__napi_schedule);
  27114. @@ -4717,7 +4799,7 @@ static void net_rx_action(struct softirq_action *h)
  27115. list_splice_tail(&repoll, &list);
  27116. list_splice(&list, &sd->poll_list);
  27117. if (!list_empty(&sd->poll_list))
  27118. - __raise_softirq_irqoff(NET_RX_SOFTIRQ);
  27119. + __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ);
  27120. net_rps_action_and_irq_enable(sd);
  27121. }
  27122. @@ -6931,7 +7013,7 @@ EXPORT_SYMBOL(free_netdev);
  27123. void synchronize_net(void)
  27124. {
  27125. might_sleep();
  27126. - if (rtnl_is_locked())
  27127. + if (rtnl_is_locked() && !IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
  27128. synchronize_rcu_expedited();
  27129. else
  27130. synchronize_rcu();
  27131. @@ -7172,16 +7254,20 @@ static int dev_cpu_callback(struct notifier_block *nfb,
  27132. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  27133. local_irq_enable();
  27134. + preempt_check_resched_rt();
  27135. /* Process offline CPU's input_pkt_queue */
  27136. while ((skb = __skb_dequeue(&oldsd->process_queue))) {
  27137. netif_rx_ni(skb);
  27138. input_queue_head_incr(oldsd);
  27139. }
  27140. - while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
  27141. + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
  27142. netif_rx_ni(skb);
  27143. input_queue_head_incr(oldsd);
  27144. }
  27145. + while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
  27146. + kfree_skb(skb);
  27147. + }
  27148. return NOTIFY_OK;
  27149. }
  27150. @@ -7483,8 +7569,9 @@ static int __init net_dev_init(void)
  27151. for_each_possible_cpu(i) {
  27152. struct softnet_data *sd = &per_cpu(softnet_data, i);
  27153. - skb_queue_head_init(&sd->input_pkt_queue);
  27154. - skb_queue_head_init(&sd->process_queue);
  27155. + skb_queue_head_init_raw(&sd->input_pkt_queue);
  27156. + skb_queue_head_init_raw(&sd->process_queue);
  27157. + skb_queue_head_init_raw(&sd->tofree_queue);
  27158. INIT_LIST_HEAD(&sd->poll_list);
  27159. sd->output_queue_tailp = &sd->output_queue;
  27160. #ifdef CONFIG_RPS
  27161. diff --git a/net/core/skbuff.c b/net/core/skbuff.c
  27162. index c9793c6c5005..d4516d7d58aa 100644
  27163. --- a/net/core/skbuff.c
  27164. +++ b/net/core/skbuff.c
  27165. @@ -63,6 +63,7 @@
  27166. #include <linux/errqueue.h>
  27167. #include <linux/prefetch.h>
  27168. #include <linux/if_vlan.h>
  27169. +#include <linux/locallock.h>
  27170. #include <net/protocol.h>
  27171. #include <net/dst.h>
  27172. @@ -358,6 +359,8 @@ struct netdev_alloc_cache {
  27173. };
  27174. static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
  27175. static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);
  27176. +static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
  27177. +static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock);
  27178. static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
  27179. gfp_t gfp_mask)
  27180. @@ -435,9 +438,9 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  27181. unsigned long flags;
  27182. void *data;
  27183. - local_irq_save(flags);
  27184. + local_lock_irqsave(netdev_alloc_lock, flags);
  27185. data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask);
  27186. - local_irq_restore(flags);
  27187. + local_unlock_irqrestore(netdev_alloc_lock, flags);
  27188. return data;
  27189. }
  27190. @@ -456,7 +459,12 @@ EXPORT_SYMBOL(netdev_alloc_frag);
  27191. static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  27192. {
  27193. - return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask);
  27194. + void *data;
  27195. +
  27196. + local_lock(napi_alloc_cache_lock);
  27197. + data = __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask);
  27198. + local_unlock(napi_alloc_cache_lock);
  27199. + return data;
  27200. }
  27201. void *napi_alloc_frag(unsigned int fragsz)
  27202. diff --git a/net/core/sock.c b/net/core/sock.c
  27203. index 47fc8bb3b946..23a1423f78ca 100644
  27204. --- a/net/core/sock.c
  27205. +++ b/net/core/sock.c
  27206. @@ -2369,12 +2369,11 @@ void lock_sock_nested(struct sock *sk, int subclass)
  27207. if (sk->sk_lock.owned)
  27208. __lock_sock(sk);
  27209. sk->sk_lock.owned = 1;
  27210. - spin_unlock(&sk->sk_lock.slock);
  27211. + spin_unlock_bh(&sk->sk_lock.slock);
  27212. /*
  27213. * The sk_lock has mutex_lock() semantics here:
  27214. */
  27215. mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
  27216. - local_bh_enable();
  27217. }
  27218. EXPORT_SYMBOL(lock_sock_nested);
  27219. diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
  27220. index f5203fba6236..d7358c1ac63c 100644
  27221. --- a/net/ipv4/icmp.c
  27222. +++ b/net/ipv4/icmp.c
  27223. @@ -69,6 +69,7 @@
  27224. #include <linux/jiffies.h>
  27225. #include <linux/kernel.h>
  27226. #include <linux/fcntl.h>
  27227. +#include <linux/sysrq.h>
  27228. #include <linux/socket.h>
  27229. #include <linux/in.h>
  27230. #include <linux/inet.h>
  27231. @@ -77,6 +78,7 @@
  27232. #include <linux/string.h>
  27233. #include <linux/netfilter_ipv4.h>
  27234. #include <linux/slab.h>
  27235. +#include <linux/locallock.h>
  27236. #include <net/snmp.h>
  27237. #include <net/ip.h>
  27238. #include <net/route.h>
  27239. @@ -203,6 +205,8 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
  27240. *
  27241. * On SMP we have one ICMP socket per-cpu.
  27242. */
  27243. +static DEFINE_LOCAL_IRQ_LOCK(icmp_sk_lock);
  27244. +
  27245. static struct sock *icmp_sk(struct net *net)
  27246. {
  27247. return *this_cpu_ptr(net->ipv4.icmp_sk);
  27248. @@ -214,12 +218,14 @@ static inline struct sock *icmp_xmit_lock(struct net *net)
  27249. local_bh_disable();
  27250. + local_lock(icmp_sk_lock);
  27251. sk = icmp_sk(net);
  27252. if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
  27253. /* This can happen if the output path signals a
  27254. * dst_link_failure() for an outgoing ICMP packet.
  27255. */
  27256. + local_unlock(icmp_sk_lock);
  27257. local_bh_enable();
  27258. return NULL;
  27259. }
  27260. @@ -229,6 +235,7 @@ static inline struct sock *icmp_xmit_lock(struct net *net)
  27261. static inline void icmp_xmit_unlock(struct sock *sk)
  27262. {
  27263. spin_unlock_bh(&sk->sk_lock.slock);
  27264. + local_unlock(icmp_sk_lock);
  27265. }
  27266. int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
  27267. @@ -356,6 +363,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
  27268. struct sock *sk;
  27269. struct sk_buff *skb;
  27270. + local_lock(icmp_sk_lock);
  27271. sk = icmp_sk(dev_net((*rt)->dst.dev));
  27272. if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
  27273. icmp_param->data_len+icmp_param->head_len,
  27274. @@ -378,6 +386,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
  27275. skb->ip_summed = CHECKSUM_NONE;
  27276. ip_push_pending_frames(sk, fl4);
  27277. }
  27278. + local_unlock(icmp_sk_lock);
  27279. }
  27280. /*
  27281. @@ -867,6 +876,30 @@ static bool icmp_redirect(struct sk_buff *skb)
  27282. }
  27283. /*
  27284. + * 32bit and 64bit have different timestamp length, so we check for
  27285. + * the cookie at offset 20 and verify it is repeated at offset 50
  27286. + */
  27287. +#define CO_POS0 20
  27288. +#define CO_POS1 50
  27289. +#define CO_SIZE sizeof(int)
  27290. +#define ICMP_SYSRQ_SIZE 57
  27291. +
  27292. +/*
  27293. + * We got a ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
  27294. + * pattern and if it matches send the next byte as a trigger to sysrq.
  27295. + */
  27296. +static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
  27297. +{
  27298. + int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
  27299. + char *p = skb->data;
  27300. +
  27301. + if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
  27302. + !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
  27303. + p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
  27304. + handle_sysrq(p[CO_POS0 + CO_SIZE]);
  27305. +}
  27306. +
  27307. +/*
  27308. * Handle ICMP_ECHO ("ping") requests.
  27309. *
  27310. * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
  27311. @@ -893,6 +926,11 @@ static bool icmp_echo(struct sk_buff *skb)
  27312. icmp_param.data_len = skb->len;
  27313. icmp_param.head_len = sizeof(struct icmphdr);
  27314. icmp_reply(&icmp_param, skb);
  27315. +
  27316. + if (skb->len == ICMP_SYSRQ_SIZE &&
  27317. + net->ipv4.sysctl_icmp_echo_sysrq) {
  27318. + icmp_check_sysrq(net, skb);
  27319. + }
  27320. }
  27321. /* should there be an ICMP stat for ignored echos? */
  27322. return true;
  27323. diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
  27324. index f0e829735968..aff60d4abd7c 100644
  27325. --- a/net/ipv4/sysctl_net_ipv4.c
  27326. +++ b/net/ipv4/sysctl_net_ipv4.c
  27327. @@ -779,6 +779,13 @@ static struct ctl_table ipv4_net_table[] = {
  27328. .proc_handler = proc_dointvec
  27329. },
  27330. {
  27331. + .procname = "icmp_echo_sysrq",
  27332. + .data = &init_net.ipv4.sysctl_icmp_echo_sysrq,
  27333. + .maxlen = sizeof(int),
  27334. + .mode = 0644,
  27335. + .proc_handler = proc_dointvec
  27336. + },
  27337. + {
  27338. .procname = "icmp_ignore_bogus_error_responses",
  27339. .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
  27340. .maxlen = sizeof(int),
  27341. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
  27342. index 13b92d595138..6bfa68fb5f21 100644
  27343. --- a/net/ipv4/tcp_ipv4.c
  27344. +++ b/net/ipv4/tcp_ipv4.c
  27345. @@ -62,6 +62,7 @@
  27346. #include <linux/init.h>
  27347. #include <linux/times.h>
  27348. #include <linux/slab.h>
  27349. +#include <linux/locallock.h>
  27350. #include <net/net_namespace.h>
  27351. #include <net/icmp.h>
  27352. @@ -563,6 +564,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
  27353. }
  27354. EXPORT_SYMBOL(tcp_v4_send_check);
  27355. +static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock);
  27356. /*
  27357. * This routine will send an RST to the other tcp.
  27358. *
  27359. @@ -684,10 +686,13 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
  27360. arg.bound_dev_if = sk->sk_bound_dev_if;
  27361. arg.tos = ip_hdr(skb)->tos;
  27362. +
  27363. + local_lock(tcp_sk_lock);
  27364. ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
  27365. skb, &TCP_SKB_CB(skb)->header.h4.opt,
  27366. ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
  27367. &arg, arg.iov[0].iov_len);
  27368. + local_unlock(tcp_sk_lock);
  27369. TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
  27370. TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
  27371. @@ -769,10 +774,12 @@ static void tcp_v4_send_ack(struct net *net,
  27372. if (oif)
  27373. arg.bound_dev_if = oif;
  27374. arg.tos = tos;
  27375. + local_lock(tcp_sk_lock);
  27376. ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
  27377. skb, &TCP_SKB_CB(skb)->header.h4.opt,
  27378. ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
  27379. &arg, arg.iov[0].iov_len);
  27380. + local_unlock(tcp_sk_lock);
  27381. TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
  27382. }
  27383. diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
  27384. index 3073164a6fcf..d294ec1530b6 100644
  27385. --- a/net/mac80211/rx.c
  27386. +++ b/net/mac80211/rx.c
  27387. @@ -3574,7 +3574,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
  27388. struct ieee80211_supported_band *sband;
  27389. struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
  27390. - WARN_ON_ONCE(softirq_count() == 0);
  27391. + WARN_ON_ONCE_NONRT(softirq_count() == 0);
  27392. if (WARN_ON(status->band >= IEEE80211_NUM_BANDS))
  27393. goto drop;
  27394. diff --git a/net/netfilter/core.c b/net/netfilter/core.c
  27395. index 5d0c6fd59475..e81489741143 100644
  27396. --- a/net/netfilter/core.c
  27397. +++ b/net/netfilter/core.c
  27398. @@ -22,11 +22,17 @@
  27399. #include <linux/proc_fs.h>
  27400. #include <linux/mutex.h>
  27401. #include <linux/slab.h>
  27402. +#include <linux/locallock.h>
  27403. #include <net/net_namespace.h>
  27404. #include <net/sock.h>
  27405. #include "nf_internals.h"
  27406. +#ifdef CONFIG_PREEMPT_RT_BASE
  27407. +DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
  27408. +EXPORT_PER_CPU_SYMBOL(xt_write_lock);
  27409. +#endif
  27410. +
  27411. static DEFINE_MUTEX(afinfo_mutex);
  27412. const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
  27413. diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
  27414. index a3654d929814..7c1054d808fc 100644
  27415. --- a/net/packet/af_packet.c
  27416. +++ b/net/packet/af_packet.c
  27417. @@ -63,6 +63,7 @@
  27418. #include <linux/if_packet.h>
  27419. #include <linux/wireless.h>
  27420. #include <linux/kernel.h>
  27421. +#include <linux/delay.h>
  27422. #include <linux/kmod.h>
  27423. #include <linux/slab.h>
  27424. #include <linux/vmalloc.h>
  27425. @@ -698,7 +699,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data)
  27426. if (BLOCK_NUM_PKTS(pbd)) {
  27427. while (atomic_read(&pkc->blk_fill_in_prog)) {
  27428. /* Waiting for skb_copy_bits to finish... */
  27429. - cpu_relax();
  27430. + cpu_chill();
  27431. }
  27432. }
  27433. @@ -960,7 +961,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
  27434. if (!(status & TP_STATUS_BLK_TMO)) {
  27435. while (atomic_read(&pkc->blk_fill_in_prog)) {
  27436. /* Waiting for skb_copy_bits to finish... */
  27437. - cpu_relax();
  27438. + cpu_chill();
  27439. }
  27440. }
  27441. prb_close_block(pkc, pbd, po, status);
  27442. diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
  27443. index 657ba9f5d308..c8faaf36423a 100644
  27444. --- a/net/rds/ib_rdma.c
  27445. +++ b/net/rds/ib_rdma.c
  27446. @@ -34,6 +34,7 @@
  27447. #include <linux/slab.h>
  27448. #include <linux/rculist.h>
  27449. #include <linux/llist.h>
  27450. +#include <linux/delay.h>
  27451. #include "rds.h"
  27452. #include "ib.h"
  27453. @@ -286,7 +287,7 @@ static inline void wait_clean_list_grace(void)
  27454. for_each_online_cpu(cpu) {
  27455. flag = &per_cpu(clean_list_grace, cpu);
  27456. while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
  27457. - cpu_relax();
  27458. + cpu_chill();
  27459. }
  27460. }
  27461. diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
  27462. index 9821e6d641bb..c6bf6ea6d1d5 100644
  27463. --- a/net/sched/sch_generic.c
  27464. +++ b/net/sched/sch_generic.c
  27465. @@ -899,7 +899,7 @@ void dev_deactivate_many(struct list_head *head)
  27466. /* Wait for outstanding qdisc_run calls. */
  27467. list_for_each_entry(dev, head, close_list)
  27468. while (some_qdisc_is_busy(dev))
  27469. - yield();
  27470. + msleep(1);
  27471. }
  27472. void dev_deactivate(struct net_device *dev)
  27473. diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
  27474. index 163ac45c3639..ba2313cd4e36 100644
  27475. --- a/net/sunrpc/svc_xprt.c
  27476. +++ b/net/sunrpc/svc_xprt.c
  27477. @@ -341,7 +341,7 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
  27478. goto out;
  27479. }
  27480. - cpu = get_cpu();
  27481. + cpu = get_cpu_light();
  27482. pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
  27483. atomic_long_inc(&pool->sp_stats.packets);
  27484. @@ -377,7 +377,7 @@ redo_search:
  27485. atomic_long_inc(&pool->sp_stats.threads_woken);
  27486. wake_up_process(rqstp->rq_task);
  27487. - put_cpu();
  27488. + put_cpu_light();
  27489. goto out;
  27490. }
  27491. rcu_read_unlock();
  27492. @@ -398,7 +398,7 @@ redo_search:
  27493. goto redo_search;
  27494. }
  27495. rqstp = NULL;
  27496. - put_cpu();
  27497. + put_cpu_light();
  27498. out:
  27499. trace_svc_xprt_do_enqueue(xprt, rqstp);
  27500. }
  27501. diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
  27502. index 6fdc97ef6023..523e0420d7f0 100755
  27503. --- a/scripts/mkcompile_h
  27504. +++ b/scripts/mkcompile_h
  27505. @@ -4,7 +4,8 @@ TARGET=$1
  27506. ARCH=$2
  27507. SMP=$3
  27508. PREEMPT=$4
  27509. -CC=$5
  27510. +RT=$5
  27511. +CC=$6
  27512. vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; }
  27513. @@ -57,6 +58,7 @@ UTS_VERSION="#$VERSION"
  27514. CONFIG_FLAGS=""
  27515. if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
  27516. if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
  27517. +if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi
  27518. UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
  27519. # Truncate to maximum length
  27520. diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
  27521. index aa999e747c94..8195f789c680 100644
  27522. --- a/sound/core/pcm_native.c
  27523. +++ b/sound/core/pcm_native.c
  27524. @@ -135,7 +135,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock);
  27525. void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream)
  27526. {
  27527. if (!substream->pcm->nonatomic)
  27528. - local_irq_disable();
  27529. + local_irq_disable_nort();
  27530. snd_pcm_stream_lock(substream);
  27531. }
  27532. EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq);
  27533. @@ -150,7 +150,7 @@ void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream)
  27534. {
  27535. snd_pcm_stream_unlock(substream);
  27536. if (!substream->pcm->nonatomic)
  27537. - local_irq_enable();
  27538. + local_irq_enable_nort();
  27539. }
  27540. EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq);
  27541. @@ -158,7 +158,7 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream)
  27542. {
  27543. unsigned long flags = 0;
  27544. if (!substream->pcm->nonatomic)
  27545. - local_irq_save(flags);
  27546. + local_irq_save_nort(flags);
  27547. snd_pcm_stream_lock(substream);
  27548. return flags;
  27549. }
  27550. @@ -176,7 +176,7 @@ void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream,
  27551. {
  27552. snd_pcm_stream_unlock(substream);
  27553. if (!substream->pcm->nonatomic)
  27554. - local_irq_restore(flags);
  27555. + local_irq_restore_nort(flags);
  27556. }
  27557. EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore);
  27558. diff --git a/sound/soc/intel/atom/sst/sst.c b/sound/soc/intel/atom/sst/sst.c
  27559. index 96c2e420cce6..a4b458e77089 100644
  27560. --- a/sound/soc/intel/atom/sst/sst.c
  27561. +++ b/sound/soc/intel/atom/sst/sst.c
  27562. @@ -368,8 +368,8 @@ static inline void sst_restore_shim64(struct intel_sst_drv *ctx,
  27563. * initialize by FW or driver when firmware is loaded
  27564. */
  27565. spin_lock_irqsave(&ctx->ipc_spin_lock, irq_flags);
  27566. - sst_shim_write64(shim, SST_IMRX, shim_regs->imrx),
  27567. - sst_shim_write64(shim, SST_CSR, shim_regs->csr),
  27568. + sst_shim_write64(shim, SST_IMRX, shim_regs->imrx);
  27569. + sst_shim_write64(shim, SST_CSR, shim_regs->csr);
  27570. spin_unlock_irqrestore(&ctx->ipc_spin_lock, irq_flags);
  27571. }
  27572. diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
  27573. index f84f5856520a..9b0cd1b03222 100644
  27574. --- a/virt/kvm/async_pf.c
  27575. +++ b/virt/kvm/async_pf.c
  27576. @@ -94,8 +94,8 @@ static void async_pf_execute(struct work_struct *work)
  27577. trace_kvm_async_pf_completed(addr, gva);
  27578. - if (waitqueue_active(&vcpu->wq))
  27579. - wake_up_interruptible(&vcpu->wq);
  27580. + if (swaitqueue_active(&vcpu->wq))
  27581. + swait_wake_interruptible(&vcpu->wq);
  27582. mmput(mm);
  27583. kvm_put_kvm(vcpu->kvm);
  27584. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
  27585. index f9746f29f870..3424e7fe6678 100644
  27586. --- a/virt/kvm/kvm_main.c
  27587. +++ b/virt/kvm/kvm_main.c
  27588. @@ -220,7 +220,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
  27589. vcpu->kvm = kvm;
  27590. vcpu->vcpu_id = id;
  27591. vcpu->pid = NULL;
  27592. - init_waitqueue_head(&vcpu->wq);
  27593. + init_swait_head(&vcpu->wq);
  27594. kvm_async_pf_vcpu_init(vcpu);
  27595. page = alloc_page(GFP_KERNEL | __GFP_ZERO);
  27596. @@ -1782,7 +1782,7 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
  27597. void kvm_vcpu_block(struct kvm_vcpu *vcpu)
  27598. {
  27599. ktime_t start, cur;
  27600. - DEFINE_WAIT(wait);
  27601. + DEFINE_SWAITER(wait);
  27602. bool waited = false;
  27603. start = cur = ktime_get();
  27604. @@ -1803,7 +1803,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
  27605. }
  27606. for (;;) {
  27607. - prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
  27608. + swait_prepare(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
  27609. if (kvm_vcpu_check_block(vcpu) < 0)
  27610. break;
  27611. @@ -1812,7 +1812,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
  27612. schedule();
  27613. }
  27614. - finish_wait(&vcpu->wq, &wait);
  27615. + swait_finish(&vcpu->wq, &wait);
  27616. cur = ktime_get();
  27617. out:
  27618. @@ -1828,11 +1828,11 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
  27619. {
  27620. int me;
  27621. int cpu = vcpu->cpu;
  27622. - wait_queue_head_t *wqp;
  27623. + struct swait_head *wqp;
  27624. wqp = kvm_arch_vcpu_wq(vcpu);
  27625. - if (waitqueue_active(wqp)) {
  27626. - wake_up_interruptible(wqp);
  27627. + if (swaitqueue_active(wqp)) {
  27628. + swait_wake_interruptible(wqp);
  27629. ++vcpu->stat.halt_wakeup;
  27630. }
  27631. @@ -1933,7 +1933,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
  27632. continue;
  27633. if (vcpu == me)
  27634. continue;
  27635. - if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
  27636. + if (swaitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
  27637. continue;
  27638. if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
  27639. continue;
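The kvm changes above rely on the simple waitqueue (swait) primitives that earlier parts of this patch introduce as a raw-lock based replacement for wait_queue_head_t. A minimal sketch of the waiter/waker pairing with those helpers outside of KVM (my_wq, my_event and the function names are illustrative):

static struct swait_head my_wq;
static bool my_event;

static void my_wq_init(void)
{
	init_swait_head(&my_wq);
}

/* Waiter side: same prepare/check/schedule/finish loop as
 * kvm_vcpu_block() above. */
static void wait_for_event(void)
{
	DEFINE_SWAITER(wait);

	for (;;) {
		swait_prepare(&my_wq, &wait, TASK_INTERRUPTIBLE);
		if (READ_ONCE(my_event))
			break;
		schedule();
	}
	swait_finish(&my_wq, &wait);
}

/* Waker side: same active-check plus wake as kvm_vcpu_kick() above. */
static void signal_event(void)
{
	WRITE_ONCE(my_event, true);
	if (swaitqueue_active(&my_wq))
		swait_wake_interruptible(&my_wq);
}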