realtime.patch 696 KB

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521
715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742017421174221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631
786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741
857518576185771857818579185801858118582185831858418585185861858718588185891859018591185921859318594185951859618597185981859918600186011860218603186041860518606186071860818609186101861118612186131861418615186161861718618186191862018621186221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211902219023190241902519026190271902819029190301903119032190331903419035190361903719038190391904019041190421904319044190451904619047190481904919050190511905219053190541905519056190571905819059190601906119062190631906419065190661906719068190691907019071190721907319074190751907619077190781907919080190811908219083190841908519086190871908819089190901909119092190931909419095190961909719098190991910019101191021910319104191051910619107191081910919110191111911219113191141911519116191171911819119191201912119122191231912419125191261912719128191291913019131191321913319134191351913619137191381913919140191411914219143191441914519146191471914819149191501915119152191531915419155191561915719158191591916019161191621916319164191651916619167191681916919170191711917219173191741917519176191771917819179191801918119182191831918419185191861918719188191891919019191191921919319194191951919619197191981919919200192011920219203192041920519206192071920819209192101921119212192131921419215192161921719218192191922019221192221922319224192251922619227192281922919230192311923219233192341923519236192371923819239192401924119242192431924419245192461924719248192491925019251192521925319254192551925619257192581925919260192611926219263192641926519266192671926819269192701927119272192731927419275192761927719278192791928019281192821928319284192851
928619287192881928919290192911929219293192941929519296192971929819299193001930119302193031930419305193061930719308193091931019311193121931319314193151931619317193181931919320193211932219323193241932519326193271932819329193301933119332193331933419335193361933719338193391934019341193421934319344193451934619347193481934919350193511935219353193541935519356193571935819359193601936119362193631936419365193661936719368193691937019371193721937319374193751937619377193781937919380193811938219383193841938519386193871938819389193901939119392193931939419395193961939719398193991940019401194021940319404194051940619407194081940919410194111941219413194141941519416194171941819419194201942119422194231942419425194261942719428194291943019431194321943319434194351943619437194381943919440194411944219443194441944519446194471944819449194501945119452194531945419455194561945719458194591946019461194621946319464194651946619467194681946919470194711947219473194741947519476194771947819479194801948119482194831948419485194861948719488194891949019491194921949319494194951949619497194981949919500195011950219503195041950519506195071950819509195101951119512195131951419515195161951719518195191952019521195221952319524195251952619527195281952919530195311953219533195341953519536195371953819539195401954119542195431954419545195461954719548195491955019551195521955319554195551955619557195581955919560195611956219563195641956519566195671956819569195701957119572195731957419575195761957719578195791958019581195821958319584195851958619587195881958919590195911959219593195941959519596195971959819599196001960119602196031960419605196061960719608196091961019611196121961319614196151961619617196181961919620196211962219623196241962519626196271962819629196301963119632196331963419635196361963719638196391964019641196421964319644196451964619647196481964919650196511965219653196541965519656196571965819659196601966119662196631966419665196661966719668196691967019671196721967319674196751967619677196781967919680196811968219683196841968519686196871968819689196901969119692196931969419695196961969719698196991970019701197021970319704197051970619707197081970919710197111971219713197141971519716197171971819719197201972119722197231972419725197261972719728197291973019731197321973319734197351973619737197381973919740197411974219743197441974519746197471974819749197501975119752197531975419755197561975719758197591976019761197621976319764197651976619767197681976919770197711977219773197741977519776197771977819779197801978119782197831978419785197861978719788197891979019791197921979319794197951979619797197981979919800198011980219803198041980519806198071980819809198101981119812198131981419815198161981719818198191982019821198221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961
999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212022220223202242022520226202272022820229202302023120232202332023420235202362023720238202392024020241202422024320244202452024620247202482024920250202512025220253202542025520256202572025820259202602026120262202632026420265202662026720268202692027020271202722027320274202752027620277202782027920280202812028220283202842028520286202872028820289202902029120292202932029420295202962029720298202992030020301203022030320304203052030620307203082030920310203112031220313203142031520316203172031820319203202032120322203232032420325203262032720328203292033020331203322033320334203352033620337203382033920340203412034220343203442034520346203472034820349203502035120352203532035420355203562035720358203592036020361203622036320364203652036620367203682036920370203712037220373203742037520376203772037820379203802038120382203832038420385203862038720388203892039020391203922039320394203952039620397203982039920400204012040220403204042040520406204072040820409204102041120412204132041420415204162041720418204192042020421204222042320424204252042620427204282042920430204312043220433204342043520436204372043820439204402044120442204432044420445204462044720448204492045020451204522045320454204552045620457204582045920460204612046220463204642046520466204672046820469204702047120472204732047420475204762047720478204792048020481204822048320484204852048620487204882048920490204912049220493204942049520496204972049820499205002050120502205032050420505205062050720508205092051020511205122051320514205152051620517205182051920520205212052220523205242052520526205272052820529205302053120532205332053420535205362053720538205392054020541205422054320544205452054620547205482054920550205512055220553205542055520556205572055820559205602056120562205632056420565205662056720568205692057020571205722057320574205752057620577205782057920580205812058220583205842058520586205872058820589205902059120592205932059420595205962059720598205992060020601206022060320604206052060620607206082060920610206112061220613206142061520616206172061820619206202062120622206232062420625206262062720628206292063020631206322063320634206352063620637206382063920640206412064220643206442064520646206472064820649206502065120652206532065420655206562065720658206592066020661206622066320664206652066620667206682066920670206712067220673206742067520676206772067820679206802068120682206832068420685206862068720688206892069020691206922069320694206952069620697206982069920700207012070220703207042070520706207072
070820709207102071120712207132071420715207162071720718207192072020721207222072320724207252072620727207282072920730207312073220733207342073520736207372073820739207402074120742207432074420745207462074720748207492075020751207522075320754207552075620757207582075920760207612076220763207642076520766207672076820769207702077120772207732077420775207762077720778207792078020781207822078320784207852078620787207882078920790207912079220793207942079520796207972079820799208002080120802208032080420805208062080720808208092081020811208122081320814208152081620817208182081920820208212082220823208242082520826208272082820829208302083120832208332083420835208362083720838208392084020841208422084320844208452084620847208482084920850208512085220853208542085520856208572085820859208602086120862208632086420865208662086720868208692087020871208722087320874208752087620877208782087920880208812088220883208842088520886208872088820889208902089120892208932089420895208962089720898208992090020901209022090320904209052090620907209082090920910209112091220913209142091520916209172091820919209202092120922209232092420925209262092720928209292093020931209322093320934209352093620937209382093920940209412094220943209442094520946209472094820949209502095120952209532095420955209562095720958209592096020961209622096320964209652096620967209682096920970209712097220973209742097520976209772097820979209802098120982209832098420985209862098720988209892099020991209922099320994209952099620997209982099921000210012100221003210042100521006210072100821009210102101121012210132101421015210162101721018210192102021021210222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303213042130521306213072130821309213102131121312213132131421315213162131721318213192132021321213222132321324213252132621327213282132921330213312133221333213342133521336213372133821339213402134121342213432134421345213462134721348213492135021351213522135321354213552135621357213582135921360213612136221363213642136521366213672136821369213702137121372213732137421375213762137721378213792138021381213822138321384213852138621387213882138921390213912139221393213942139521396213972139821399214002140121402214032140421405214062140721408214092141021411214122141321414214152141621417214182
141921420214212142221423214242142521426214272142821429214302143121432214332143421435214362143721438214392144021441214422144321444214452144621447214482144921450214512145221453214542145521456214572145821459214602146121462214632146421465214662146721468214692147021471214722147321474214752147621477214782147921480214812148221483214842148521486214872148821489214902149121492214932149421495214962149721498214992150021501215022150321504215052150621507215082150921510215112151221513215142151521516215172151821519215202152121522215232152421525215262152721528215292153021531215322153321534215352153621537215382153921540215412154221543215442154521546215472154821549215502155121552215532155421555215562155721558215592156021561215622156321564215652156621567215682156921570215712157221573215742157521576215772157821579215802158121582215832158421585215862158721588215892159021591215922159321594215952159621597215982159921600216012160221603216042160521606216072160821609216102161121612216132161421615216162161721618216192162021621216222162321624216252162621627216282162921630216312163221633216342163521636216372163821639216402164121642216432164421645216462164721648216492165021651216522165321654216552165621657216582165921660216612166221663216642166521666216672166821669216702167121672216732167421675216762167721678216792168021681216822168321684216852168621687216882168921690216912169221693216942169521696216972169821699217002170121702217032170421705217062170721708217092171021711217122171321714217152171621717217182171921720217212172221723217242172521726217272172821729217302173121732217332173421735217362173721738217392174021741217422174321744217452174621747217482174921750217512175221753217542175521756217572175821759217602176121762217632176421765217662176721768217692177021771217722177321774217752177621777217782177921780217812178221783217842178521786217872178821789217902179121792217932179421795217962179721798217992180021801218022180321804218052180621807218082180921810218112181221813218142181521816218172181821819218202182121822218232182421825218262182721828218292183021831218322183321834218352183621837218382183921840218412184221843218442184521846218472184821849218502185121852218532185421855218562185721858218592186021861218622186321864218652186621867218682186921870218712187221873218742187521876218772187821879218802188121882218832188421885218862188721888218892189021891218922189321894218952189621897218982189921900219012190221903219042190521906219072190821909219102191121912219132191421915219162191721918219192192021921219222192321924219252192621927219282192921930219312193221933219342193521936219372193821939219402194121942219432194421945219462194721948219492195021951219522195321954219552195621957219582195921960219612196221963219642196521966219672196821969219702197121972219732197421975219762197721978219792198021981219822198321984219852198621987219882198921990219912199221993219942199521996219972199821999220002200122002220032200422005220062200722008220092201022011220122201322014220152201622017220182201922020220212202222023220242202522026220272202822029220302203122032220332203422035220362203722038220392204022041220422204322044220452204622047220482204922050220512205222053220542205522056220572205822059220602206122062220632206422065220662206722068220692207022071220722207322074220752207622077220782207922080220812208222083220842208522086220872208822089220902209122092220932209422095220962209722098220992210022101221022210322104221052210622107221082210922110221112211222113221142211522116221172211822119221202212122122221232212422125221262212722128221292
213022131221322213322134221352213622137221382213922140221412214222143221442214522146221472214822149221502215122152221532215422155221562215722158221592216022161221622216322164221652216622167221682216922170221712217222173221742217522176221772217822179221802218122182221832218422185221862218722188221892219022191221922219322194221952219622197221982219922200222012220222203222042220522206222072220822209222102221122212222132221422215222162221722218222192222022221222222222322224222252222622227222282222922230222312223222233222342223522236222372223822239222402224122242222432224422245222462224722248222492225022251222522225322254222552225622257222582225922260222612226222263222642226522266222672226822269222702227122272222732227422275222762227722278222792228022281222822228322284222852228622287222882228922290222912229222293222942229522296222972229822299223002230122302223032230422305223062230722308223092231022311223122231322314223152231622317223182231922320223212232222323223242232522326223272232822329223302233122332223332233422335223362233722338223392234022341223422234322344223452234622347223482234922350223512235222353223542235522356223572235822359223602236122362223632236422365223662236722368223692237022371223722237322374223752237622377223782237922380223812238222383223842238522386223872238822389223902239122392223932239422395223962239722398223992240022401224022240322404224052240622407224082240922410224112241222413224142241522416224172241822419224202242122422224232242422425224262242722428224292243022431224322243322434224352243622437224382243922440224412244222443224442244522446224472244822449224502245122452224532245422455224562245722458224592246022461224622246322464224652246622467224682246922470224712247222473224742247522476224772247822479224802248122482224832248422485224862248722488224892249022491224922249322494224952249622497224982249922500225012250222503225042250522506225072250822509225102251122512225132251422515225162251722518225192252022521225222252322524225252252622527225282252922530225312253222533225342253522536225372253822539225402254122542225432254422545225462254722548225492255022551225522255322554225552255622557225582255922560225612256222563225642256522566225672256822569225702257122572225732257422575225762257722578225792258022581225822258322584225852258622587225882258922590225912259222593225942259522596225972259822599226002260122602226032260422605226062260722608226092261022611226122261322614226152261622617226182261922620226212262222623226242262522626226272262822629226302263122632226332263422635226362263722638226392264022641226422264322644226452264622647226482264922650226512265222653226542265522656226572265822659226602266122662226632266422665226662266722668226692267022671226722267322674226752267622677226782267922680226812268222683226842268522686226872268822689226902269122692226932269422695226962269722698226992270022701227022270322704227052270622707227082270922710227112271222713227142271522716227172271822719227202272122722227232272422725227262272722728227292273022731227322273322734227352273622737227382273922740227412274222743227442274522746227472274822749227502275122752227532275422755227562275722758227592276022761227622276322764227652276622767227682276922770227712277222773227742277522776227772277822779227802278122782227832278422785227862278722788227892279022791227922279322794227952279622797227982279922800228012280222803228042280522806228072280822809228102281122812228132281422815228162281722818228192282022821228222282322824228252282622827228282282922830228312283222833228342283522836228372283822839228402
284122842228432284422845228462284722848228492285022851228522285322854228552285622857228582285922860228612286222863228642286522866228672286822869228702287122872228732287422875228762287722878228792288022881228822288322884228852288622887228882288922890228912289222893228942289522896228972289822899229002290122902229032290422905229062290722908229092291022911229122291322914229152291622917229182291922920229212292222923229242292522926229272292822929229302293122932229332293422935229362293722938229392294022941229422294322944229452294622947229482294922950229512295222953229542295522956229572295822959229602296122962229632296422965229662296722968229692297022971229722297322974229752297622977229782297922980229812298222983229842298522986229872298822989229902299122992229932299422995229962299722998229992300023001230022300323004230052300623007230082300923010230112301223013230142301523016230172301823019230202302123022230232302423025230262302723028230292303023031230322303323034230352303623037230382303923040230412304223043230442304523046230472304823049230502305123052230532305423055230562305723058230592306023061230622306323064230652306623067230682306923070230712307223073230742307523076230772307823079230802308123082230832308423085230862308723088230892309023091230922309323094230952309623097230982309923100231012310223103231042310523106231072310823109231102311123112231132311423115231162311723118231192312023121231222312323124231252312623127231282312923130231312313223133231342313523136231372313823139231402314123142231432314423145231462314723148231492315023151231522315323154231552315623157231582315923160231612316223163231642316523166231672316823169231702317123172231732317423175231762317723178231792318023181231822318323184231852318623187231882318923190231912319223193231942319523196231972319823199232002320123202232032320423205232062320723208232092321023211232122321323214232152321623217232182321923220232212322223223232242322523226232272322823229232302323123232232332323423235232362323723238232392324023241232422324323244232452324623247232482324923250232512325223253232542325523256232572325823259232602326123262232632326423265232662326723268232692327023271232722327323274232752327623277232782327923280232812328223283232842328523286232872328823289232902329123292232932329423295232962329723298232992330023301233022330323304233052330623307233082330923310233112331223313233142331523316233172331823319233202332123322233232332423325233262332723328233292333023331233322333323334233352333623337233382333923340233412334223343233442334523346233472334823349233502335123352233532335423355233562335723358233592336023361233622336323364233652336623367233682336923370233712337223373233742337523376233772337823379233802338123382233832338423385233862338723388233892339023391233922339323394233952339623397233982339923400234012340223403234042340523406234072340823409234102341123412234132341423415234162341723418234192342023421234222342323424234252342623427234282342923430234312343223433234342343523436234372343823439234402344123442234432344423445234462344723448234492345023451234522345323454234552345623457234582345923460234612346223463234642346523466234672346823469234702347123472234732347423475234762347723478234792348023481234822348323484234852348623487234882348923490234912349223493234942349523496234972349823499235002350123502235032350423505235062350723508235092351023511235122351323514235152351623517235182351923520235212352223523235242352523526235272352823529235302353123532235332353423535235362353723538235392354023541235422354323544235452354623547235482354923550235512
355223553235542355523556235572355823559235602356123562235632356423565235662356723568235692357023571235722357323574235752357623577235782357923580235812358223583235842358523586235872358823589235902359123592235932359423595235962359723598235992360023601236022360323604236052360623607236082360923610236112361223613236142361523616236172361823619236202362123622236232362423625236262362723628236292363023631236322363323634236352363623637236382363923640236412364223643236442364523646236472364823649236502365123652236532365423655236562365723658236592366023661236622366323664236652366623667236682366923670236712367223673236742367523676236772367823679236802368123682236832368423685236862368723688236892369023691236922369323694236952369623697236982369923700237012370223703237042370523706237072370823709237102371123712237132371423715237162371723718237192372023721237222372323724237252372623727237282372923730237312373223733237342373523736237372373823739237402374123742237432374423745237462374723748237492375023751237522375323754237552375623757237582375923760237612376223763237642376523766237672376823769237702377123772237732377423775237762377723778237792378023781237822378323784237852378623787237882378923790237912379223793237942379523796237972379823799238002380123802238032380423805238062380723808238092381023811238122381323814238152381623817238182381923820238212382223823238242382523826238272382823829238302383123832238332383423835238362383723838238392384023841238422384323844238452384623847238482384923850238512385223853238542385523856238572385823859238602386123862238632386423865238662386723868238692387023871238722387323874238752387623877238782387923880238812388223883238842388523886238872388823889238902389123892238932389423895238962389723898238992390023901239022390323904239052390623907239082390923910239112391223913239142391523916239172391823919239202392123922239232392423925239262392723928239292393023931239322393323934239352393623937239382393923940239412394223943239442394523946239472394823949239502395123952239532395423955239562395723958239592396023961239622396323964239652396623967239682396923970239712397223973239742397523976239772397823979239802398123982239832398423985239862398723988239892399023991239922399323994239952399623997239982399924000240012400224003240042400524006240072400824009240102401124012240132401424015240162401724018240192402024021240222402324024240252402624027240282402924030240312403224033240342403524036240372403824039240402404124042240432404424045240462404724048240492405024051240522405324054240552405624057240582405924060240612406224063240642406524066240672406824069240702407124072240732407424075240762407724078240792408024081240822408324084240852408624087240882408924090240912409224093240942409524096240972409824099241002410124102241032410424105241062410724108241092411024111241122411324114241152411624117241182411924120241212412224123241242412524126241272412824129241302413124132241332413424135241362413724138241392414024141241422414324144241452414624147241482414924150241512415224153241542415524156241572415824159241602416124162241632416424165241662416724168241692417024171241722417324174241752417624177241782417924180241812418224183241842418524186241872418824189241902419124192241932419424195241962419724198241992420024201242022420324204242052420624207242082420924210242112421224213242142421524216242172421824219242202422124222242232422424225242262422724228242292423024231242322423324234242352423624237242382423924240242412424224243242442424524246242472424824249242502425124252242532425424255242562425724258242592426024261242622
426324264242652426624267242682426924270242712427224273242742427524276242772427824279
diff -Nur linux-3.18.10.orig/arch/alpha/mm/fault.c linux-3.18.10/arch/alpha/mm/fault.c
--- linux-3.18.10.orig/arch/alpha/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/arch/alpha/mm/fault.c 2015-03-26 12:42:13.555582327 +0100
@@ -107,7 +107,7 @@
 /* If we're in an interrupt context, or have no user context,
    we must not take the fault. */
- if (!mm || in_atomic())
+ if (!mm || pagefault_disabled())
 goto no_context;
 #ifdef CONFIG_ALPHA_LARGE_VMALLOC
diff -Nur linux-3.18.10.orig/arch/arm/include/asm/cmpxchg.h linux-3.18.10/arch/arm/include/asm/cmpxchg.h
--- linux-3.18.10.orig/arch/arm/include/asm/cmpxchg.h 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/arch/arm/include/asm/cmpxchg.h 2015-03-26 12:42:13.555582327 +0100
@@ -129,6 +129,8 @@
 #else /* min ARCH >= ARMv6 */
+#define __HAVE_ARCH_CMPXCHG 1
+
 extern void __bad_cmpxchg(volatile void *ptr, int size);
 /*
diff -Nur linux-3.18.10.orig/arch/arm/include/asm/futex.h linux-3.18.10/arch/arm/include/asm/futex.h
--- linux-3.18.10.orig/arch/arm/include/asm/futex.h 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/arch/arm/include/asm/futex.h 2015-03-26 12:42:13.555582327 +0100
@@ -93,6 +93,8 @@
 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
 return -EFAULT;
+ preempt_disable_rt();
+
 __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
 "1: " TUSER(ldr) " %1, [%4]\n"
 " teq %1, %2\n"
@@ -104,6 +106,8 @@
 : "cc", "memory");
 *uval = val;
+
+ preempt_enable_rt();
 return ret;
 }
diff -Nur linux-3.18.10.orig/arch/arm/include/asm/switch_to.h linux-3.18.10/arch/arm/include/asm/switch_to.h
--- linux-3.18.10.orig/arch/arm/include/asm/switch_to.h 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/arch/arm/include/asm/switch_to.h 2015-03-26 12:42:13.555582327 +0100
@@ -3,6 +3,13 @@
 #include <linux/thread_info.h>
+#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
+void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
+#else
+static inline void
+switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
+#endif
+
 /*
  * For v7 SMP cores running a preemptible kernel we may be pre-empted
  * during a TLB maintenance operation, so execute an inner-shareable dsb
@@ -22,6 +29,7 @@
 #define switch_to(prev,next,last) \
 do { \
+ switch_kmaps(prev, next); \
 last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \
 } while (0)
diff -Nur linux-3.18.10.orig/arch/arm/include/asm/thread_info.h linux-3.18.10/arch/arm/include/asm/thread_info.h
--- linux-3.18.10.orig/arch/arm/include/asm/thread_info.h 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/arch/arm/include/asm/thread_info.h 2015-03-26 12:42:13.555582327 +0100
@@ -51,6 +51,7 @@
 struct thread_info {
 unsigned long flags; /* low level flags */
 int preempt_count; /* 0 => preemptable, <0 => bug */
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
 mm_segment_t addr_limit; /* address limit */
 struct task_struct *task; /* main task structure */
 struct exec_domain *exec_domain; /* execution domain */
@@ -149,6 +150,7 @@
 #define TIF_SIGPENDING 0
 #define TIF_NEED_RESCHED 1
 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
+#define TIF_NEED_RESCHED_LAZY 3
 #define TIF_UPROBE 7
 #define TIF_SYSCALL_TRACE 8
 #define TIF_SYSCALL_AUDIT 9
@@ -162,6 +164,7 @@
 #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
 #define _TIF_UPROBE (1 << TIF_UPROBE)
 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  86. diff -Nur linux-3.18.10.orig/arch/arm/Kconfig linux-3.18.10/arch/arm/Kconfig
  87. --- linux-3.18.10.orig/arch/arm/Kconfig 2015-03-24 02:05:12.000000000 +0100
  88. +++ linux-3.18.10/arch/arm/Kconfig 2015-03-26 12:42:13.555582327 +0100
  89. @@ -62,6 +62,7 @@
  90. select HAVE_PERF_EVENTS
  91. select HAVE_PERF_REGS
  92. select HAVE_PERF_USER_STACK_DUMP
  93. + select HAVE_PREEMPT_LAZY
  94. select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
  95. select HAVE_REGS_AND_STACK_ACCESS_API
  96. select HAVE_SYSCALL_TRACEPOINTS
  97. diff -Nur linux-3.18.10.orig/arch/arm/kernel/asm-offsets.c linux-3.18.10/arch/arm/kernel/asm-offsets.c
  98. --- linux-3.18.10.orig/arch/arm/kernel/asm-offsets.c 2015-03-24 02:05:12.000000000 +0100
  99. +++ linux-3.18.10/arch/arm/kernel/asm-offsets.c 2015-03-26 12:42:13.555582327 +0100
  100. @@ -64,6 +64,7 @@
  101. BLANK();
  102. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  103. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  104. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  105. DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
  106. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  107. DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain));
  108. diff -Nur linux-3.18.10.orig/arch/arm/kernel/entry-armv.S linux-3.18.10/arch/arm/kernel/entry-armv.S
  109. --- linux-3.18.10.orig/arch/arm/kernel/entry-armv.S 2015-03-24 02:05:12.000000000 +0100
  110. +++ linux-3.18.10/arch/arm/kernel/entry-armv.S 2015-03-26 12:42:13.555582327 +0100
  111. @@ -207,11 +207,18 @@
  112. #ifdef CONFIG_PREEMPT
  113. get_thread_info tsk
  114. ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
  115. - ldr r0, [tsk, #TI_FLAGS] @ get flags
  116. teq r8, #0 @ if preempt count != 0
  117. + bne 1f @ return from exception
  118. + ldr r0, [tsk, #TI_FLAGS] @ get flags
  119. + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
  120. + blne svc_preempt @ preempt!
  121. +
  122. + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
  123. + teq r8, #0 @ if preempt lazy count != 0
  124. movne r0, #0 @ force flags to 0
  125. - tst r0, #_TIF_NEED_RESCHED
  126. + tst r0, #_TIF_NEED_RESCHED_LAZY
  127. blne svc_preempt
  128. +1:
  129. #endif
  130. svc_exit r5, irq = 1 @ return from exception
  131. @@ -226,6 +233,8 @@
  132. 1: bl preempt_schedule_irq @ irq en/disable is done inside
  133. ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
  134. tst r0, #_TIF_NEED_RESCHED
  135. + bne 1b
  136. + tst r0, #_TIF_NEED_RESCHED_LAZY
  137. reteq r8 @ go again
  138. b 1b
  139. #endif
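
The reworked preemption check in the SVC return path above is easier to read as C. The sketch below mirrors the assembly's control flow using the thread_info fields and TIF bits added earlier; it is an illustration only, not code from the series (svc_preempt is the existing assembly stub around preempt_schedule_irq()):

/* Hedged C rendering of the assembly above; illustration only. */
#include <asm/thread_info.h>

extern void svc_preempt(void);	/* existing asm helper: loops on preempt_schedule_irq() */

static void irq_svc_preempt_check(struct thread_info *ti)
{
	if (ti->preempt_count)				/* preemption disabled: skip */
		return;
	if (ti->flags & _TIF_NEED_RESCHED)		/* hard request: always honoured */
		svc_preempt();
	if (ti->preempt_lazy_count)			/* lazy preemption held off */
		return;
	if (ti->flags & _TIF_NEED_RESCHED_LAZY)		/* deferred SCHED_OTHER request */
		svc_preempt();
}
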
  140. diff -Nur linux-3.18.10.orig/arch/arm/kernel/process.c linux-3.18.10/arch/arm/kernel/process.c
  141. --- linux-3.18.10.orig/arch/arm/kernel/process.c 2015-03-24 02:05:12.000000000 +0100
  142. +++ linux-3.18.10/arch/arm/kernel/process.c 2015-03-26 12:42:13.555582327 +0100
  143. @@ -431,6 +431,30 @@
  144. }
  145. #ifdef CONFIG_MMU
  146. +/*
  147. + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not
  148. + * initialized by pgtable_page_ctor() then a coredump of the vector page will
  149. + * fail.
  150. + */
  151. +static int __init vectors_user_mapping_init_page(void)
  152. +{
  153. + struct page *page;
  154. + unsigned long addr = 0xffff0000;
  155. + pgd_t *pgd;
  156. + pud_t *pud;
  157. + pmd_t *pmd;
  158. +
  159. + pgd = pgd_offset_k(addr);
  160. + pud = pud_offset(pgd, addr);
  161. + pmd = pmd_offset(pud, addr);
  162. + page = pmd_page(*(pmd));
  163. +
  164. + pgtable_page_ctor(page);
  165. +
  166. + return 0;
  167. +}
  168. +late_initcall(vectors_user_mapping_init_page);
  169. +
  170. #ifdef CONFIG_KUSER_HELPERS
  171. /*
  172. * The vectors page is always readable from user space for the
  173. diff -Nur linux-3.18.10.orig/arch/arm/kernel/signal.c linux-3.18.10/arch/arm/kernel/signal.c
  174. --- linux-3.18.10.orig/arch/arm/kernel/signal.c 2015-03-24 02:05:12.000000000 +0100
  175. +++ linux-3.18.10/arch/arm/kernel/signal.c 2015-03-26 12:42:13.555582327 +0100
  176. @@ -574,7 +574,8 @@
  177. do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
  178. {
  179. do {
  180. - if (likely(thread_flags & _TIF_NEED_RESCHED)) {
  181. + if (likely(thread_flags & (_TIF_NEED_RESCHED |
  182. + _TIF_NEED_RESCHED_LAZY))) {
  183. schedule();
  184. } else {
  185. if (unlikely(!user_mode(regs)))
  186. diff -Nur linux-3.18.10.orig/arch/arm/kernel/smp.c linux-3.18.10/arch/arm/kernel/smp.c
  187. --- linux-3.18.10.orig/arch/arm/kernel/smp.c 2015-03-24 02:05:12.000000000 +0100
  188. +++ linux-3.18.10/arch/arm/kernel/smp.c 2015-03-26 12:42:13.555582327 +0100
  189. @@ -506,12 +506,14 @@
  190. }
  191. #ifdef CONFIG_IRQ_WORK
  192. +#ifndef CONFIG_PREEMPT_RT_FULL
  193. void arch_irq_work_raise(void)
  194. {
  195. if (arch_irq_work_has_interrupt())
  196. smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
  197. }
  198. #endif
  199. +#endif
  200. #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
  201. void tick_broadcast(const struct cpumask *mask)
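
Compiling out arch_irq_work_raise() under PREEMPT_RT_FULL (here, and in the arm64 and sparc hunks below) drops the self-IPI, so pending irq_work is no longer run from hard interrupt context; the RT side of the series drains it from a non-hardirq context instead. With the arch override gone, the weak default in kernel/irq_work.c takes over, which has this shape in mainline (shown for context, hedged rather than quoted from this series):

#include <linux/irq_work.h>

/* Weak fallback used when no architecture hook is built in:
 * pending work is picked up lazily, e.g. from the next timer tick. */
void __weak arch_irq_work_raise(void)
{
}
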
  202. diff -Nur linux-3.18.10.orig/arch/arm/kernel/unwind.c linux-3.18.10/arch/arm/kernel/unwind.c
  203. --- linux-3.18.10.orig/arch/arm/kernel/unwind.c 2015-03-24 02:05:12.000000000 +0100
  204. +++ linux-3.18.10/arch/arm/kernel/unwind.c 2015-03-26 12:42:13.555582327 +0100
  205. @@ -93,7 +93,7 @@
  206. static const struct unwind_idx *__origin_unwind_idx;
  207. extern const struct unwind_idx __stop_unwind_idx[];
  208. -static DEFINE_SPINLOCK(unwind_lock);
  209. +static DEFINE_RAW_SPINLOCK(unwind_lock);
  210. static LIST_HEAD(unwind_tables);
  211. /* Convert a prel31 symbol to an absolute address */
  212. @@ -201,7 +201,7 @@
  213. /* module unwind tables */
  214. struct unwind_table *table;
  215. - spin_lock_irqsave(&unwind_lock, flags);
  216. + raw_spin_lock_irqsave(&unwind_lock, flags);
  217. list_for_each_entry(table, &unwind_tables, list) {
  218. if (addr >= table->begin_addr &&
  219. addr < table->end_addr) {
  220. @@ -213,7 +213,7 @@
  221. break;
  222. }
  223. }
  224. - spin_unlock_irqrestore(&unwind_lock, flags);
  225. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  226. }
  227. pr_debug("%s: idx = %p\n", __func__, idx);
  228. @@ -530,9 +530,9 @@
  229. tab->begin_addr = text_addr;
  230. tab->end_addr = text_addr + text_size;
  231. - spin_lock_irqsave(&unwind_lock, flags);
  232. + raw_spin_lock_irqsave(&unwind_lock, flags);
  233. list_add_tail(&tab->list, &unwind_tables);
  234. - spin_unlock_irqrestore(&unwind_lock, flags);
  235. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  236. return tab;
  237. }
  238. @@ -544,9 +544,9 @@
  239. if (!tab)
  240. return;
  241. - spin_lock_irqsave(&unwind_lock, flags);
  242. + raw_spin_lock_irqsave(&unwind_lock, flags);
  243. list_del(&tab->list);
  244. - spin_unlock_irqrestore(&unwind_lock, flags);
  245. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  246. kfree(tab);
  247. }
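
The unwinder lock, like the various boot_lock instances converted in the platsmp hunks that follow, is taken from contexts that must not sleep (exception unwinding, secondary-CPU bring-up with interrupts off). Under PREEMPT_RT_FULL a spinlock_t becomes a sleeping rtmutex, so these locks are switched to raw_spinlock_t, which keeps the busy-waiting behaviour. The pattern, as a minimal sketch with hypothetical names:

/* Hedged sketch of the conversion pattern; names are illustrative. */
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);

static void example_atomic_path(void)
{
	unsigned long flags;

	/* raw_spin_lock_irqsave() spins even on PREEMPT_RT, so it is
	 * legal from non-sleeping contexts; keep the section short. */
	raw_spin_lock_irqsave(&example_lock, flags);
	/* ... short, bounded critical section ... */
	raw_spin_unlock_irqrestore(&example_lock, flags);
}
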
  248. diff -Nur linux-3.18.10.orig/arch/arm/mach-at91/at91rm9200_time.c linux-3.18.10/arch/arm/mach-at91/at91rm9200_time.c
  249. --- linux-3.18.10.orig/arch/arm/mach-at91/at91rm9200_time.c 2015-03-24 02:05:12.000000000 +0100
  250. +++ linux-3.18.10/arch/arm/mach-at91/at91rm9200_time.c 2015-03-26 12:42:13.555582327 +0100
  251. @@ -135,6 +135,7 @@
  252. break;
  253. case CLOCK_EVT_MODE_SHUTDOWN:
  254. case CLOCK_EVT_MODE_UNUSED:
  255. + remove_irq(NR_IRQS_LEGACY + AT91_ID_SYS, &at91rm9200_timer_irq);
  256. case CLOCK_EVT_MODE_RESUME:
  257. irqmask = 0;
  258. break;
  259. diff -Nur linux-3.18.10.orig/arch/arm/mach-exynos/platsmp.c linux-3.18.10/arch/arm/mach-exynos/platsmp.c
  260. --- linux-3.18.10.orig/arch/arm/mach-exynos/platsmp.c 2015-03-24 02:05:12.000000000 +0100
  261. +++ linux-3.18.10/arch/arm/mach-exynos/platsmp.c 2015-03-26 12:42:13.555582327 +0100
  262. @@ -137,7 +137,7 @@
  263. return (void __iomem *)(S5P_VA_SCU);
  264. }
  265. -static DEFINE_SPINLOCK(boot_lock);
  266. +static DEFINE_RAW_SPINLOCK(boot_lock);
  267. static void exynos_secondary_init(unsigned int cpu)
  268. {
  269. @@ -150,8 +150,8 @@
  270. /*
  271. * Synchronise with the boot thread.
  272. */
  273. - spin_lock(&boot_lock);
  274. - spin_unlock(&boot_lock);
  275. + raw_spin_lock(&boot_lock);
  276. + raw_spin_unlock(&boot_lock);
  277. }
  278. static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
  279. @@ -165,7 +165,7 @@
  280. * Set synchronisation state between this boot processor
  281. * and the secondary one
  282. */
  283. - spin_lock(&boot_lock);
  284. + raw_spin_lock(&boot_lock);
  285. /*
  286. * The secondary processor is waiting to be released from
  287. @@ -192,7 +192,7 @@
  288. if (timeout == 0) {
  289. printk(KERN_ERR "cpu1 power enable failed");
  290. - spin_unlock(&boot_lock);
  291. + raw_spin_unlock(&boot_lock);
  292. return -ETIMEDOUT;
  293. }
  294. }
  295. @@ -242,7 +242,7 @@
  296. * calibrations, then wait for it to finish
  297. */
  298. fail:
  299. - spin_unlock(&boot_lock);
  300. + raw_spin_unlock(&boot_lock);
  301. return pen_release != -1 ? ret : 0;
  302. }
  303. diff -Nur linux-3.18.10.orig/arch/arm/mach-hisi/platmcpm.c linux-3.18.10/arch/arm/mach-hisi/platmcpm.c
  304. --- linux-3.18.10.orig/arch/arm/mach-hisi/platmcpm.c 2015-03-24 02:05:12.000000000 +0100
  305. +++ linux-3.18.10/arch/arm/mach-hisi/platmcpm.c 2015-03-26 12:42:13.555582327 +0100
  306. @@ -57,7 +57,7 @@
  307. static void __iomem *sysctrl, *fabric;
  308. static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
  309. -static DEFINE_SPINLOCK(boot_lock);
  310. +static DEFINE_RAW_SPINLOCK(boot_lock);
  311. static u32 fabric_phys_addr;
  312. /*
  313. * [0]: bootwrapper physical address
  314. @@ -104,7 +104,7 @@
  315. if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
  316. return -EINVAL;
  317. - spin_lock_irq(&boot_lock);
  318. + raw_spin_lock_irq(&boot_lock);
  319. if (hip04_cpu_table[cluster][cpu])
  320. goto out;
  321. @@ -133,7 +133,7 @@
  322. udelay(20);
  323. out:
  324. hip04_cpu_table[cluster][cpu]++;
  325. - spin_unlock_irq(&boot_lock);
  326. + raw_spin_unlock_irq(&boot_lock);
  327. return 0;
  328. }
  329. @@ -149,7 +149,7 @@
  330. __mcpm_cpu_going_down(cpu, cluster);
  331. - spin_lock(&boot_lock);
  332. + raw_spin_lock(&boot_lock);
  333. BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
  334. hip04_cpu_table[cluster][cpu]--;
  335. if (hip04_cpu_table[cluster][cpu] == 1) {
  336. @@ -162,7 +162,7 @@
  337. last_man = hip04_cluster_is_down(cluster);
  338. if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
  339. - spin_unlock(&boot_lock);
  340. + raw_spin_unlock(&boot_lock);
  341. /* Since it's Cortex A15, disable L2 prefetching. */
  342. asm volatile(
  343. "mcr p15, 1, %0, c15, c0, 3 \n\t"
  344. @@ -173,7 +173,7 @@
  345. hip04_set_snoop_filter(cluster, 0);
  346. __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
  347. } else {
  348. - spin_unlock(&boot_lock);
  349. + raw_spin_unlock(&boot_lock);
  350. v7_exit_coherency_flush(louis);
  351. }
  352. @@ -192,7 +192,7 @@
  353. cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
  354. count = TIMEOUT_MSEC / POLL_MSEC;
  355. - spin_lock_irq(&boot_lock);
  356. + raw_spin_lock_irq(&boot_lock);
  357. for (tries = 0; tries < count; tries++) {
  358. if (hip04_cpu_table[cluster][cpu]) {
  359. ret = -EBUSY;
  360. @@ -202,10 +202,10 @@
  361. data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
  362. if (data & CORE_WFI_STATUS(cpu))
  363. break;
  364. - spin_unlock_irq(&boot_lock);
  365. + raw_spin_unlock_irq(&boot_lock);
  366. /* Wait for clean L2 when the whole cluster is down. */
  367. msleep(POLL_MSEC);
  368. - spin_lock_irq(&boot_lock);
  369. + raw_spin_lock_irq(&boot_lock);
  370. }
  371. if (tries >= count)
  372. goto err;
  373. @@ -220,10 +220,10 @@
  374. }
  375. if (tries >= count)
  376. goto err;
  377. - spin_unlock_irq(&boot_lock);
  378. + raw_spin_unlock_irq(&boot_lock);
  379. return 0;
  380. err:
  381. - spin_unlock_irq(&boot_lock);
  382. + raw_spin_unlock_irq(&boot_lock);
  383. return ret;
  384. }
  385. @@ -235,10 +235,10 @@
  386. cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
  387. cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
  388. - spin_lock(&boot_lock);
  389. + raw_spin_lock(&boot_lock);
  390. if (!hip04_cpu_table[cluster][cpu])
  391. hip04_cpu_table[cluster][cpu] = 1;
  392. - spin_unlock(&boot_lock);
  393. + raw_spin_unlock(&boot_lock);
  394. }
  395. static void __naked hip04_mcpm_power_up_setup(unsigned int affinity_level)
  396. diff -Nur linux-3.18.10.orig/arch/arm/mach-omap2/omap-smp.c linux-3.18.10/arch/arm/mach-omap2/omap-smp.c
  397. --- linux-3.18.10.orig/arch/arm/mach-omap2/omap-smp.c 2015-03-24 02:05:12.000000000 +0100
  398. +++ linux-3.18.10/arch/arm/mach-omap2/omap-smp.c 2015-03-26 12:42:13.555582327 +0100
  399. @@ -43,7 +43,7 @@
  400. /* SCU base address */
  401. static void __iomem *scu_base;
  402. -static DEFINE_SPINLOCK(boot_lock);
  403. +static DEFINE_RAW_SPINLOCK(boot_lock);
  404. void __iomem *omap4_get_scu_base(void)
  405. {
  406. @@ -74,8 +74,8 @@
  407. /*
  408. * Synchronise with the boot thread.
  409. */
  410. - spin_lock(&boot_lock);
  411. - spin_unlock(&boot_lock);
  412. + raw_spin_lock(&boot_lock);
  413. + raw_spin_unlock(&boot_lock);
  414. }
  415. static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
  416. @@ -89,7 +89,7 @@
  417. * Set synchronisation state between this boot processor
  418. * and the secondary one
  419. */
  420. - spin_lock(&boot_lock);
  421. + raw_spin_lock(&boot_lock);
  422. /*
  423. * Update the AuxCoreBoot0 with boot state for secondary core.
  424. @@ -166,7 +166,7 @@
  425. * Now the secondary core is starting up let it run its
  426. * calibrations, then wait for it to finish
  427. */
  428. - spin_unlock(&boot_lock);
  429. + raw_spin_unlock(&boot_lock);
  430. return 0;
  431. }
  432. diff -Nur linux-3.18.10.orig/arch/arm/mach-prima2/platsmp.c linux-3.18.10/arch/arm/mach-prima2/platsmp.c
  433. --- linux-3.18.10.orig/arch/arm/mach-prima2/platsmp.c 2015-03-24 02:05:12.000000000 +0100
  434. +++ linux-3.18.10/arch/arm/mach-prima2/platsmp.c 2015-03-26 12:42:13.555582327 +0100
  435. @@ -23,7 +23,7 @@
  436. static void __iomem *scu_base;
  437. static void __iomem *rsc_base;
  438. -static DEFINE_SPINLOCK(boot_lock);
  439. +static DEFINE_RAW_SPINLOCK(boot_lock);
  440. static struct map_desc scu_io_desc __initdata = {
  441. .length = SZ_4K,
  442. @@ -56,8 +56,8 @@
  443. /*
  444. * Synchronise with the boot thread.
  445. */
  446. - spin_lock(&boot_lock);
  447. - spin_unlock(&boot_lock);
  448. + raw_spin_lock(&boot_lock);
  449. + raw_spin_unlock(&boot_lock);
  450. }
  451. static struct of_device_id rsc_ids[] = {
  452. @@ -95,7 +95,7 @@
  453. /* make sure write buffer is drained */
  454. mb();
  455. - spin_lock(&boot_lock);
  456. + raw_spin_lock(&boot_lock);
  457. /*
  458. * The secondary processor is waiting to be released from
  459. @@ -127,7 +127,7 @@
  460. * now the secondary core is starting up let it run its
  461. * calibrations, then wait for it to finish
  462. */
  463. - spin_unlock(&boot_lock);
  464. + raw_spin_unlock(&boot_lock);
  465. return pen_release != -1 ? -ENOSYS : 0;
  466. }
  467. diff -Nur linux-3.18.10.orig/arch/arm/mach-qcom/platsmp.c linux-3.18.10/arch/arm/mach-qcom/platsmp.c
  468. --- linux-3.18.10.orig/arch/arm/mach-qcom/platsmp.c 2015-03-24 02:05:12.000000000 +0100
  469. +++ linux-3.18.10/arch/arm/mach-qcom/platsmp.c 2015-03-26 12:42:13.555582327 +0100
  470. @@ -46,7 +46,7 @@
  471. extern void secondary_startup(void);
  472. -static DEFINE_SPINLOCK(boot_lock);
  473. +static DEFINE_RAW_SPINLOCK(boot_lock);
  474. #ifdef CONFIG_HOTPLUG_CPU
  475. static void __ref qcom_cpu_die(unsigned int cpu)
  476. @@ -60,8 +60,8 @@
  477. /*
  478. * Synchronise with the boot thread.
  479. */
  480. - spin_lock(&boot_lock);
  481. - spin_unlock(&boot_lock);
  482. + raw_spin_lock(&boot_lock);
  483. + raw_spin_unlock(&boot_lock);
  484. }
  485. static int scss_release_secondary(unsigned int cpu)
  486. @@ -284,7 +284,7 @@
  487. * set synchronisation state between this boot processor
  488. * and the secondary one
  489. */
  490. - spin_lock(&boot_lock);
  491. + raw_spin_lock(&boot_lock);
  492. /*
  493. * Send the secondary CPU a soft interrupt, thereby causing
  494. @@ -297,7 +297,7 @@
  495. * now the secondary core is starting up let it run its
  496. * calibrations, then wait for it to finish
  497. */
  498. - spin_unlock(&boot_lock);
  499. + raw_spin_unlock(&boot_lock);
  500. return ret;
  501. }
  502. diff -Nur linux-3.18.10.orig/arch/arm/mach-spear/platsmp.c linux-3.18.10/arch/arm/mach-spear/platsmp.c
  503. --- linux-3.18.10.orig/arch/arm/mach-spear/platsmp.c 2015-03-24 02:05:12.000000000 +0100
  504. +++ linux-3.18.10/arch/arm/mach-spear/platsmp.c 2015-03-26 12:42:13.555582327 +0100
  505. @@ -32,7 +32,7 @@
  506. sync_cache_w(&pen_release);
  507. }
  508. -static DEFINE_SPINLOCK(boot_lock);
  509. +static DEFINE_RAW_SPINLOCK(boot_lock);
  510. static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
  511. @@ -47,8 +47,8 @@
  512. /*
  513. * Synchronise with the boot thread.
  514. */
  515. - spin_lock(&boot_lock);
  516. - spin_unlock(&boot_lock);
  517. + raw_spin_lock(&boot_lock);
  518. + raw_spin_unlock(&boot_lock);
  519. }
  520. static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
  521. @@ -59,7 +59,7 @@
  522. * set synchronisation state between this boot processor
  523. * and the secondary one
  524. */
  525. - spin_lock(&boot_lock);
  526. + raw_spin_lock(&boot_lock);
  527. /*
  528. * The secondary processor is waiting to be released from
  529. @@ -84,7 +84,7 @@
  530. * now the secondary core is starting up let it run its
  531. * calibrations, then wait for it to finish
  532. */
  533. - spin_unlock(&boot_lock);
  534. + raw_spin_unlock(&boot_lock);
  535. return pen_release != -1 ? -ENOSYS : 0;
  536. }
  537. diff -Nur linux-3.18.10.orig/arch/arm/mach-sti/platsmp.c linux-3.18.10/arch/arm/mach-sti/platsmp.c
  538. --- linux-3.18.10.orig/arch/arm/mach-sti/platsmp.c 2015-03-24 02:05:12.000000000 +0100
  539. +++ linux-3.18.10/arch/arm/mach-sti/platsmp.c 2015-03-26 12:42:13.555582327 +0100
  540. @@ -34,7 +34,7 @@
  541. sync_cache_w(&pen_release);
  542. }
  543. -static DEFINE_SPINLOCK(boot_lock);
  544. +static DEFINE_RAW_SPINLOCK(boot_lock);
  545. static void sti_secondary_init(unsigned int cpu)
  546. {
  547. @@ -49,8 +49,8 @@
  548. /*
  549. * Synchronise with the boot thread.
  550. */
  551. - spin_lock(&boot_lock);
  552. - spin_unlock(&boot_lock);
  553. + raw_spin_lock(&boot_lock);
  554. + raw_spin_unlock(&boot_lock);
  555. }
  556. static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
  557. @@ -61,7 +61,7 @@
  558. * set synchronisation state between this boot processor
  559. * and the secondary one
  560. */
  561. - spin_lock(&boot_lock);
  562. + raw_spin_lock(&boot_lock);
  563. /*
  564. * The secondary processor is waiting to be released from
  565. @@ -92,7 +92,7 @@
  566. * now the secondary core is starting up let it run its
  567. * calibrations, then wait for it to finish
  568. */
  569. - spin_unlock(&boot_lock);
  570. + raw_spin_unlock(&boot_lock);
  571. return pen_release != -1 ? -ENOSYS : 0;
  572. }
  573. diff -Nur linux-3.18.10.orig/arch/arm/mach-ux500/platsmp.c linux-3.18.10/arch/arm/mach-ux500/platsmp.c
  574. --- linux-3.18.10.orig/arch/arm/mach-ux500/platsmp.c 2015-03-24 02:05:12.000000000 +0100
  575. +++ linux-3.18.10/arch/arm/mach-ux500/platsmp.c 2015-03-26 12:42:13.555582327 +0100
  576. @@ -51,7 +51,7 @@
  577. return NULL;
  578. }
  579. -static DEFINE_SPINLOCK(boot_lock);
  580. +static DEFINE_RAW_SPINLOCK(boot_lock);
  581. static void ux500_secondary_init(unsigned int cpu)
  582. {
  583. @@ -64,8 +64,8 @@
  584. /*
  585. * Synchronise with the boot thread.
  586. */
  587. - spin_lock(&boot_lock);
  588. - spin_unlock(&boot_lock);
  589. + raw_spin_lock(&boot_lock);
  590. + raw_spin_unlock(&boot_lock);
  591. }
  592. static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle)
  593. @@ -76,7 +76,7 @@
  594. * set synchronisation state between this boot processor
  595. * and the secondary one
  596. */
  597. - spin_lock(&boot_lock);
  598. + raw_spin_lock(&boot_lock);
  599. /*
  600. * The secondary processor is waiting to be released from
  601. @@ -97,7 +97,7 @@
  602. * now the secondary core is starting up let it run its
  603. * calibrations, then wait for it to finish
  604. */
  605. - spin_unlock(&boot_lock);
  606. + raw_spin_unlock(&boot_lock);
  607. return pen_release != -1 ? -ENOSYS : 0;
  608. }
  609. diff -Nur linux-3.18.10.orig/arch/arm/mm/fault.c linux-3.18.10/arch/arm/mm/fault.c
  610. --- linux-3.18.10.orig/arch/arm/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  611. +++ linux-3.18.10/arch/arm/mm/fault.c 2015-03-26 12:42:13.555582327 +0100
  612. @@ -277,7 +277,7 @@
  613. * If we're in an interrupt or have no user
  614. * context, we must not take the fault..
  615. */
  616. - if (in_atomic() || !mm)
  617. + if (!mm || pagefault_disabled())
  618. goto no_context;
  619. if (user_mode(regs))
  620. @@ -431,6 +431,9 @@
  621. if (addr < TASK_SIZE)
  622. return do_page_fault(addr, fsr, regs);
  623. + if (interrupts_enabled(regs))
  624. + local_irq_enable();
  625. +
  626. if (user_mode(regs))
  627. goto bad_area;
  628. @@ -498,6 +501,9 @@
  629. static int
  630. do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
  631. {
  632. + if (interrupts_enabled(regs))
  633. + local_irq_enable();
  634. +
  635. do_bad_area(addr, fsr, regs);
  636. return 0;
  637. }
  638. diff -Nur linux-3.18.10.orig/arch/arm/mm/highmem.c linux-3.18.10/arch/arm/mm/highmem.c
  639. --- linux-3.18.10.orig/arch/arm/mm/highmem.c 2015-03-24 02:05:12.000000000 +0100
  640. +++ linux-3.18.10/arch/arm/mm/highmem.c 2015-03-26 12:42:13.555582327 +0100
  641. @@ -53,6 +53,7 @@
  642. void *kmap_atomic(struct page *page)
  643. {
  644. + pte_t pte = mk_pte(page, kmap_prot);
  645. unsigned int idx;
  646. unsigned long vaddr;
  647. void *kmap;
  648. @@ -91,7 +92,10 @@
  649. * in place, so the contained TLB flush ensures the TLB is updated
  650. * with the new mapping.
  651. */
  652. - set_fixmap_pte(idx, mk_pte(page, kmap_prot));
  653. +#ifdef CONFIG_PREEMPT_RT_FULL
  654. + current->kmap_pte[type] = pte;
  655. +#endif
  656. + set_fixmap_pte(idx, pte);
  657. return (void *)vaddr;
  658. }
  659. @@ -108,12 +112,15 @@
  660. if (cache_is_vivt())
  661. __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
  662. +#ifdef CONFIG_PREEMPT_RT_FULL
  663. + current->kmap_pte[type] = __pte(0);
  664. +#endif
  665. #ifdef CONFIG_DEBUG_HIGHMEM
  666. BUG_ON(vaddr != __fix_to_virt(idx));
  667. - set_fixmap_pte(idx, __pte(0));
  668. #else
  669. (void) idx; /* to kill a warning */
  670. #endif
  671. + set_fixmap_pte(idx, __pte(0));
  672. kmap_atomic_idx_pop();
  673. } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
  674. /* this address was obtained through kmap_high_get() */
  675. @@ -125,6 +132,7 @@
  676. void *kmap_atomic_pfn(unsigned long pfn)
  677. {
  678. + pte_t pte = pfn_pte(pfn, kmap_prot);
  679. unsigned long vaddr;
  680. int idx, type;
  681. struct page *page = pfn_to_page(pfn);
  682. @@ -139,7 +147,10 @@
  683. #ifdef CONFIG_DEBUG_HIGHMEM
  684. BUG_ON(!pte_none(*(fixmap_page_table + idx)));
  685. #endif
  686. - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
  687. +#ifdef CONFIG_PREEMPT_RT_FULL
  688. + current->kmap_pte[type] = pte;
  689. +#endif
  690. + set_fixmap_pte(idx, pte);
  691. return (void *)vaddr;
  692. }
  693. @@ -153,3 +164,28 @@
  694. return pte_page(get_fixmap_pte(vaddr));
  695. }
  696. +
  697. +#if defined CONFIG_PREEMPT_RT_FULL
  698. +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
  699. +{
  700. + int i;
  701. +
  702. + /*
  703. + * Clear @prev's kmap_atomic mappings
  704. + */
  705. + for (i = 0; i < prev_p->kmap_idx; i++) {
  706. + int idx = i + KM_TYPE_NR * smp_processor_id();
  707. +
  708. + set_fixmap_pte(idx, __pte(0));
  709. + }
  710. + /*
  711. + * Restore @next_p's kmap_atomic mappings
  712. + */
  713. + for (i = 0; i < next_p->kmap_idx; i++) {
  714. + int idx = i + KM_TYPE_NR * smp_processor_id();
  715. +
  716. + if (!pte_none(next_p->kmap_pte[i]))
  717. + set_fixmap_pte(idx, next_p->kmap_pte[i]);
  718. + }
  719. +}
  720. +#endif
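
switch_kmaps() and the kmap_pte bookkeeping above rely on per-task state added elsewhere in the series: on PREEMPT_RT_FULL kmap_atomic() no longer disables preemption, so the live fixmap slots must follow the task across a context switch. A hedged sketch of the assumed task_struct fields and index helpers (the authoritative hunks are in include/linux/sched.h and include/linux/highmem.h):

/* Hedged sketch of the assumed per-task kmap_atomic state. */
#include <linux/sched.h>
#include <asm/kmap_types.h>

/* Fields assumed to be added to struct task_struct:
 *	int	kmap_idx;		-- number of live atomic kmap slots
 *	pte_t	kmap_pte[KM_TYPE_NR];	-- saved fixmap PTE per slot
 */

#ifdef CONFIG_PREEMPT_RT_FULL
static inline int kmap_atomic_idx_push(void)
{
	current->kmap_idx++;			/* slot index travels with the task */
	BUG_ON(current->kmap_idx > KM_TYPE_NR);
	return current->kmap_idx - 1;
}

static inline void kmap_atomic_idx_pop(void)
{
	current->kmap_idx--;
	BUG_ON(current->kmap_idx < 0);
}
#endif
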
  721. diff -Nur linux-3.18.10.orig/arch/arm/plat-versatile/platsmp.c linux-3.18.10/arch/arm/plat-versatile/platsmp.c
  722. --- linux-3.18.10.orig/arch/arm/plat-versatile/platsmp.c 2015-03-24 02:05:12.000000000 +0100
  723. +++ linux-3.18.10/arch/arm/plat-versatile/platsmp.c 2015-03-26 12:42:13.555582327 +0100
  724. @@ -30,7 +30,7 @@
  725. sync_cache_w(&pen_release);
  726. }
  727. -static DEFINE_SPINLOCK(boot_lock);
  728. +static DEFINE_RAW_SPINLOCK(boot_lock);
  729. void versatile_secondary_init(unsigned int cpu)
  730. {
  731. @@ -43,8 +43,8 @@
  732. /*
  733. * Synchronise with the boot thread.
  734. */
  735. - spin_lock(&boot_lock);
  736. - spin_unlock(&boot_lock);
  737. + raw_spin_lock(&boot_lock);
  738. + raw_spin_unlock(&boot_lock);
  739. }
  740. int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
  741. @@ -55,7 +55,7 @@
  742. * Set synchronisation state between this boot processor
  743. * and the secondary one
  744. */
  745. - spin_lock(&boot_lock);
  746. + raw_spin_lock(&boot_lock);
  747. /*
  748. * This is really belt and braces; we hold unintended secondary
  749. @@ -85,7 +85,7 @@
  750. * now the secondary core is starting up let it run its
  751. * calibrations, then wait for it to finish
  752. */
  753. - spin_unlock(&boot_lock);
  754. + raw_spin_unlock(&boot_lock);
  755. return pen_release != -1 ? -ENOSYS : 0;
  756. }
  757. diff -Nur linux-3.18.10.orig/arch/arm64/kernel/smp.c linux-3.18.10/arch/arm64/kernel/smp.c
  758. --- linux-3.18.10.orig/arch/arm64/kernel/smp.c 2015-03-24 02:05:12.000000000 +0100
  759. +++ linux-3.18.10/arch/arm64/kernel/smp.c 2015-03-26 12:42:13.559582331 +0100
  760. @@ -529,12 +529,14 @@
  761. }
  762. #ifdef CONFIG_IRQ_WORK
  763. +#ifndef CONFIG_PREEMPT_RT_FULL
  764. void arch_irq_work_raise(void)
  765. {
  766. if (__smp_cross_call)
  767. smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
  768. }
  769. #endif
  770. +#endif
  771. static DEFINE_RAW_SPINLOCK(stop_lock);
  772. diff -Nur linux-3.18.10.orig/arch/avr32/mm/fault.c linux-3.18.10/arch/avr32/mm/fault.c
  773. --- linux-3.18.10.orig/arch/avr32/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  774. +++ linux-3.18.10/arch/avr32/mm/fault.c 2015-03-26 12:42:13.559582331 +0100
  775. @@ -81,7 +81,7 @@
  776. * If we're in an interrupt or have no user context, we must
  777. * not take the fault...
  778. */
  779. - if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
  780. + if (!mm || regs->sr & SYSREG_BIT(GM) || pagefault_disabled())
  781. goto no_context;
  782. local_irq_enable();
  783. diff -Nur linux-3.18.10.orig/arch/cris/mm/fault.c linux-3.18.10/arch/cris/mm/fault.c
  784. --- linux-3.18.10.orig/arch/cris/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  785. +++ linux-3.18.10/arch/cris/mm/fault.c 2015-03-26 12:42:13.559582331 +0100
  786. @@ -113,7 +113,7 @@
  787. * user context, we must not take the fault.
  788. */
  789. - if (in_atomic() || !mm)
  790. + if (!mm || pagefault_disabled())
  791. goto no_context;
  792. if (user_mode(regs))
  793. diff -Nur linux-3.18.10.orig/arch/frv/mm/fault.c linux-3.18.10/arch/frv/mm/fault.c
  794. --- linux-3.18.10.orig/arch/frv/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  795. +++ linux-3.18.10/arch/frv/mm/fault.c 2015-03-26 12:42:13.559582331 +0100
  796. @@ -78,7 +78,7 @@
  797. * If we're in an interrupt or have no user
  798. * context, we must not take the fault..
  799. */
  800. - if (in_atomic() || !mm)
  801. + if (!mm || pagefault_disabled())
  802. goto no_context;
  803. if (user_mode(__frame))
  804. diff -Nur linux-3.18.10.orig/arch/ia64/mm/fault.c linux-3.18.10/arch/ia64/mm/fault.c
  805. --- linux-3.18.10.orig/arch/ia64/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  806. +++ linux-3.18.10/arch/ia64/mm/fault.c 2015-03-26 12:42:13.559582331 +0100
  807. @@ -96,7 +96,7 @@
  808. /*
  809. * If we're in an interrupt or have no user context, we must not take the fault..
  810. */
  811. - if (in_atomic() || !mm)
  812. + if (!mm || pagefault_disabled())
  813. goto no_context;
  814. #ifdef CONFIG_VIRTUAL_MEM_MAP
  815. diff -Nur linux-3.18.10.orig/arch/Kconfig linux-3.18.10/arch/Kconfig
  816. --- linux-3.18.10.orig/arch/Kconfig 2015-03-24 02:05:12.000000000 +0100
  817. +++ linux-3.18.10/arch/Kconfig 2015-03-26 12:42:13.555582327 +0100
  818. @@ -6,6 +6,7 @@
  819. tristate "OProfile system profiling"
  820. depends on PROFILING
  821. depends on HAVE_OPROFILE
  822. + depends on !PREEMPT_RT_FULL
  823. select RING_BUFFER
  824. select RING_BUFFER_ALLOW_SWAP
  825. help
  826. diff -Nur linux-3.18.10.orig/arch/m32r/mm/fault.c linux-3.18.10/arch/m32r/mm/fault.c
  827. --- linux-3.18.10.orig/arch/m32r/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  828. +++ linux-3.18.10/arch/m32r/mm/fault.c 2015-03-26 12:42:13.559582331 +0100
  829. @@ -114,7 +114,7 @@
  830. * If we're in an interrupt or have no user context or are running in an
  831. * atomic region then we must not take the fault..
  832. */
  833. - if (in_atomic() || !mm)
  834. + if (!mm || pagefault_disabled())
  835. goto bad_area_nosemaphore;
  836. if (error_code & ACE_USERMODE)
  837. diff -Nur linux-3.18.10.orig/arch/m68k/mm/fault.c linux-3.18.10/arch/m68k/mm/fault.c
  838. --- linux-3.18.10.orig/arch/m68k/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  839. +++ linux-3.18.10/arch/m68k/mm/fault.c 2015-03-26 12:42:13.559582331 +0100
  840. @@ -81,7 +81,7 @@
  841. * If we're in an interrupt or have no user
  842. * context, we must not take the fault..
  843. */
  844. - if (in_atomic() || !mm)
  845. + if (!mm || pagefault_disabled())
  846. goto no_context;
  847. if (user_mode(regs))
  848. diff -Nur linux-3.18.10.orig/arch/microblaze/mm/fault.c linux-3.18.10/arch/microblaze/mm/fault.c
  849. --- linux-3.18.10.orig/arch/microblaze/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  850. +++ linux-3.18.10/arch/microblaze/mm/fault.c 2015-03-26 12:42:13.559582331 +0100
  851. @@ -107,7 +107,7 @@
  852. if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
  853. is_write = 0;
  854. - if (unlikely(in_atomic() || !mm)) {
  855. + if (unlikely(!mm || pagefault_disabled())) {
  856. if (kernel_mode(regs))
  857. goto bad_area_nosemaphore;
  858. diff -Nur linux-3.18.10.orig/arch/mips/Kconfig linux-3.18.10/arch/mips/Kconfig
  859. --- linux-3.18.10.orig/arch/mips/Kconfig 2015-03-24 02:05:12.000000000 +0100
  860. +++ linux-3.18.10/arch/mips/Kconfig 2015-03-26 12:42:13.559582331 +0100
  861. @@ -2196,7 +2196,7 @@
  862. #
  863. config HIGHMEM
  864. bool "High Memory Support"
  865. - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA
  866. + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL
  867. config CPU_SUPPORTS_HIGHMEM
  868. bool
  869. diff -Nur linux-3.18.10.orig/arch/mips/kernel/signal.c linux-3.18.10/arch/mips/kernel/signal.c
  870. --- linux-3.18.10.orig/arch/mips/kernel/signal.c 2015-03-24 02:05:12.000000000 +0100
  871. +++ linux-3.18.10/arch/mips/kernel/signal.c 2015-03-26 12:42:13.559582331 +0100
  872. @@ -613,6 +613,7 @@
  873. __u32 thread_info_flags)
  874. {
  875. local_irq_enable();
  876. + preempt_check_resched();
  877. user_exit();
  878. diff -Nur linux-3.18.10.orig/arch/mips/mm/fault.c linux-3.18.10/arch/mips/mm/fault.c
  879. --- linux-3.18.10.orig/arch/mips/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  880. +++ linux-3.18.10/arch/mips/mm/fault.c 2015-03-26 12:42:13.559582331 +0100
  881. @@ -89,7 +89,7 @@
  882. * If we're in an interrupt or have no user
  883. * context, we must not take the fault..
  884. */
  885. - if (in_atomic() || !mm)
  886. + if (!mm || pagefault_disabled())
  887. goto bad_area_nosemaphore;
  888. if (user_mode(regs))
  889. diff -Nur linux-3.18.10.orig/arch/mips/mm/init.c linux-3.18.10/arch/mips/mm/init.c
  890. --- linux-3.18.10.orig/arch/mips/mm/init.c 2015-03-24 02:05:12.000000000 +0100
  891. +++ linux-3.18.10/arch/mips/mm/init.c 2015-03-26 12:42:13.559582331 +0100
  892. @@ -90,7 +90,7 @@
  893. BUG_ON(Page_dcache_dirty(page));
  894. - pagefault_disable();
  895. + raw_pagefault_disable();
  896. idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
  897. idx += in_interrupt() ? FIX_N_COLOURS : 0;
  898. vaddr = __fix_to_virt(FIX_CMAP_END - idx);
  899. @@ -146,7 +146,7 @@
  900. tlbw_use_hazard();
  901. write_c0_entryhi(old_ctx);
  902. local_irq_restore(flags);
  903. - pagefault_enable();
  904. + raw_pagefault_enable();
  905. }
  906. void copy_user_highpage(struct page *to, struct page *from,
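
kmap_coherent() runs with interrupts off and installs a wired TLB entry by hand, so it keeps the old preempt-count based behaviour via raw_pagefault_disable()/raw_pagefault_enable() instead of the RT per-task variant sketched earlier (which would leave the section preemptible). A hedged guess at how the raw_ helpers are defined in the linux/uaccess.h hunk of this series:

/* Hedged sketch; the real definitions live elsewhere in the series. */
#include <linux/preempt.h>

static inline void raw_pagefault_disable(void)
{
	preempt_count_inc();	/* classic behaviour: fault path sees in_atomic() */
	barrier();
}

static inline void raw_pagefault_enable(void)
{
	barrier();
	preempt_count_dec();	/* the real helper may also fold in a resched check */
	barrier();
}
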
  907. diff -Nur linux-3.18.10.orig/arch/mn10300/mm/fault.c linux-3.18.10/arch/mn10300/mm/fault.c
  908. --- linux-3.18.10.orig/arch/mn10300/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  909. +++ linux-3.18.10/arch/mn10300/mm/fault.c 2015-03-26 12:42:13.559582331 +0100
  910. @@ -168,7 +168,7 @@
  911. * If we're in an interrupt or have no user
  912. * context, we must not take the fault..
  913. */
  914. - if (in_atomic() || !mm)
  915. + if (!mm || pagefault_disabled())
  916. goto no_context;
  917. if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
  918. diff -Nur linux-3.18.10.orig/arch/parisc/mm/fault.c linux-3.18.10/arch/parisc/mm/fault.c
  919. --- linux-3.18.10.orig/arch/parisc/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  920. +++ linux-3.18.10/arch/parisc/mm/fault.c 2015-03-26 12:42:13.559582331 +0100
  921. @@ -207,7 +207,7 @@
  922. int fault;
  923. unsigned int flags;
  924. - if (in_atomic())
  925. + if (pagefault_disabled())
  926. goto no_context;
  927. tsk = current;
  928. diff -Nur linux-3.18.10.orig/arch/powerpc/include/asm/thread_info.h linux-3.18.10/arch/powerpc/include/asm/thread_info.h
  929. --- linux-3.18.10.orig/arch/powerpc/include/asm/thread_info.h 2015-03-24 02:05:12.000000000 +0100
  930. +++ linux-3.18.10/arch/powerpc/include/asm/thread_info.h 2015-03-26 12:42:13.559582331 +0100
  931. @@ -43,6 +43,8 @@
  932. int cpu; /* cpu we're on */
  933. int preempt_count; /* 0 => preemptable,
  934. <0 => BUG */
  935. + int preempt_lazy_count; /* 0 => preemptable,
  936. + <0 => BUG */
  937. struct restart_block restart_block;
  938. unsigned long local_flags; /* private flags for thread */
  939. @@ -88,8 +90,7 @@
  940. #define TIF_SYSCALL_TRACE 0 /* syscall trace active */
  941. #define TIF_SIGPENDING 1 /* signal pending */
  942. #define TIF_NEED_RESCHED 2 /* rescheduling necessary */
  943. -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
  944. - TIF_NEED_RESCHED */
  945. +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */
  946. #define TIF_32BIT 4 /* 32 bit binary */
  947. #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */
  948. #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
  949. @@ -107,6 +108,8 @@
  950. #if defined(CONFIG_PPC64)
  951. #define TIF_ELF2ABI 18 /* function descriptors must die! */
  952. #endif
  953. +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling
  954. + TIF_NEED_RESCHED */
  955. /* as above, but as bit values */
  956. #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
  957. @@ -125,14 +128,16 @@
  958. #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
  959. #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
  960. #define _TIF_NOHZ (1<<TIF_NOHZ)
  961. +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
  962. #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
  963. _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
  964. _TIF_NOHZ)
  965. #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
  966. _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
  967. - _TIF_RESTORE_TM)
  968. + _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY)
  969. #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
  970. +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  971. /* Bits in local_flags */
  972. /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
  973. diff -Nur linux-3.18.10.orig/arch/powerpc/Kconfig linux-3.18.10/arch/powerpc/Kconfig
  974. --- linux-3.18.10.orig/arch/powerpc/Kconfig 2015-03-24 02:05:12.000000000 +0100
  975. +++ linux-3.18.10/arch/powerpc/Kconfig 2015-03-26 12:42:13.559582331 +0100
  976. @@ -60,10 +60,11 @@
  977. config RWSEM_GENERIC_SPINLOCK
  978. bool
  979. + default y if PREEMPT_RT_FULL
  980. config RWSEM_XCHGADD_ALGORITHM
  981. bool
  982. - default y
  983. + default y if !PREEMPT_RT_FULL
  984. config GENERIC_LOCKBREAK
  985. bool
  986. @@ -136,6 +137,7 @@
  987. select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
  988. select GENERIC_STRNCPY_FROM_USER
  989. select GENERIC_STRNLEN_USER
  990. + select HAVE_PREEMPT_LAZY
  991. select HAVE_MOD_ARCH_SPECIFIC
  992. select MODULES_USE_ELF_RELA
  993. select CLONE_BACKWARDS
  994. @@ -303,7 +305,7 @@
  995. config HIGHMEM
  996. bool "High memory support"
  997. - depends on PPC32
  998. + depends on PPC32 && !PREEMPT_RT_FULL
  999. source kernel/Kconfig.hz
  1000. source kernel/Kconfig.preempt
  1001. diff -Nur linux-3.18.10.orig/arch/powerpc/kernel/asm-offsets.c linux-3.18.10/arch/powerpc/kernel/asm-offsets.c
  1002. --- linux-3.18.10.orig/arch/powerpc/kernel/asm-offsets.c 2015-03-24 02:05:12.000000000 +0100
  1003. +++ linux-3.18.10/arch/powerpc/kernel/asm-offsets.c 2015-03-26 12:42:13.559582331 +0100
  1004. @@ -159,6 +159,7 @@
  1005. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  1006. DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
  1007. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  1008. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  1009. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  1010. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  1011. diff -Nur linux-3.18.10.orig/arch/powerpc/kernel/entry_32.S linux-3.18.10/arch/powerpc/kernel/entry_32.S
  1012. --- linux-3.18.10.orig/arch/powerpc/kernel/entry_32.S 2015-03-24 02:05:12.000000000 +0100
  1013. +++ linux-3.18.10/arch/powerpc/kernel/entry_32.S 2015-03-26 12:42:13.559582331 +0100
  1014. @@ -890,7 +890,14 @@
  1015. cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
  1016. bne restore
  1017. andi. r8,r8,_TIF_NEED_RESCHED
  1018. + bne+ 1f
  1019. + lwz r0,TI_PREEMPT_LAZY(r9)
  1020. + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
  1021. + bne restore
  1022. + lwz r0,TI_FLAGS(r9)
  1023. + andi. r0,r0,_TIF_NEED_RESCHED_LAZY
  1024. beq+ restore
  1025. +1:
  1026. lwz r3,_MSR(r1)
  1027. andi. r0,r3,MSR_EE /* interrupts off? */
  1028. beq restore /* don't schedule if so */
  1029. @@ -901,11 +908,11 @@
  1030. */
  1031. bl trace_hardirqs_off
  1032. #endif
  1033. -1: bl preempt_schedule_irq
  1034. +2: bl preempt_schedule_irq
  1035. CURRENT_THREAD_INFO(r9, r1)
  1036. lwz r3,TI_FLAGS(r9)
  1037. - andi. r0,r3,_TIF_NEED_RESCHED
  1038. - bne- 1b
  1039. + andi. r0,r3,_TIF_NEED_RESCHED_MASK
  1040. + bne- 2b
  1041. #ifdef CONFIG_TRACE_IRQFLAGS
  1042. /* And now, to properly rebalance the above, we tell lockdep they
  1043. * are being turned back on, which will happen when we return
  1044. @@ -1226,7 +1233,7 @@
  1045. #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
  1046. do_work: /* r10 contains MSR_KERNEL here */
  1047. - andi. r0,r9,_TIF_NEED_RESCHED
  1048. + andi. r0,r9,_TIF_NEED_RESCHED_MASK
  1049. beq do_user_signal
  1050. do_resched: /* r10 contains MSR_KERNEL here */
  1051. @@ -1247,7 +1254,7 @@
  1052. MTMSRD(r10) /* disable interrupts */
  1053. CURRENT_THREAD_INFO(r9, r1)
  1054. lwz r9,TI_FLAGS(r9)
  1055. - andi. r0,r9,_TIF_NEED_RESCHED
  1056. + andi. r0,r9,_TIF_NEED_RESCHED_MASK
  1057. bne- do_resched
  1058. andi. r0,r9,_TIF_USER_WORK_MASK
  1059. beq restore_user
  1060. diff -Nur linux-3.18.10.orig/arch/powerpc/kernel/entry_64.S linux-3.18.10/arch/powerpc/kernel/entry_64.S
  1061. --- linux-3.18.10.orig/arch/powerpc/kernel/entry_64.S 2015-03-24 02:05:12.000000000 +0100
  1062. +++ linux-3.18.10/arch/powerpc/kernel/entry_64.S 2015-03-26 12:42:13.559582331 +0100
  1063. @@ -644,7 +644,7 @@
  1064. #else
  1065. beq restore
  1066. #endif
  1067. -1: andi. r0,r4,_TIF_NEED_RESCHED
  1068. +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK
  1069. beq 2f
  1070. bl restore_interrupts
  1071. SCHEDULE_USER
  1072. @@ -706,10 +706,18 @@
  1073. #ifdef CONFIG_PREEMPT
  1074. /* Check if we need to preempt */
  1075. + lwz r8,TI_PREEMPT(r9)
  1076. + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */
  1077. + bne restore
  1078. andi. r0,r4,_TIF_NEED_RESCHED
  1079. + bne+ check_count
  1080. +
  1081. + andi. r0,r4,_TIF_NEED_RESCHED_LAZY
  1082. beq+ restore
  1083. + lwz r8,TI_PREEMPT_LAZY(r9)
  1084. +
  1085. /* Check that preempt_count() == 0 and interrupts are enabled */
  1086. - lwz r8,TI_PREEMPT(r9)
  1087. +check_count:
  1088. cmpwi cr1,r8,0
  1089. ld r0,SOFTE(r1)
  1090. cmpdi r0,0
  1091. @@ -726,7 +734,7 @@
  1092. /* Re-test flags and eventually loop */
  1093. CURRENT_THREAD_INFO(r9, r1)
  1094. ld r4,TI_FLAGS(r9)
  1095. - andi. r0,r4,_TIF_NEED_RESCHED
  1096. + andi. r0,r4,_TIF_NEED_RESCHED_MASK
  1097. bne 1b
  1098. /*
  1099. diff -Nur linux-3.18.10.orig/arch/powerpc/kernel/irq.c linux-3.18.10/arch/powerpc/kernel/irq.c
  1100. --- linux-3.18.10.orig/arch/powerpc/kernel/irq.c 2015-03-24 02:05:12.000000000 +0100
  1101. +++ linux-3.18.10/arch/powerpc/kernel/irq.c 2015-03-26 12:42:13.559582331 +0100
  1102. @@ -615,6 +615,7 @@
  1103. }
  1104. }
  1105. +#ifndef CONFIG_PREEMPT_RT_FULL
  1106. void do_softirq_own_stack(void)
  1107. {
  1108. struct thread_info *curtp, *irqtp;
  1109. @@ -632,6 +633,7 @@
  1110. if (irqtp->flags)
  1111. set_bits(irqtp->flags, &curtp->flags);
  1112. }
  1113. +#endif
  1114. irq_hw_number_t virq_to_hw(unsigned int virq)
  1115. {
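
Excluding do_softirq_own_stack() on PREEMPT_RT_FULL (here and in the sh and sparc hunks below) removes the switch to a dedicated softirq stack: with forced-threaded softirqs there is no raised-from-hardirq softirq execution left that would need it. The generic fallback then simply runs softirqs on the current stack; a hedged sketch of that shape, presumably paired with a matching change to include/linux/interrupt.h elsewhere in the series:

/* Hedged sketch of the generic inline fallback (mainline shape). */
#ifndef __ARCH_HAS_DO_SOFTIRQ
static inline void do_softirq_own_stack(void)
{
	__do_softirq();		/* no stack switch: run on the current stack */
}
#endif
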
  1116. diff -Nur linux-3.18.10.orig/arch/powerpc/kernel/misc_32.S linux-3.18.10/arch/powerpc/kernel/misc_32.S
  1117. --- linux-3.18.10.orig/arch/powerpc/kernel/misc_32.S 2015-03-24 02:05:12.000000000 +0100
  1118. +++ linux-3.18.10/arch/powerpc/kernel/misc_32.S 2015-03-26 12:42:13.559582331 +0100
  1119. @@ -40,6 +40,7 @@
  1120. * We store the saved ksp_limit in the unused part
  1121. * of the STACK_FRAME_OVERHEAD
  1122. */
  1123. +#ifndef CONFIG_PREEMPT_RT_FULL
  1124. _GLOBAL(call_do_softirq)
  1125. mflr r0
  1126. stw r0,4(r1)
  1127. @@ -56,6 +57,7 @@
  1128. stw r10,THREAD+KSP_LIMIT(r2)
  1129. mtlr r0
  1130. blr
  1131. +#endif
  1132. /*
  1133. * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
  1134. diff -Nur linux-3.18.10.orig/arch/powerpc/kernel/misc_64.S linux-3.18.10/arch/powerpc/kernel/misc_64.S
  1135. --- linux-3.18.10.orig/arch/powerpc/kernel/misc_64.S 2015-03-24 02:05:12.000000000 +0100
  1136. +++ linux-3.18.10/arch/powerpc/kernel/misc_64.S 2015-03-26 12:42:13.559582331 +0100
  1137. @@ -29,6 +29,7 @@
  1138. .text
  1139. +#ifndef CONFIG_PREEMPT_RT_FULL
  1140. _GLOBAL(call_do_softirq)
  1141. mflr r0
  1142. std r0,16(r1)
  1143. @@ -39,6 +40,7 @@
  1144. ld r0,16(r1)
  1145. mtlr r0
  1146. blr
  1147. +#endif
  1148. _GLOBAL(call_do_irq)
  1149. mflr r0
  1150. diff -Nur linux-3.18.10.orig/arch/powerpc/kernel/time.c linux-3.18.10/arch/powerpc/kernel/time.c
  1151. --- linux-3.18.10.orig/arch/powerpc/kernel/time.c 2015-03-24 02:05:12.000000000 +0100
  1152. +++ linux-3.18.10/arch/powerpc/kernel/time.c 2015-03-26 12:42:13.559582331 +0100
  1153. @@ -424,7 +424,7 @@
  1154. EXPORT_SYMBOL(profile_pc);
  1155. #endif
  1156. -#ifdef CONFIG_IRQ_WORK
  1157. +#if defined(CONFIG_IRQ_WORK) && !defined(CONFIG_PREEMPT_RT_FULL)
  1158. /*
  1159. * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
  1160. diff -Nur linux-3.18.10.orig/arch/powerpc/mm/fault.c linux-3.18.10/arch/powerpc/mm/fault.c
  1161. --- linux-3.18.10.orig/arch/powerpc/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  1162. +++ linux-3.18.10/arch/powerpc/mm/fault.c 2015-03-26 12:42:13.559582331 +0100
  1163. @@ -273,7 +273,7 @@
  1164. if (!arch_irq_disabled_regs(regs))
  1165. local_irq_enable();
  1166. - if (in_atomic() || mm == NULL) {
  1167. + if (in_atomic() || mm == NULL || pagefault_disabled()) {
  1168. if (!user_mode(regs)) {
  1169. rc = SIGSEGV;
  1170. goto bail;
  1171. diff -Nur linux-3.18.10.orig/arch/s390/mm/fault.c linux-3.18.10/arch/s390/mm/fault.c
  1172. --- linux-3.18.10.orig/arch/s390/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  1173. +++ linux-3.18.10/arch/s390/mm/fault.c 2015-03-26 12:42:13.559582331 +0100
  1174. @@ -435,7 +435,8 @@
  1175. * user context.
  1176. */
  1177. fault = VM_FAULT_BADCONTEXT;
  1178. - if (unlikely(!user_space_fault(regs) || in_atomic() || !mm))
  1179. + if (unlikely(!user_space_fault(regs) || !mm ||
  1180. + tsk->pagefault_disabled))
  1181. goto out;
  1182. address = trans_exc_code & __FAIL_ADDR_MASK;
  1183. diff -Nur linux-3.18.10.orig/arch/score/mm/fault.c linux-3.18.10/arch/score/mm/fault.c
  1184. --- linux-3.18.10.orig/arch/score/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  1185. +++ linux-3.18.10/arch/score/mm/fault.c 2015-03-26 12:42:13.559582331 +0100
  1186. @@ -73,7 +73,7 @@
  1187. * If we're in an interrupt or have no user
  1188. * context, we must not take the fault..
  1189. */
  1190. - if (in_atomic() || !mm)
  1191. + if (!mm || pagefault_disabled())
  1192. goto bad_area_nosemaphore;
  1193. if (user_mode(regs))
  1194. diff -Nur linux-3.18.10.orig/arch/sh/kernel/irq.c linux-3.18.10/arch/sh/kernel/irq.c
  1195. --- linux-3.18.10.orig/arch/sh/kernel/irq.c 2015-03-24 02:05:12.000000000 +0100
  1196. +++ linux-3.18.10/arch/sh/kernel/irq.c 2015-03-26 12:42:13.559582331 +0100
  1197. @@ -149,6 +149,7 @@
  1198. hardirq_ctx[cpu] = NULL;
  1199. }
  1200. +#ifndef CONFIG_PREEMPT_RT_FULL
  1201. void do_softirq_own_stack(void)
  1202. {
  1203. struct thread_info *curctx;
  1204. @@ -176,6 +177,7 @@
  1205. "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
  1206. );
  1207. }
  1208. +#endif
  1209. #else
  1210. static inline void handle_one_irq(unsigned int irq)
  1211. {
  1212. diff -Nur linux-3.18.10.orig/arch/sh/mm/fault.c linux-3.18.10/arch/sh/mm/fault.c
  1213. --- linux-3.18.10.orig/arch/sh/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  1214. +++ linux-3.18.10/arch/sh/mm/fault.c 2015-03-26 12:42:13.559582331 +0100
  1215. @@ -440,7 +440,7 @@
  1216. * If we're in an interrupt, have no user context or are running
  1217. * in an atomic region then we must not take the fault:
  1218. */
  1219. - if (unlikely(in_atomic() || !mm)) {
  1220. + if (unlikely(!mm || pagefault_disabled())) {
  1221. bad_area_nosemaphore(regs, error_code, address);
  1222. return;
  1223. }
  1224. diff -Nur linux-3.18.10.orig/arch/sparc/Kconfig linux-3.18.10/arch/sparc/Kconfig
  1225. --- linux-3.18.10.orig/arch/sparc/Kconfig 2015-03-24 02:05:12.000000000 +0100
  1226. +++ linux-3.18.10/arch/sparc/Kconfig 2015-03-26 12:42:13.559582331 +0100
  1227. @@ -182,12 +182,10 @@
  1228. source kernel/Kconfig.hz
  1229. config RWSEM_GENERIC_SPINLOCK
  1230. - bool
  1231. - default y if SPARC32
  1232. + def_bool PREEMPT_RT_FULL
  1233. config RWSEM_XCHGADD_ALGORITHM
  1234. - bool
  1235. - default y if SPARC64
  1236. + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
  1237. config GENERIC_HWEIGHT
  1238. bool
  1239. @@ -528,6 +526,10 @@
  1240. source "fs/Kconfig.binfmt"
  1241. +config EARLY_PRINTK
  1242. + bool
  1243. + default y
  1244. +
  1245. config COMPAT
  1246. bool
  1247. depends on SPARC64
  1248. diff -Nur linux-3.18.10.orig/arch/sparc/kernel/irq_64.c linux-3.18.10/arch/sparc/kernel/irq_64.c
  1249. --- linux-3.18.10.orig/arch/sparc/kernel/irq_64.c 2015-03-24 02:05:12.000000000 +0100
  1250. +++ linux-3.18.10/arch/sparc/kernel/irq_64.c 2015-03-26 12:42:13.559582331 +0100
  1251. @@ -849,6 +849,7 @@
  1252. set_irq_regs(old_regs);
  1253. }
  1254. +#ifndef CONFIG_PREEMPT_RT_FULL
  1255. void do_softirq_own_stack(void)
  1256. {
  1257. void *orig_sp, *sp = softirq_stack[smp_processor_id()];
  1258. @@ -863,6 +864,7 @@
  1259. __asm__ __volatile__("mov %0, %%sp"
  1260. : : "r" (orig_sp));
  1261. }
  1262. +#endif
  1263. #ifdef CONFIG_HOTPLUG_CPU
  1264. void fixup_irqs(void)
  1265. diff -Nur linux-3.18.10.orig/arch/sparc/kernel/pcr.c linux-3.18.10/arch/sparc/kernel/pcr.c
  1266. --- linux-3.18.10.orig/arch/sparc/kernel/pcr.c 2015-03-24 02:05:12.000000000 +0100
  1267. +++ linux-3.18.10/arch/sparc/kernel/pcr.c 2015-03-26 12:42:13.559582331 +0100
  1268. @@ -43,10 +43,12 @@
  1269. set_irq_regs(old_regs);
  1270. }
  1271. +#ifndef CONFIG_PREEMPT_RT_FULL
  1272. void arch_irq_work_raise(void)
  1273. {
  1274. set_softint(1 << PIL_DEFERRED_PCR_WORK);
  1275. }
  1276. +#endif
  1277. const struct pcr_ops *pcr_ops;
  1278. EXPORT_SYMBOL_GPL(pcr_ops);
  1279. diff -Nur linux-3.18.10.orig/arch/sparc/kernel/setup_32.c linux-3.18.10/arch/sparc/kernel/setup_32.c
  1280. --- linux-3.18.10.orig/arch/sparc/kernel/setup_32.c 2015-03-24 02:05:12.000000000 +0100
  1281. +++ linux-3.18.10/arch/sparc/kernel/setup_32.c 2015-03-26 12:42:13.559582331 +0100
  1282. @@ -309,6 +309,7 @@
  1283. boot_flags_init(*cmdline_p);
  1284. + early_console = &prom_early_console;
  1285. register_console(&prom_early_console);
  1286. printk("ARCH: ");
  1287. diff -Nur linux-3.18.10.orig/arch/sparc/kernel/setup_64.c linux-3.18.10/arch/sparc/kernel/setup_64.c
  1288. --- linux-3.18.10.orig/arch/sparc/kernel/setup_64.c 2015-03-24 02:05:12.000000000 +0100
  1289. +++ linux-3.18.10/arch/sparc/kernel/setup_64.c 2015-03-26 12:42:13.559582331 +0100
  1290. @@ -563,6 +563,12 @@
  1291. pause_patch();
  1292. }
  1293. +static inline void register_prom_console(void)
  1294. +{
  1295. + early_console = &prom_early_console;
  1296. + register_console(&prom_early_console);
  1297. +}
  1298. +
  1299. void __init setup_arch(char **cmdline_p)
  1300. {
  1301. /* Initialize PROM console and command line. */
  1302. @@ -574,7 +580,7 @@
  1303. #ifdef CONFIG_EARLYFB
  1304. if (btext_find_display())
  1305. #endif
  1306. - register_console(&prom_early_console);
  1307. + register_prom_console();
  1308. if (tlb_type == hypervisor)
  1309. printk("ARCH: SUN4V\n");
  1310. diff -Nur linux-3.18.10.orig/arch/sparc/mm/fault_32.c linux-3.18.10/arch/sparc/mm/fault_32.c
  1311. --- linux-3.18.10.orig/arch/sparc/mm/fault_32.c 2015-03-24 02:05:12.000000000 +0100
  1312. +++ linux-3.18.10/arch/sparc/mm/fault_32.c 2015-03-26 12:42:13.559582331 +0100
  1313. @@ -196,7 +196,7 @@
  1314. * If we're in an interrupt or have no user
  1315. * context, we must not take the fault..
  1316. */
  1317. - if (in_atomic() || !mm)
  1318. + if (!mm || pagefault_disabled())
  1319. goto no_context;
  1320. perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
  1321. diff -Nur linux-3.18.10.orig/arch/sparc/mm/fault_64.c linux-3.18.10/arch/sparc/mm/fault_64.c
  1322. --- linux-3.18.10.orig/arch/sparc/mm/fault_64.c 2015-03-24 02:05:12.000000000 +0100
  1323. +++ linux-3.18.10/arch/sparc/mm/fault_64.c 2015-03-26 12:42:13.559582331 +0100
  1324. @@ -330,7 +330,7 @@
  1325. * If we're in an interrupt or have no user
  1326. * context, we must not take the fault..
  1327. */
  1328. - if (in_atomic() || !mm)
  1329. + if (!mm || pagefault_disabled())
  1330. goto intr_or_no_mm;
  1331. perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
  1332. diff -Nur linux-3.18.10.orig/arch/tile/mm/fault.c linux-3.18.10/arch/tile/mm/fault.c
  1333. --- linux-3.18.10.orig/arch/tile/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  1334. +++ linux-3.18.10/arch/tile/mm/fault.c 2015-03-26 12:42:13.559582331 +0100
  1335. @@ -357,7 +357,7 @@
  1336. * If we're in an interrupt, have no user context or are running in an
  1337. * atomic region then we must not take the fault.
  1338. */
  1339. - if (in_atomic() || !mm) {
  1340. + if (!mm || pagefault_disabled()) {
  1341. vma = NULL; /* happy compiler */
  1342. goto bad_area_nosemaphore;
  1343. }
  1344. diff -Nur linux-3.18.10.orig/arch/um/kernel/trap.c linux-3.18.10/arch/um/kernel/trap.c
  1345. --- linux-3.18.10.orig/arch/um/kernel/trap.c 2015-03-24 02:05:12.000000000 +0100
  1346. +++ linux-3.18.10/arch/um/kernel/trap.c 2015-03-26 12:42:13.559582331 +0100
  1347. @@ -38,7 +38,7 @@
  1348. * If the fault was during atomic operation, don't take the fault, just
  1349. * fail.
  1350. */
  1351. - if (in_atomic())
  1352. + if (pagefault_disabled())
  1353. goto out_nosemaphore;
  1354. if (is_user)
  1355. diff -Nur linux-3.18.10.orig/arch/x86/crypto/aesni-intel_glue.c linux-3.18.10/arch/x86/crypto/aesni-intel_glue.c
  1356. --- linux-3.18.10.orig/arch/x86/crypto/aesni-intel_glue.c 2015-03-24 02:05:12.000000000 +0100
  1357. +++ linux-3.18.10/arch/x86/crypto/aesni-intel_glue.c 2015-03-26 12:42:13.559582331 +0100
  1358. @@ -381,14 +381,14 @@
  1359. err = blkcipher_walk_virt(desc, &walk);
  1360. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1361. - kernel_fpu_begin();
  1362. while ((nbytes = walk.nbytes)) {
  1363. + kernel_fpu_begin();
  1364. aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1365. - nbytes & AES_BLOCK_MASK);
  1366. + nbytes & AES_BLOCK_MASK);
  1367. + kernel_fpu_end();
  1368. nbytes &= AES_BLOCK_SIZE - 1;
  1369. err = blkcipher_walk_done(desc, &walk, nbytes);
  1370. }
  1371. - kernel_fpu_end();
  1372. return err;
  1373. }
  1374. @@ -405,14 +405,14 @@
  1375. err = blkcipher_walk_virt(desc, &walk);
  1376. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1377. - kernel_fpu_begin();
  1378. while ((nbytes = walk.nbytes)) {
  1379. + kernel_fpu_begin();
  1380. aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1381. nbytes & AES_BLOCK_MASK);
  1382. + kernel_fpu_end();
  1383. nbytes &= AES_BLOCK_SIZE - 1;
  1384. err = blkcipher_walk_done(desc, &walk, nbytes);
  1385. }
  1386. - kernel_fpu_end();
  1387. return err;
  1388. }
  1389. @@ -429,14 +429,14 @@
  1390. err = blkcipher_walk_virt(desc, &walk);
  1391. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1392. - kernel_fpu_begin();
  1393. while ((nbytes = walk.nbytes)) {
  1394. + kernel_fpu_begin();
  1395. aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1396. nbytes & AES_BLOCK_MASK, walk.iv);
  1397. + kernel_fpu_end();
  1398. nbytes &= AES_BLOCK_SIZE - 1;
  1399. err = blkcipher_walk_done(desc, &walk, nbytes);
  1400. }
  1401. - kernel_fpu_end();
  1402. return err;
  1403. }
  1404. @@ -453,14 +453,14 @@
  1405. err = blkcipher_walk_virt(desc, &walk);
  1406. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1407. - kernel_fpu_begin();
  1408. while ((nbytes = walk.nbytes)) {
  1409. + kernel_fpu_begin();
  1410. aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1411. nbytes & AES_BLOCK_MASK, walk.iv);
  1412. + kernel_fpu_end();
  1413. nbytes &= AES_BLOCK_SIZE - 1;
  1414. err = blkcipher_walk_done(desc, &walk, nbytes);
  1415. }
  1416. - kernel_fpu_end();
  1417. return err;
  1418. }
  1419. @@ -512,18 +512,20 @@
  1420. err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
  1421. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1422. - kernel_fpu_begin();
  1423. while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
  1424. + kernel_fpu_begin();
  1425. aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1426. nbytes & AES_BLOCK_MASK, walk.iv);
  1427. + kernel_fpu_end();
  1428. nbytes &= AES_BLOCK_SIZE - 1;
  1429. err = blkcipher_walk_done(desc, &walk, nbytes);
  1430. }
  1431. if (walk.nbytes) {
  1432. + kernel_fpu_begin();
  1433. ctr_crypt_final(ctx, &walk);
  1434. + kernel_fpu_end();
  1435. err = blkcipher_walk_done(desc, &walk, 0);
  1436. }
  1437. - kernel_fpu_end();
  1438. return err;
  1439. }
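The aesni hunks above all apply one transformation: rather than holding the FPU (and with it preemption) across the entire blkcipher walk, each batch gets its own FPU section, so blkcipher_walk_done() runs with preemption enabled again. A minimal sketch of the resulting loop shape, not taken verbatim from the patch; do_one_batch() is a hypothetical stand-in for the aesni_*_enc/dec helpers:

	while ((nbytes = walk.nbytes)) {
		kernel_fpu_begin();			/* FPU usable, preemption off */
		do_one_batch(ctx, walk.dst.virt.addr,	/* stand-in for aesni_*() */
			     walk.src.virt.addr,
			     nbytes & AES_BLOCK_MASK);
		kernel_fpu_end();			/* sleepable again below */
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}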
  1440. diff -Nur linux-3.18.10.orig/arch/x86/crypto/cast5_avx_glue.c linux-3.18.10/arch/x86/crypto/cast5_avx_glue.c
  1441. --- linux-3.18.10.orig/arch/x86/crypto/cast5_avx_glue.c 2015-03-24 02:05:12.000000000 +0100
  1442. +++ linux-3.18.10/arch/x86/crypto/cast5_avx_glue.c 2015-03-26 12:42:13.559582331 +0100
  1443. @@ -60,7 +60,7 @@
  1444. static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
  1445. bool enc)
  1446. {
  1447. - bool fpu_enabled = false;
  1448. + bool fpu_enabled;
  1449. struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  1450. const unsigned int bsize = CAST5_BLOCK_SIZE;
  1451. unsigned int nbytes;
  1452. @@ -76,7 +76,7 @@
  1453. u8 *wsrc = walk->src.virt.addr;
  1454. u8 *wdst = walk->dst.virt.addr;
  1455. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  1456. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  1457. /* Process multi-block batch */
  1458. if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
  1459. @@ -104,10 +104,9 @@
  1460. } while (nbytes >= bsize);
  1461. done:
  1462. + cast5_fpu_end(fpu_enabled);
  1463. err = blkcipher_walk_done(desc, walk, nbytes);
  1464. }
  1465. -
  1466. - cast5_fpu_end(fpu_enabled);
  1467. return err;
  1468. }
  1469. @@ -228,7 +227,7 @@
  1470. static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  1471. struct scatterlist *src, unsigned int nbytes)
  1472. {
  1473. - bool fpu_enabled = false;
  1474. + bool fpu_enabled;
  1475. struct blkcipher_walk walk;
  1476. int err;
  1477. @@ -237,12 +236,11 @@
  1478. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1479. while ((nbytes = walk.nbytes)) {
  1480. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  1481. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  1482. nbytes = __cbc_decrypt(desc, &walk);
  1483. + cast5_fpu_end(fpu_enabled);
  1484. err = blkcipher_walk_done(desc, &walk, nbytes);
  1485. }
  1486. -
  1487. - cast5_fpu_end(fpu_enabled);
  1488. return err;
  1489. }
  1490. @@ -312,7 +310,7 @@
  1491. static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  1492. struct scatterlist *src, unsigned int nbytes)
  1493. {
  1494. - bool fpu_enabled = false;
  1495. + bool fpu_enabled;
  1496. struct blkcipher_walk walk;
  1497. int err;
  1498. @@ -321,13 +319,12 @@
  1499. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1500. while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
  1501. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  1502. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  1503. nbytes = __ctr_crypt(desc, &walk);
  1504. + cast5_fpu_end(fpu_enabled);
  1505. err = blkcipher_walk_done(desc, &walk, nbytes);
  1506. }
  1507. - cast5_fpu_end(fpu_enabled);
  1508. -
  1509. if (walk.nbytes) {
  1510. ctr_crypt_final(desc, &walk);
  1511. err = blkcipher_walk_done(desc, &walk, 0);
  1512. diff -Nur linux-3.18.10.orig/arch/x86/crypto/glue_helper.c linux-3.18.10/arch/x86/crypto/glue_helper.c
  1513. --- linux-3.18.10.orig/arch/x86/crypto/glue_helper.c 2015-03-24 02:05:12.000000000 +0100
  1514. +++ linux-3.18.10/arch/x86/crypto/glue_helper.c 2015-03-26 12:42:13.559582331 +0100
  1515. @@ -39,7 +39,7 @@
  1516. void *ctx = crypto_blkcipher_ctx(desc->tfm);
  1517. const unsigned int bsize = 128 / 8;
  1518. unsigned int nbytes, i, func_bytes;
  1519. - bool fpu_enabled = false;
  1520. + bool fpu_enabled;
  1521. int err;
  1522. err = blkcipher_walk_virt(desc, walk);
  1523. @@ -49,7 +49,7 @@
  1524. u8 *wdst = walk->dst.virt.addr;
  1525. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1526. - desc, fpu_enabled, nbytes);
  1527. + desc, false, nbytes);
  1528. for (i = 0; i < gctx->num_funcs; i++) {
  1529. func_bytes = bsize * gctx->funcs[i].num_blocks;
  1530. @@ -71,10 +71,10 @@
  1531. }
  1532. done:
  1533. + glue_fpu_end(fpu_enabled);
  1534. err = blkcipher_walk_done(desc, walk, nbytes);
  1535. }
  1536. - glue_fpu_end(fpu_enabled);
  1537. return err;
  1538. }
  1539. @@ -194,7 +194,7 @@
  1540. struct scatterlist *src, unsigned int nbytes)
  1541. {
  1542. const unsigned int bsize = 128 / 8;
  1543. - bool fpu_enabled = false;
  1544. + bool fpu_enabled;
  1545. struct blkcipher_walk walk;
  1546. int err;
  1547. @@ -203,12 +203,12 @@
  1548. while ((nbytes = walk.nbytes)) {
  1549. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1550. - desc, fpu_enabled, nbytes);
  1551. + desc, false, nbytes);
  1552. nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
  1553. + glue_fpu_end(fpu_enabled);
  1554. err = blkcipher_walk_done(desc, &walk, nbytes);
  1555. }
  1556. - glue_fpu_end(fpu_enabled);
  1557. return err;
  1558. }
  1559. EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
  1560. @@ -278,7 +278,7 @@
  1561. struct scatterlist *src, unsigned int nbytes)
  1562. {
  1563. const unsigned int bsize = 128 / 8;
  1564. - bool fpu_enabled = false;
  1565. + bool fpu_enabled;
  1566. struct blkcipher_walk walk;
  1567. int err;
  1568. @@ -287,13 +287,12 @@
  1569. while ((nbytes = walk.nbytes) >= bsize) {
  1570. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1571. - desc, fpu_enabled, nbytes);
  1572. + desc, false, nbytes);
  1573. nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
  1574. + glue_fpu_end(fpu_enabled);
  1575. err = blkcipher_walk_done(desc, &walk, nbytes);
  1576. }
  1577. - glue_fpu_end(fpu_enabled);
  1578. -
  1579. if (walk.nbytes) {
  1580. glue_ctr_crypt_final_128bit(
  1581. gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
  1582. @@ -348,7 +347,7 @@
  1583. void *tweak_ctx, void *crypt_ctx)
  1584. {
  1585. const unsigned int bsize = 128 / 8;
  1586. - bool fpu_enabled = false;
  1587. + bool fpu_enabled;
  1588. struct blkcipher_walk walk;
  1589. int err;
  1590. @@ -361,21 +360,21 @@
  1591. /* set minimum length to bsize, for tweak_fn */
  1592. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1593. - desc, fpu_enabled,
  1594. + desc, false,
  1595. nbytes < bsize ? bsize : nbytes);
  1596. -
  1597. /* calculate first value of T */
  1598. tweak_fn(tweak_ctx, walk.iv, walk.iv);
  1599. + glue_fpu_end(fpu_enabled);
  1600. while (nbytes) {
  1601. + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1602. + desc, false, nbytes);
  1603. nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
  1604. + glue_fpu_end(fpu_enabled);
  1605. err = blkcipher_walk_done(desc, &walk, nbytes);
  1606. nbytes = walk.nbytes;
  1607. }
  1608. -
  1609. - glue_fpu_end(fpu_enabled);
  1610. -
  1611. return err;
  1612. }
  1613. EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
  1614. diff -Nur linux-3.18.10.orig/arch/x86/include/asm/preempt.h linux-3.18.10/arch/x86/include/asm/preempt.h
  1615. --- linux-3.18.10.orig/arch/x86/include/asm/preempt.h 2015-03-24 02:05:12.000000000 +0100
  1616. +++ linux-3.18.10/arch/x86/include/asm/preempt.h 2015-03-26 12:42:13.559582331 +0100
  1617. @@ -85,17 +85,33 @@
  1618. * a decrement which hits zero means we have no preempt_count and should
  1619. * reschedule.
  1620. */
  1621. -static __always_inline bool __preempt_count_dec_and_test(void)
  1622. +static __always_inline bool ____preempt_count_dec_and_test(void)
  1623. {
  1624. GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
  1625. }
  1626. +static __always_inline bool __preempt_count_dec_and_test(void)
  1627. +{
  1628. + if (____preempt_count_dec_and_test())
  1629. + return true;
  1630. +#ifdef CONFIG_PREEMPT_LAZY
  1631. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  1632. +#else
  1633. + return false;
  1634. +#endif
  1635. +}
  1636. +
  1637. /*
  1638. * Returns true when we need to resched and can (barring IRQ state).
  1639. */
  1640. static __always_inline bool should_resched(void)
  1641. {
  1642. +#ifdef CONFIG_PREEMPT_LAZY
  1643. + return unlikely(!raw_cpu_read_4(__preempt_count) || \
  1644. + test_thread_flag(TIF_NEED_RESCHED_LAZY));
  1645. +#else
  1646. return unlikely(!raw_cpu_read_4(__preempt_count));
  1647. +#endif
  1648. }
  1649. #ifdef CONFIG_PREEMPT
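For context on why __preempt_count_dec_and_test() folds in TIF_NEED_RESCHED_LAZY: the generic preempt_enable() path of this kernel series (sketched below, roughly as in include/linux/preempt.h) reschedules whenever that test fires, so the lazy flag makes an ordinary preempt_enable() honour a lazily requested reschedule as well:

	#define preempt_enable() \
	do { \
		barrier(); \
		if (unlikely(preempt_count_dec_and_test())) \
			__preempt_schedule(); \
	} while (0)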
  1650. diff -Nur linux-3.18.10.orig/arch/x86/include/asm/signal.h linux-3.18.10/arch/x86/include/asm/signal.h
  1651. --- linux-3.18.10.orig/arch/x86/include/asm/signal.h 2015-03-24 02:05:12.000000000 +0100
  1652. +++ linux-3.18.10/arch/x86/include/asm/signal.h 2015-03-26 12:42:13.559582331 +0100
  1653. @@ -23,6 +23,19 @@
  1654. unsigned long sig[_NSIG_WORDS];
  1655. } sigset_t;
  1656. +/*
  1657. + * Because some traps use the IST stack, we must keep preemption
  1658. + * disabled while calling do_trap(), but do_trap() may call
  1659. + * force_sig_info() which will grab the signal spin_locks for the
  1660. + * task, which in PREEMPT_RT_FULL are mutexes. By defining
  1661. + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
  1662. + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
  1663. + * trap.
  1664. + */
  1665. +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64)
  1666. +#define ARCH_RT_DELAYS_SIGNAL_SEND
  1667. +#endif
  1668. +
  1669. #ifndef CONFIG_COMPAT
  1670. typedef sigset_t compat_sigset_t;
  1671. #endif
  1672. diff -Nur linux-3.18.10.orig/arch/x86/include/asm/stackprotector.h linux-3.18.10/arch/x86/include/asm/stackprotector.h
  1673. --- linux-3.18.10.orig/arch/x86/include/asm/stackprotector.h 2015-03-24 02:05:12.000000000 +0100
  1674. +++ linux-3.18.10/arch/x86/include/asm/stackprotector.h 2015-03-26 12:42:13.559582331 +0100
  1675. @@ -57,7 +57,7 @@
  1676. */
  1677. static __always_inline void boot_init_stack_canary(void)
  1678. {
  1679. - u64 canary;
  1680. + u64 uninitialized_var(canary);
  1681. u64 tsc;
  1682. #ifdef CONFIG_X86_64
  1683. @@ -68,8 +68,16 @@
  1684. * of randomness. The TSC only matters for very early init,
  1685. * there it already has some randomness on most systems. Later
  1686. * on during the bootup the random pool has true entropy too.
  1687. + *
  1688. + * For preempt-rt we need to weaken the randomness a bit, as
  1689. + * we can't call into the random generator from atomic context
  1690. + * due to locking constraints. We just leave canary
  1691. + * uninitialized and use the TSC based randomness on top of
  1692. + * it.
  1693. */
  1694. +#ifndef CONFIG_PREEMPT_RT_FULL
  1695. get_random_bytes(&canary, sizeof(canary));
  1696. +#endif
  1697. tsc = __native_read_tsc();
  1698. canary += tsc + (tsc << 32UL);
  1699. diff -Nur linux-3.18.10.orig/arch/x86/include/asm/thread_info.h linux-3.18.10/arch/x86/include/asm/thread_info.h
  1700. --- linux-3.18.10.orig/arch/x86/include/asm/thread_info.h 2015-03-24 02:05:12.000000000 +0100
  1701. +++ linux-3.18.10/arch/x86/include/asm/thread_info.h 2015-03-26 12:42:13.559582331 +0100
  1702. @@ -30,6 +30,8 @@
  1703. __u32 status; /* thread synchronous flags */
  1704. __u32 cpu; /* current CPU */
  1705. int saved_preempt_count;
  1706. + int preempt_lazy_count; /* 0 => lazy preemptable
  1707. + <0 => BUG */
  1708. mm_segment_t addr_limit;
  1709. struct restart_block restart_block;
  1710. void __user *sysenter_return;
  1711. @@ -75,6 +77,7 @@
  1712. #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
  1713. #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
  1714. #define TIF_SECCOMP 8 /* secure computing */
  1715. +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
  1716. #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
  1717. #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
  1718. #define TIF_UPROBE 12 /* breakpointed or singlestepping */
  1719. @@ -100,6 +103,7 @@
  1720. #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
  1721. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  1722. #define _TIF_SECCOMP (1 << TIF_SECCOMP)
  1723. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  1724. #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
  1725. #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
  1726. #define _TIF_UPROBE (1 << TIF_UPROBE)
  1727. @@ -150,6 +154,8 @@
  1728. #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
  1729. #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
  1730. +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  1731. +
  1732. #define STACK_WARN (THREAD_SIZE/8)
  1733. #define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8))
  1734. diff -Nur linux-3.18.10.orig/arch/x86/include/asm/uv/uv_bau.h linux-3.18.10/arch/x86/include/asm/uv/uv_bau.h
  1735. --- linux-3.18.10.orig/arch/x86/include/asm/uv/uv_bau.h 2015-03-24 02:05:12.000000000 +0100
  1736. +++ linux-3.18.10/arch/x86/include/asm/uv/uv_bau.h 2015-03-26 12:42:13.563582336 +0100
  1737. @@ -615,9 +615,9 @@
  1738. cycles_t send_message;
  1739. cycles_t period_end;
  1740. cycles_t period_time;
  1741. - spinlock_t uvhub_lock;
  1742. - spinlock_t queue_lock;
  1743. - spinlock_t disable_lock;
  1744. + raw_spinlock_t uvhub_lock;
  1745. + raw_spinlock_t queue_lock;
  1746. + raw_spinlock_t disable_lock;
  1747. /* tunables */
  1748. int max_concurr;
  1749. int max_concurr_const;
  1750. @@ -776,15 +776,15 @@
  1751. * to be lowered below the current 'v'. atomic_add_unless can only stop
  1752. * on equal.
  1753. */
  1754. -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
  1755. +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u)
  1756. {
  1757. - spin_lock(lock);
  1758. + raw_spin_lock(lock);
  1759. if (atomic_read(v) >= u) {
  1760. - spin_unlock(lock);
  1761. + raw_spin_unlock(lock);
  1762. return 0;
  1763. }
  1764. atomic_inc(v);
  1765. - spin_unlock(lock);
  1766. + raw_spin_unlock(lock);
  1767. return 1;
  1768. }
  1769. diff -Nur linux-3.18.10.orig/arch/x86/include/asm/uv/uv_hub.h linux-3.18.10/arch/x86/include/asm/uv/uv_hub.h
  1770. --- linux-3.18.10.orig/arch/x86/include/asm/uv/uv_hub.h 2015-03-24 02:05:12.000000000 +0100
  1771. +++ linux-3.18.10/arch/x86/include/asm/uv/uv_hub.h 2015-03-26 12:42:13.563582336 +0100
  1772. @@ -492,7 +492,7 @@
  1773. unsigned short nr_online_cpus;
  1774. unsigned short pnode;
  1775. short memory_nid;
  1776. - spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
  1777. + raw_spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
  1778. unsigned long nmi_count; /* obsolete, see uv_hub_nmi */
  1779. };
  1780. extern struct uv_blade_info *uv_blade_info;
  1781. diff -Nur linux-3.18.10.orig/arch/x86/Kconfig linux-3.18.10/arch/x86/Kconfig
  1782. --- linux-3.18.10.orig/arch/x86/Kconfig 2015-03-24 02:05:12.000000000 +0100
  1783. +++ linux-3.18.10/arch/x86/Kconfig 2015-03-26 12:42:13.559582331 +0100
  1784. @@ -21,6 +21,7 @@
  1785. ### Arch settings
  1786. config X86
  1787. def_bool y
  1788. + select HAVE_PREEMPT_LAZY
  1789. select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
  1790. select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
  1791. select ARCH_HAS_FAST_MULTIPLIER
  1792. @@ -197,8 +198,11 @@
  1793. def_bool y
  1794. depends on ISA_DMA_API
  1795. +config RWSEM_GENERIC_SPINLOCK
  1796. + def_bool PREEMPT_RT_FULL
  1797. +
  1798. config RWSEM_XCHGADD_ALGORITHM
  1799. - def_bool y
  1800. + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
  1801. config GENERIC_CALIBRATE_DELAY
  1802. def_bool y
  1803. @@ -811,7 +815,7 @@
  1804. config MAXSMP
  1805. bool "Enable Maximum number of SMP Processors and NUMA Nodes"
  1806. depends on X86_64 && SMP && DEBUG_KERNEL
  1807. - select CPUMASK_OFFSTACK
  1808. + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
  1809. ---help---
  1810. Enable maximum number of CPUS and NUMA Nodes for this architecture.
  1811. If unsure, say N.
  1812. diff -Nur linux-3.18.10.orig/arch/x86/kernel/apic/io_apic.c linux-3.18.10/arch/x86/kernel/apic/io_apic.c
  1813. --- linux-3.18.10.orig/arch/x86/kernel/apic/io_apic.c 2015-03-24 02:05:12.000000000 +0100
  1814. +++ linux-3.18.10/arch/x86/kernel/apic/io_apic.c 2015-03-26 12:42:13.563582336 +0100
  1815. @@ -2494,7 +2494,8 @@
  1816. static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
  1817. {
  1818. /* If we are moving the irq we need to mask it */
  1819. - if (unlikely(irqd_is_setaffinity_pending(data))) {
  1820. + if (unlikely(irqd_is_setaffinity_pending(data) &&
  1821. + !irqd_irq_inprogress(data))) {
  1822. mask_ioapic(cfg);
  1823. return true;
  1824. }
  1825. diff -Nur linux-3.18.10.orig/arch/x86/kernel/apic/x2apic_uv_x.c linux-3.18.10/arch/x86/kernel/apic/x2apic_uv_x.c
  1826. --- linux-3.18.10.orig/arch/x86/kernel/apic/x2apic_uv_x.c 2015-03-24 02:05:12.000000000 +0100
  1827. +++ linux-3.18.10/arch/x86/kernel/apic/x2apic_uv_x.c 2015-03-26 12:42:13.563582336 +0100
  1828. @@ -918,7 +918,7 @@
  1829. uv_blade_info[blade].pnode = pnode;
  1830. uv_blade_info[blade].nr_possible_cpus = 0;
  1831. uv_blade_info[blade].nr_online_cpus = 0;
  1832. - spin_lock_init(&uv_blade_info[blade].nmi_lock);
  1833. + raw_spin_lock_init(&uv_blade_info[blade].nmi_lock);
  1834. min_pnode = min(pnode, min_pnode);
  1835. max_pnode = max(pnode, max_pnode);
  1836. blade++;
  1837. diff -Nur linux-3.18.10.orig/arch/x86/kernel/asm-offsets.c linux-3.18.10/arch/x86/kernel/asm-offsets.c
  1838. --- linux-3.18.10.orig/arch/x86/kernel/asm-offsets.c 2015-03-24 02:05:12.000000000 +0100
  1839. +++ linux-3.18.10/arch/x86/kernel/asm-offsets.c 2015-03-26 12:42:13.563582336 +0100
  1840. @@ -32,6 +32,7 @@
  1841. OFFSET(TI_flags, thread_info, flags);
  1842. OFFSET(TI_status, thread_info, status);
  1843. OFFSET(TI_addr_limit, thread_info, addr_limit);
  1844. + OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count);
  1845. BLANK();
  1846. OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
  1847. @@ -71,4 +72,5 @@
  1848. BLANK();
  1849. DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
  1850. + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED);
  1851. }
  1852. diff -Nur linux-3.18.10.orig/arch/x86/kernel/cpu/mcheck/mce.c linux-3.18.10/arch/x86/kernel/cpu/mcheck/mce.c
  1853. --- linux-3.18.10.orig/arch/x86/kernel/cpu/mcheck/mce.c 2015-03-24 02:05:12.000000000 +0100
  1854. +++ linux-3.18.10/arch/x86/kernel/cpu/mcheck/mce.c 2015-03-26 12:42:13.563582336 +0100
  1855. @@ -41,6 +41,8 @@
  1856. #include <linux/debugfs.h>
  1857. #include <linux/irq_work.h>
  1858. #include <linux/export.h>
  1859. +#include <linux/jiffies.h>
  1860. +#include <linux/work-simple.h>
  1861. #include <asm/processor.h>
  1862. #include <asm/mce.h>
  1863. @@ -1266,7 +1268,7 @@
  1864. static unsigned long check_interval = 5 * 60; /* 5 minutes */
  1865. static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
  1866. -static DEFINE_PER_CPU(struct timer_list, mce_timer);
  1867. +static DEFINE_PER_CPU(struct hrtimer, mce_timer);
  1868. static unsigned long mce_adjust_timer_default(unsigned long interval)
  1869. {
  1870. @@ -1283,14 +1285,11 @@
  1871. return test_and_clear_bit(0, v);
  1872. }
  1873. -static void mce_timer_fn(unsigned long data)
  1874. +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)
  1875. {
  1876. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  1877. unsigned long iv;
  1878. int notify;
  1879. - WARN_ON(smp_processor_id() != data);
  1880. -
  1881. if (mce_available(this_cpu_ptr(&cpu_info))) {
  1882. machine_check_poll(MCP_TIMESTAMP,
  1883. this_cpu_ptr(&mce_poll_banks));
  1884. @@ -1313,9 +1312,11 @@
  1885. __this_cpu_write(mce_next_interval, iv);
  1886. /* Might have become 0 after CMCI storm subsided */
  1887. if (iv) {
  1888. - t->expires = jiffies + iv;
  1889. - add_timer_on(t, smp_processor_id());
  1890. + hrtimer_forward_now(timer, ns_to_ktime(
  1891. + jiffies_to_usecs(iv) * 1000ULL));
  1892. + return HRTIMER_RESTART;
  1893. }
  1894. + return HRTIMER_NORESTART;
  1895. }
  1896. /*
  1897. @@ -1323,28 +1324,37 @@
  1898. */
  1899. void mce_timer_kick(unsigned long interval)
  1900. {
  1901. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  1902. - unsigned long when = jiffies + interval;
  1903. + struct hrtimer *t = this_cpu_ptr(&mce_timer);
  1904. unsigned long iv = __this_cpu_read(mce_next_interval);
  1905. - if (timer_pending(t)) {
  1906. - if (time_before(when, t->expires))
  1907. - mod_timer_pinned(t, when);
  1908. + if (hrtimer_active(t)) {
  1909. + s64 exp;
  1910. + s64 intv_us;
  1911. +
  1912. + intv_us = jiffies_to_usecs(interval);
  1913. + exp = ktime_to_us(hrtimer_expires_remaining(t));
  1914. + if (intv_us < exp) {
  1915. + hrtimer_cancel(t);
  1916. + hrtimer_start_range_ns(t,
  1917. + ns_to_ktime(intv_us * 1000),
  1918. + 0, HRTIMER_MODE_REL_PINNED);
  1919. + }
  1920. } else {
  1921. - t->expires = round_jiffies(when);
  1922. - add_timer_on(t, smp_processor_id());
  1923. + hrtimer_start_range_ns(t,
  1924. + ns_to_ktime(jiffies_to_usecs(interval) * 1000ULL),
  1925. + 0, HRTIMER_MODE_REL_PINNED);
  1926. }
  1927. if (interval < iv)
  1928. __this_cpu_write(mce_next_interval, interval);
  1929. }
  1930. -/* Must not be called in IRQ context where del_timer_sync() can deadlock */
  1931. +/* Must not be called in IRQ context where hrtimer_cancel() can deadlock */
  1932. static void mce_timer_delete_all(void)
  1933. {
  1934. int cpu;
  1935. for_each_online_cpu(cpu)
  1936. - del_timer_sync(&per_cpu(mce_timer, cpu));
  1937. + hrtimer_cancel(&per_cpu(mce_timer, cpu));
  1938. }
  1939. static void mce_do_trigger(struct work_struct *work)
  1940. @@ -1354,6 +1364,56 @@
  1941. static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
  1942. +static void __mce_notify_work(struct swork_event *event)
  1943. +{
  1944. + /* Not more than two messages every minute */
  1945. + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
  1946. +
  1947. + /* wake processes polling /dev/mcelog */
  1948. + wake_up_interruptible(&mce_chrdev_wait);
  1949. +
  1950. + /*
  1951. + * There is no risk of missing notifications because
  1952. + * work_pending is always cleared before the function is
  1953. + * executed.
  1954. + */
  1955. + if (mce_helper[0] && !work_pending(&mce_trigger_work))
  1956. + schedule_work(&mce_trigger_work);
  1957. +
  1958. + if (__ratelimit(&ratelimit))
  1959. + pr_info(HW_ERR "Machine check events logged\n");
  1960. +}
  1961. +
  1962. +#ifdef CONFIG_PREEMPT_RT_FULL
  1963. +static bool notify_work_ready __read_mostly;
  1964. +static struct swork_event notify_work;
  1965. +
  1966. +static int mce_notify_work_init(void)
  1967. +{
  1968. + int err;
  1969. +
  1970. + err = swork_get();
  1971. + if (err)
  1972. + return err;
  1973. +
  1974. + INIT_SWORK(&notify_work, __mce_notify_work);
  1975. + notify_work_ready = true;
  1976. + return 0;
  1977. +}
  1978. +
  1979. +static void mce_notify_work(void)
  1980. +{
  1981. + if (notify_work_ready)
  1982. + swork_queue(&notify_work);
  1983. +}
  1984. +#else
  1985. +static void mce_notify_work(void)
  1986. +{
  1987. + __mce_notify_work(NULL);
  1988. +}
  1989. +static inline int mce_notify_work_init(void) { return 0; }
  1990. +#endif
  1991. +
  1992. /*
  1993. * Notify the user(s) about new machine check events.
  1994. * Can be called from interrupt context, but not from machine check/NMI
  1995. @@ -1361,19 +1421,8 @@
  1996. */
  1997. int mce_notify_irq(void)
  1998. {
  1999. - /* Not more than two messages every minute */
  2000. - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
  2001. -
  2002. if (test_and_clear_bit(0, &mce_need_notify)) {
  2003. - /* wake processes polling /dev/mcelog */
  2004. - wake_up_interruptible(&mce_chrdev_wait);
  2005. -
  2006. - if (mce_helper[0])
  2007. - schedule_work(&mce_trigger_work);
  2008. -
  2009. - if (__ratelimit(&ratelimit))
  2010. - pr_info(HW_ERR "Machine check events logged\n");
  2011. -
  2012. + mce_notify_work();
  2013. return 1;
  2014. }
  2015. return 0;
  2016. @@ -1644,7 +1693,7 @@
  2017. }
  2018. }
  2019. -static void mce_start_timer(unsigned int cpu, struct timer_list *t)
  2020. +static void mce_start_timer(unsigned int cpu, struct hrtimer *t)
  2021. {
  2022. unsigned long iv = check_interval * HZ;
  2023. @@ -1653,16 +1702,17 @@
  2024. per_cpu(mce_next_interval, cpu) = iv;
  2025. - t->expires = round_jiffies(jiffies + iv);
  2026. - add_timer_on(t, cpu);
  2027. + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL),
  2028. + 0, HRTIMER_MODE_REL_PINNED);
  2029. }
  2030. static void __mcheck_cpu_init_timer(void)
  2031. {
  2032. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  2033. + struct hrtimer *t = this_cpu_ptr(&mce_timer);
  2034. unsigned int cpu = smp_processor_id();
  2035. - setup_timer(t, mce_timer_fn, cpu);
  2036. + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  2037. + t->function = mce_timer_fn;
  2038. mce_start_timer(cpu, t);
  2039. }
  2040. @@ -2339,6 +2389,8 @@
  2041. if (!mce_available(raw_cpu_ptr(&cpu_info)))
  2042. return;
  2043. + hrtimer_cancel(this_cpu_ptr(&mce_timer));
  2044. +
  2045. if (!(action & CPU_TASKS_FROZEN))
  2046. cmci_clear();
  2047. for (i = 0; i < mca_cfg.banks; i++) {
  2048. @@ -2365,6 +2417,7 @@
  2049. if (b->init)
  2050. wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
  2051. }
  2052. + __mcheck_cpu_init_timer();
  2053. }
  2054. /* Get notified when a cpu comes on/off. Be hotplug friendly. */
  2055. @@ -2372,7 +2425,6 @@
  2056. mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
  2057. {
  2058. unsigned int cpu = (unsigned long)hcpu;
  2059. - struct timer_list *t = &per_cpu(mce_timer, cpu);
  2060. switch (action & ~CPU_TASKS_FROZEN) {
  2061. case CPU_ONLINE:
  2062. @@ -2392,11 +2444,9 @@
  2063. break;
  2064. case CPU_DOWN_PREPARE:
  2065. smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
  2066. - del_timer_sync(t);
  2067. break;
  2068. case CPU_DOWN_FAILED:
  2069. smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
  2070. - mce_start_timer(cpu, t);
  2071. break;
  2072. }
  2073. @@ -2435,6 +2485,10 @@
  2074. goto err_out;
  2075. }
  2076. + err = mce_notify_work_init();
  2077. + if (err)
  2078. + goto err_out;
  2079. +
  2080. if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) {
  2081. err = -ENOMEM;
  2082. goto err_out;
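The mce.c changes above push the user notification out of interrupt context on PREEMPT_RT through the simple-work (swork) facility that this patch set adds as linux/work-simple.h. A schematic of that usage pattern; my_event, my_handler, my_setup and my_trigger are made-up names:

	#include <linux/work-simple.h>

	static struct swork_event my_event;

	static void my_handler(struct swork_event *ev)
	{
		/* runs from the swork kthread: sleeping locks are fine here */
	}

	static int __init my_setup(void)
	{
		int err = swork_get();	/* bring up the swork thread */

		if (err)
			return err;
		INIT_SWORK(&my_event, my_handler);
		return 0;
	}

	static void my_trigger(void)	/* callable from (hard)irq context */
	{
		swork_queue(&my_event);
	}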
  2083. diff -Nur linux-3.18.10.orig/arch/x86/kernel/entry_32.S linux-3.18.10/arch/x86/kernel/entry_32.S
  2084. --- linux-3.18.10.orig/arch/x86/kernel/entry_32.S 2015-03-24 02:05:12.000000000 +0100
  2085. +++ linux-3.18.10/arch/x86/kernel/entry_32.S 2015-03-26 12:42:13.563582336 +0100
  2086. @@ -359,8 +359,24 @@
  2087. ENTRY(resume_kernel)
  2088. DISABLE_INTERRUPTS(CLBR_ANY)
  2089. need_resched:
  2090. + # preempt count == 0 + NEED_RS set?
  2091. cmpl $0,PER_CPU_VAR(__preempt_count)
  2092. +#ifndef CONFIG_PREEMPT_LAZY
  2093. jnz restore_all
  2094. +#else
  2095. + jz test_int_off
  2096. +
2097. + # at least preempt count == 0 ?
  2098. + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
  2099. + jne restore_all
  2100. +
  2101. + cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ?
  2102. + jnz restore_all
  2103. +
  2104. + testl $_TIF_NEED_RESCHED_LAZY, TI_flags(%ebp)
  2105. + jz restore_all
  2106. +test_int_off:
  2107. +#endif
  2108. testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
  2109. jz restore_all
  2110. call preempt_schedule_irq
  2111. @@ -591,7 +607,7 @@
  2112. ALIGN
  2113. RING0_PTREGS_FRAME # can't unwind into user space anyway
  2114. work_pending:
  2115. - testb $_TIF_NEED_RESCHED, %cl
  2116. + testl $_TIF_NEED_RESCHED_MASK, %ecx
  2117. jz work_notifysig
  2118. work_resched:
  2119. call schedule
  2120. @@ -604,7 +620,7 @@
  2121. andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
  2122. # than syscall tracing?
  2123. jz restore_all
  2124. - testb $_TIF_NEED_RESCHED, %cl
  2125. + testl $_TIF_NEED_RESCHED_MASK, %ecx
  2126. jnz work_resched
  2127. work_notifysig: # deal with pending signals and
  2128. diff -Nur linux-3.18.10.orig/arch/x86/kernel/entry_64.S linux-3.18.10/arch/x86/kernel/entry_64.S
  2129. --- linux-3.18.10.orig/arch/x86/kernel/entry_64.S 2015-03-24 02:05:12.000000000 +0100
  2130. +++ linux-3.18.10/arch/x86/kernel/entry_64.S 2015-03-26 12:42:13.563582336 +0100
  2131. @@ -454,8 +454,8 @@
  2132. /* Handle reschedules */
  2133. /* edx: work, edi: workmask */
  2134. sysret_careful:
  2135. - bt $TIF_NEED_RESCHED,%edx
  2136. - jnc sysret_signal
  2137. + testl $_TIF_NEED_RESCHED_MASK,%edx
  2138. + jz sysret_signal
  2139. TRACE_IRQS_ON
  2140. ENABLE_INTERRUPTS(CLBR_NONE)
  2141. pushq_cfi %rdi
  2142. @@ -554,8 +554,8 @@
  2143. /* First do a reschedule test. */
  2144. /* edx: work, edi: workmask */
  2145. int_careful:
  2146. - bt $TIF_NEED_RESCHED,%edx
  2147. - jnc int_very_careful
  2148. + testl $_TIF_NEED_RESCHED_MASK,%edx
  2149. + jz int_very_careful
  2150. TRACE_IRQS_ON
  2151. ENABLE_INTERRUPTS(CLBR_NONE)
  2152. pushq_cfi %rdi
  2153. @@ -870,8 +870,8 @@
  2154. /* edi: workmask, edx: work */
  2155. retint_careful:
  2156. CFI_RESTORE_STATE
  2157. - bt $TIF_NEED_RESCHED,%edx
  2158. - jnc retint_signal
  2159. + testl $_TIF_NEED_RESCHED_MASK,%edx
  2160. + jz retint_signal
  2161. TRACE_IRQS_ON
  2162. ENABLE_INTERRUPTS(CLBR_NONE)
  2163. pushq_cfi %rdi
  2164. @@ -903,7 +903,22 @@
  2165. /* rcx: threadinfo. interrupts off. */
  2166. ENTRY(retint_kernel)
  2167. cmpl $0,PER_CPU_VAR(__preempt_count)
  2168. +#ifndef CONFIG_PREEMPT_LAZY
  2169. jnz retint_restore_args
  2170. +#else
  2171. + jz check_int_off
  2172. +
2173. + # at least preempt count == 0 ?
  2174. + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
  2175. + jnz retint_restore_args
  2176. +
  2177. + cmpl $0, TI_preempt_lazy_count(%rcx)
  2178. + jnz retint_restore_args
  2179. +
  2180. + bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx)
  2181. + jnc retint_restore_args
  2182. +check_int_off:
  2183. +#endif
  2184. bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
  2185. jnc retint_restore_args
  2186. call preempt_schedule_irq
  2187. @@ -1119,6 +1134,7 @@
  2188. jmp 2b
  2189. .previous
  2190. +#ifndef CONFIG_PREEMPT_RT_FULL
  2191. /* Call softirq on interrupt stack. Interrupts are off. */
  2192. ENTRY(do_softirq_own_stack)
  2193. CFI_STARTPROC
  2194. @@ -1138,6 +1154,7 @@
  2195. ret
  2196. CFI_ENDPROC
  2197. END(do_softirq_own_stack)
  2198. +#endif
  2199. #ifdef CONFIG_XEN
  2200. idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
  2201. @@ -1302,7 +1319,7 @@
  2202. movq %rsp,%rdi /* &pt_regs */
  2203. call sync_regs
  2204. movq %rax,%rsp /* switch stack for scheduling */
  2205. - testl $_TIF_NEED_RESCHED,%ebx
  2206. + testl $_TIF_NEED_RESCHED_MASK,%ebx
  2207. jnz paranoid_schedule
  2208. movl %ebx,%edx /* arg3: thread flags */
  2209. TRACE_IRQS_ON
  2210. diff -Nur linux-3.18.10.orig/arch/x86/kernel/irq_32.c linux-3.18.10/arch/x86/kernel/irq_32.c
  2211. --- linux-3.18.10.orig/arch/x86/kernel/irq_32.c 2015-03-24 02:05:12.000000000 +0100
  2212. +++ linux-3.18.10/arch/x86/kernel/irq_32.c 2015-03-26 12:42:13.563582336 +0100
  2213. @@ -142,6 +142,7 @@
  2214. cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
  2215. }
  2216. +#ifndef CONFIG_PREEMPT_RT_FULL
  2217. void do_softirq_own_stack(void)
  2218. {
  2219. struct thread_info *curstk;
  2220. @@ -160,6 +161,7 @@
  2221. call_on_stack(__do_softirq, isp);
  2222. }
  2223. +#endif
  2224. bool handle_irq(unsigned irq, struct pt_regs *regs)
  2225. {
  2226. diff -Nur linux-3.18.10.orig/arch/x86/kernel/irq_work.c linux-3.18.10/arch/x86/kernel/irq_work.c
  2227. --- linux-3.18.10.orig/arch/x86/kernel/irq_work.c 2015-03-24 02:05:12.000000000 +0100
  2228. +++ linux-3.18.10/arch/x86/kernel/irq_work.c 2015-03-26 12:42:13.563582336 +0100
  2229. @@ -38,6 +38,7 @@
  2230. exiting_irq();
  2231. }
  2232. +#ifndef CONFIG_PREEMPT_RT_FULL
  2233. void arch_irq_work_raise(void)
  2234. {
  2235. #ifdef CONFIG_X86_LOCAL_APIC
  2236. @@ -48,3 +49,4 @@
  2237. apic_wait_icr_idle();
  2238. #endif
  2239. }
  2240. +#endif
  2241. diff -Nur linux-3.18.10.orig/arch/x86/kernel/process_32.c linux-3.18.10/arch/x86/kernel/process_32.c
  2242. --- linux-3.18.10.orig/arch/x86/kernel/process_32.c 2015-03-24 02:05:12.000000000 +0100
  2243. +++ linux-3.18.10/arch/x86/kernel/process_32.c 2015-03-26 12:42:13.563582336 +0100
  2244. @@ -35,6 +35,7 @@
  2245. #include <linux/uaccess.h>
  2246. #include <linux/io.h>
  2247. #include <linux/kdebug.h>
  2248. +#include <linux/highmem.h>
  2249. #include <asm/pgtable.h>
  2250. #include <asm/ldt.h>
  2251. @@ -214,6 +215,35 @@
  2252. }
  2253. EXPORT_SYMBOL_GPL(start_thread);
  2254. +#ifdef CONFIG_PREEMPT_RT_FULL
  2255. +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
  2256. +{
  2257. + int i;
  2258. +
  2259. + /*
  2260. + * Clear @prev's kmap_atomic mappings
  2261. + */
  2262. + for (i = 0; i < prev_p->kmap_idx; i++) {
  2263. + int idx = i + KM_TYPE_NR * smp_processor_id();
  2264. + pte_t *ptep = kmap_pte - idx;
  2265. +
  2266. + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
  2267. + }
  2268. + /*
  2269. + * Restore @next_p's kmap_atomic mappings
  2270. + */
  2271. + for (i = 0; i < next_p->kmap_idx; i++) {
  2272. + int idx = i + KM_TYPE_NR * smp_processor_id();
  2273. +
  2274. + if (!pte_none(next_p->kmap_pte[i]))
  2275. + set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
  2276. + }
  2277. +}
  2278. +#else
  2279. +static inline void
  2280. +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
  2281. +#endif
  2282. +
  2283. /*
  2284. * switch_to(x,y) should switch tasks from x to y.
  2285. @@ -301,6 +331,8 @@
  2286. task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
  2287. __switch_to_xtra(prev_p, next_p, tss);
  2288. + switch_kmaps(prev_p, next_p);
  2289. +
  2290. /*
  2291. * Leave lazy mode, flushing any hypercalls made here.
  2292. * This must be done before restoring TLS segments so
  2293. diff -Nur linux-3.18.10.orig/arch/x86/kernel/signal.c linux-3.18.10/arch/x86/kernel/signal.c
  2294. --- linux-3.18.10.orig/arch/x86/kernel/signal.c 2015-03-24 02:05:12.000000000 +0100
  2295. +++ linux-3.18.10/arch/x86/kernel/signal.c 2015-03-26 12:42:13.563582336 +0100
  2296. @@ -746,6 +746,14 @@
  2297. mce_notify_process();
  2298. #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
  2299. +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
  2300. + if (unlikely(current->forced_info.si_signo)) {
  2301. + struct task_struct *t = current;
  2302. + force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
  2303. + t->forced_info.si_signo = 0;
  2304. + }
  2305. +#endif
  2306. +
  2307. if (thread_info_flags & _TIF_UPROBE)
  2308. uprobe_notify_resume(regs);
  2309. diff -Nur linux-3.18.10.orig/arch/x86/kernel/traps.c linux-3.18.10/arch/x86/kernel/traps.c
  2310. --- linux-3.18.10.orig/arch/x86/kernel/traps.c 2015-03-24 02:05:12.000000000 +0100
  2311. +++ linux-3.18.10/arch/x86/kernel/traps.c 2015-03-26 12:42:13.563582336 +0100
  2312. @@ -87,9 +87,21 @@
  2313. local_irq_enable();
  2314. }
  2315. -static inline void preempt_conditional_sti(struct pt_regs *regs)
  2316. +static inline void conditional_sti_ist(struct pt_regs *regs)
  2317. {
  2318. +#ifdef CONFIG_X86_64
  2319. + /*
  2320. + * X86_64 uses a per CPU stack on the IST for certain traps
  2321. + * like int3. The task can not be preempted when using one
  2322. + * of these stacks, thus preemption must be disabled, otherwise
  2323. + * the stack can be corrupted if the task is scheduled out,
  2324. + * and another task comes in and uses this stack.
  2325. + *
  2326. + * On x86_32 the task keeps its own stack and it is OK if the
  2327. + * task schedules out.
  2328. + */
  2329. preempt_count_inc();
  2330. +#endif
  2331. if (regs->flags & X86_EFLAGS_IF)
  2332. local_irq_enable();
  2333. }
  2334. @@ -100,11 +112,13 @@
  2335. local_irq_disable();
  2336. }
  2337. -static inline void preempt_conditional_cli(struct pt_regs *regs)
  2338. +static inline void conditional_cli_ist(struct pt_regs *regs)
  2339. {
  2340. if (regs->flags & X86_EFLAGS_IF)
  2341. local_irq_disable();
  2342. +#ifdef CONFIG_X86_64
  2343. preempt_count_dec();
  2344. +#endif
  2345. }
  2346. static nokprobe_inline int
  2347. @@ -372,9 +386,9 @@
  2348. * as we may switch to the interrupt stack.
  2349. */
  2350. debug_stack_usage_inc();
  2351. - preempt_conditional_sti(regs);
  2352. + conditional_sti_ist(regs);
  2353. do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
  2354. - preempt_conditional_cli(regs);
  2355. + conditional_cli_ist(regs);
  2356. debug_stack_usage_dec();
  2357. exit:
  2358. exception_exit(prev_state);
  2359. @@ -517,12 +531,12 @@
  2360. debug_stack_usage_inc();
  2361. /* It's safe to allow irq's after DR6 has been saved */
  2362. - preempt_conditional_sti(regs);
  2363. + conditional_sti_ist(regs);
  2364. if (regs->flags & X86_VM_MASK) {
  2365. handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
  2366. X86_TRAP_DB);
  2367. - preempt_conditional_cli(regs);
  2368. + conditional_cli_ist(regs);
  2369. debug_stack_usage_dec();
  2370. goto exit;
  2371. }
  2372. @@ -542,7 +556,7 @@
  2373. si_code = get_si_code(tsk->thread.debugreg6);
  2374. if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
  2375. send_sigtrap(tsk, regs, error_code, si_code);
  2376. - preempt_conditional_cli(regs);
  2377. + conditional_cli_ist(regs);
  2378. debug_stack_usage_dec();
  2379. exit:
  2380. diff -Nur linux-3.18.10.orig/arch/x86/kvm/x86.c linux-3.18.10/arch/x86/kvm/x86.c
  2381. --- linux-3.18.10.orig/arch/x86/kvm/x86.c 2015-03-24 02:05:12.000000000 +0100
  2382. +++ linux-3.18.10/arch/x86/kvm/x86.c 2015-03-26 12:42:13.563582336 +0100
  2383. @@ -5773,6 +5773,13 @@
  2384. goto out;
  2385. }
  2386. +#ifdef CONFIG_PREEMPT_RT_FULL
  2387. + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
  2388. + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
  2389. + return -EOPNOTSUPP;
  2390. + }
  2391. +#endif
  2392. +
  2393. r = kvm_mmu_module_init();
  2394. if (r)
  2395. goto out_free_percpu;
  2396. diff -Nur linux-3.18.10.orig/arch/x86/mm/fault.c linux-3.18.10/arch/x86/mm/fault.c
  2397. --- linux-3.18.10.orig/arch/x86/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  2398. +++ linux-3.18.10/arch/x86/mm/fault.c 2015-03-26 12:42:13.563582336 +0100
  2399. @@ -1128,7 +1128,7 @@
  2400. * If we're in an interrupt, have no user context or are running
  2401. * in an atomic region then we must not take the fault:
  2402. */
  2403. - if (unlikely(in_atomic() || !mm)) {
  2404. + if (unlikely(!mm || pagefault_disabled())) {
  2405. bad_area_nosemaphore(regs, error_code, address);
  2406. return;
  2407. }
  2408. diff -Nur linux-3.18.10.orig/arch/x86/mm/highmem_32.c linux-3.18.10/arch/x86/mm/highmem_32.c
  2409. --- linux-3.18.10.orig/arch/x86/mm/highmem_32.c 2015-03-24 02:05:12.000000000 +0100
  2410. +++ linux-3.18.10/arch/x86/mm/highmem_32.c 2015-03-26 12:42:13.563582336 +0100
  2411. @@ -32,6 +32,7 @@
  2412. */
  2413. void *kmap_atomic_prot(struct page *page, pgprot_t prot)
  2414. {
  2415. + pte_t pte = mk_pte(page, prot);
  2416. unsigned long vaddr;
  2417. int idx, type;
  2418. @@ -45,7 +46,10 @@
  2419. idx = type + KM_TYPE_NR*smp_processor_id();
  2420. vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
  2421. BUG_ON(!pte_none(*(kmap_pte-idx)));
  2422. - set_pte(kmap_pte-idx, mk_pte(page, prot));
  2423. +#ifdef CONFIG_PREEMPT_RT_FULL
  2424. + current->kmap_pte[type] = pte;
  2425. +#endif
  2426. + set_pte(kmap_pte-idx, pte);
  2427. arch_flush_lazy_mmu_mode();
  2428. return (void *)vaddr;
  2429. @@ -88,6 +92,9 @@
  2430. * is a bad idea also, in case the page changes cacheability
  2431. * attributes or becomes a protected page in a hypervisor.
  2432. */
  2433. +#ifdef CONFIG_PREEMPT_RT_FULL
  2434. + current->kmap_pte[type] = __pte(0);
  2435. +#endif
  2436. kpte_clear_flush(kmap_pte-idx, vaddr);
  2437. kmap_atomic_idx_pop();
  2438. arch_flush_lazy_mmu_mode();
  2439. diff -Nur linux-3.18.10.orig/arch/x86/mm/iomap_32.c linux-3.18.10/arch/x86/mm/iomap_32.c
  2440. --- linux-3.18.10.orig/arch/x86/mm/iomap_32.c 2015-03-24 02:05:12.000000000 +0100
  2441. +++ linux-3.18.10/arch/x86/mm/iomap_32.c 2015-03-26 12:42:13.563582336 +0100
  2442. @@ -56,6 +56,7 @@
  2443. void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
  2444. {
  2445. + pte_t pte = pfn_pte(pfn, prot);
  2446. unsigned long vaddr;
  2447. int idx, type;
  2448. @@ -64,7 +65,12 @@
  2449. type = kmap_atomic_idx_push();
  2450. idx = type + KM_TYPE_NR * smp_processor_id();
  2451. vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
  2452. - set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
  2453. + WARN_ON(!pte_none(*(kmap_pte - idx)));
  2454. +
  2455. +#ifdef CONFIG_PREEMPT_RT_FULL
  2456. + current->kmap_pte[type] = pte;
  2457. +#endif
  2458. + set_pte(kmap_pte - idx, pte);
  2459. arch_flush_lazy_mmu_mode();
  2460. return (void *)vaddr;
  2461. @@ -110,6 +116,9 @@
  2462. * is a bad idea also, in case the page changes cacheability
  2463. * attributes or becomes a protected page in a hypervisor.
  2464. */
  2465. +#ifdef CONFIG_PREEMPT_RT_FULL
  2466. + current->kmap_pte[type] = __pte(0);
  2467. +#endif
  2468. kpte_clear_flush(kmap_pte-idx, vaddr);
  2469. kmap_atomic_idx_pop();
  2470. }
  2471. diff -Nur linux-3.18.10.orig/arch/x86/platform/uv/tlb_uv.c linux-3.18.10/arch/x86/platform/uv/tlb_uv.c
  2472. --- linux-3.18.10.orig/arch/x86/platform/uv/tlb_uv.c 2015-03-24 02:05:12.000000000 +0100
  2473. +++ linux-3.18.10/arch/x86/platform/uv/tlb_uv.c 2015-03-26 12:42:13.563582336 +0100
  2474. @@ -714,9 +714,9 @@
  2475. quiesce_local_uvhub(hmaster);
  2476. - spin_lock(&hmaster->queue_lock);
  2477. + raw_spin_lock(&hmaster->queue_lock);
  2478. reset_with_ipi(&bau_desc->distribution, bcp);
  2479. - spin_unlock(&hmaster->queue_lock);
  2480. + raw_spin_unlock(&hmaster->queue_lock);
  2481. end_uvhub_quiesce(hmaster);
  2482. @@ -736,9 +736,9 @@
  2483. quiesce_local_uvhub(hmaster);
  2484. - spin_lock(&hmaster->queue_lock);
  2485. + raw_spin_lock(&hmaster->queue_lock);
  2486. reset_with_ipi(&bau_desc->distribution, bcp);
  2487. - spin_unlock(&hmaster->queue_lock);
  2488. + raw_spin_unlock(&hmaster->queue_lock);
  2489. end_uvhub_quiesce(hmaster);
  2490. @@ -759,7 +759,7 @@
  2491. cycles_t tm1;
  2492. hmaster = bcp->uvhub_master;
  2493. - spin_lock(&hmaster->disable_lock);
  2494. + raw_spin_lock(&hmaster->disable_lock);
  2495. if (!bcp->baudisabled) {
  2496. stat->s_bau_disabled++;
  2497. tm1 = get_cycles();
  2498. @@ -772,7 +772,7 @@
  2499. }
  2500. }
  2501. }
  2502. - spin_unlock(&hmaster->disable_lock);
  2503. + raw_spin_unlock(&hmaster->disable_lock);
  2504. }
  2505. static void count_max_concurr(int stat, struct bau_control *bcp,
  2506. @@ -835,7 +835,7 @@
  2507. */
  2508. static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
  2509. {
  2510. - spinlock_t *lock = &hmaster->uvhub_lock;
  2511. + raw_spinlock_t *lock = &hmaster->uvhub_lock;
  2512. atomic_t *v;
  2513. v = &hmaster->active_descriptor_count;
  2514. @@ -968,7 +968,7 @@
  2515. struct bau_control *hmaster;
  2516. hmaster = bcp->uvhub_master;
  2517. - spin_lock(&hmaster->disable_lock);
  2518. + raw_spin_lock(&hmaster->disable_lock);
  2519. if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
  2520. stat->s_bau_reenabled++;
  2521. for_each_present_cpu(tcpu) {
  2522. @@ -980,10 +980,10 @@
  2523. tbcp->period_giveups = 0;
  2524. }
  2525. }
  2526. - spin_unlock(&hmaster->disable_lock);
  2527. + raw_spin_unlock(&hmaster->disable_lock);
  2528. return 0;
  2529. }
  2530. - spin_unlock(&hmaster->disable_lock);
  2531. + raw_spin_unlock(&hmaster->disable_lock);
  2532. return -1;
  2533. }
  2534. @@ -1899,9 +1899,9 @@
  2535. bcp->cong_reps = congested_reps;
  2536. bcp->disabled_period = sec_2_cycles(disabled_period);
  2537. bcp->giveup_limit = giveup_limit;
  2538. - spin_lock_init(&bcp->queue_lock);
  2539. - spin_lock_init(&bcp->uvhub_lock);
  2540. - spin_lock_init(&bcp->disable_lock);
  2541. + raw_spin_lock_init(&bcp->queue_lock);
  2542. + raw_spin_lock_init(&bcp->uvhub_lock);
  2543. + raw_spin_lock_init(&bcp->disable_lock);
  2544. }
  2545. }
  2546. diff -Nur linux-3.18.10.orig/arch/x86/platform/uv/uv_time.c linux-3.18.10/arch/x86/platform/uv/uv_time.c
  2547. --- linux-3.18.10.orig/arch/x86/platform/uv/uv_time.c 2015-03-24 02:05:12.000000000 +0100
  2548. +++ linux-3.18.10/arch/x86/platform/uv/uv_time.c 2015-03-26 12:42:13.563582336 +0100
  2549. @@ -58,7 +58,7 @@
  2550. /* There is one of these allocated per node */
  2551. struct uv_rtc_timer_head {
  2552. - spinlock_t lock;
  2553. + raw_spinlock_t lock;
  2554. /* next cpu waiting for timer, local node relative: */
  2555. int next_cpu;
  2556. /* number of cpus on this node: */
  2557. @@ -178,7 +178,7 @@
  2558. uv_rtc_deallocate_timers();
  2559. return -ENOMEM;
  2560. }
  2561. - spin_lock_init(&head->lock);
  2562. + raw_spin_lock_init(&head->lock);
  2563. head->ncpus = uv_blade_nr_possible_cpus(bid);
  2564. head->next_cpu = -1;
  2565. blade_info[bid] = head;
  2566. @@ -232,7 +232,7 @@
  2567. unsigned long flags;
  2568. int next_cpu;
  2569. - spin_lock_irqsave(&head->lock, flags);
  2570. + raw_spin_lock_irqsave(&head->lock, flags);
  2571. next_cpu = head->next_cpu;
  2572. *t = expires;
  2573. @@ -244,12 +244,12 @@
  2574. if (uv_setup_intr(cpu, expires)) {
  2575. *t = ULLONG_MAX;
  2576. uv_rtc_find_next_timer(head, pnode);
  2577. - spin_unlock_irqrestore(&head->lock, flags);
  2578. + raw_spin_unlock_irqrestore(&head->lock, flags);
  2579. return -ETIME;
  2580. }
  2581. }
  2582. - spin_unlock_irqrestore(&head->lock, flags);
  2583. + raw_spin_unlock_irqrestore(&head->lock, flags);
  2584. return 0;
  2585. }
  2586. @@ -268,7 +268,7 @@
  2587. unsigned long flags;
  2588. int rc = 0;
  2589. - spin_lock_irqsave(&head->lock, flags);
  2590. + raw_spin_lock_irqsave(&head->lock, flags);
  2591. if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
  2592. rc = 1;
  2593. @@ -280,7 +280,7 @@
  2594. uv_rtc_find_next_timer(head, pnode);
  2595. }
  2596. - spin_unlock_irqrestore(&head->lock, flags);
  2597. + raw_spin_unlock_irqrestore(&head->lock, flags);
  2598. return rc;
  2599. }
  2600. @@ -300,13 +300,18 @@
  2601. static cycle_t uv_read_rtc(struct clocksource *cs)
  2602. {
  2603. unsigned long offset;
  2604. + cycle_t cycles;
  2605. + preempt_disable();
  2606. if (uv_get_min_hub_revision_id() == 1)
  2607. offset = 0;
  2608. else
  2609. offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
  2610. - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
  2611. + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
  2612. + preempt_enable();
  2613. +
  2614. + return cycles;
  2615. }
  2616. /*
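The uv_read_rtc() hunk shows a recurring PREEMPT_RT pattern: a reader that used to run with preemption implicitly disabled must now pin itself to the CPU for the duration of a per-CPU access. A generic sketch of the same shape; per_cpu_counter is a hypothetical per-CPU variable, not part of the patch:

	static DEFINE_PER_CPU(u64, per_cpu_counter);

	static u64 read_local_counter(void)
	{
		u64 val;

		preempt_disable();	/* stay on this CPU for the access */
		val = __this_cpu_read(per_cpu_counter);
		preempt_enable();

		return val;
	}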
  2617. diff -Nur linux-3.18.10.orig/arch/xtensa/mm/fault.c linux-3.18.10/arch/xtensa/mm/fault.c
  2618. --- linux-3.18.10.orig/arch/xtensa/mm/fault.c 2015-03-24 02:05:12.000000000 +0100
  2619. +++ linux-3.18.10/arch/xtensa/mm/fault.c 2015-03-26 12:42:13.563582336 +0100
  2620. @@ -57,7 +57,7 @@
  2621. /* If we're in an interrupt or have no user
  2622. * context, we must not take the fault..
  2623. */
  2624. - if (in_atomic() || !mm) {
  2625. + if (!mm || pagefault_disabled()) {
  2626. bad_page_fault(regs, address, SIGSEGV);
  2627. return;
  2628. }
  2629. diff -Nur linux-3.18.10.orig/block/blk-core.c linux-3.18.10/block/blk-core.c
  2630. --- linux-3.18.10.orig/block/blk-core.c 2015-03-24 02:05:12.000000000 +0100
  2631. +++ linux-3.18.10/block/blk-core.c 2015-03-26 12:42:13.563582336 +0100
  2632. @@ -100,6 +100,9 @@
  2633. INIT_LIST_HEAD(&rq->queuelist);
  2634. INIT_LIST_HEAD(&rq->timeout_list);
2635. +#ifdef CONFIG_PREEMPT_RT_FULL
  2636. + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
  2637. +#endif
  2638. rq->cpu = -1;
  2639. rq->q = q;
  2640. rq->__sector = (sector_t) -1;
  2641. @@ -194,7 +197,7 @@
  2642. **/
  2643. void blk_start_queue(struct request_queue *q)
  2644. {
  2645. - WARN_ON(!irqs_disabled());
  2646. + WARN_ON_NONRT(!irqs_disabled());
  2647. queue_flag_clear(QUEUE_FLAG_STOPPED, q);
  2648. __blk_run_queue(q);
  2649. @@ -627,7 +630,7 @@
  2650. q->bypass_depth = 1;
  2651. __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
  2652. - init_waitqueue_head(&q->mq_freeze_wq);
  2653. + init_swait_head(&q->mq_freeze_wq);
  2654. if (blkcg_init_queue(q))
  2655. goto fail_bdi;
  2656. @@ -3037,7 +3040,7 @@
  2657. blk_run_queue_async(q);
  2658. else
  2659. __blk_run_queue(q);
  2660. - spin_unlock(q->queue_lock);
  2661. + spin_unlock_irq(q->queue_lock);
  2662. }
  2663. static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
  2664. @@ -3085,7 +3088,6 @@
  2665. void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  2666. {
  2667. struct request_queue *q;
  2668. - unsigned long flags;
  2669. struct request *rq;
  2670. LIST_HEAD(list);
  2671. unsigned int depth;
  2672. @@ -3105,11 +3107,6 @@
  2673. q = NULL;
  2674. depth = 0;
  2675. - /*
  2676. - * Save and disable interrupts here, to avoid doing it for every
  2677. - * queue lock we have to take.
  2678. - */
  2679. - local_irq_save(flags);
  2680. while (!list_empty(&list)) {
  2681. rq = list_entry_rq(list.next);
  2682. list_del_init(&rq->queuelist);
  2683. @@ -3122,7 +3119,7 @@
  2684. queue_unplugged(q, depth, from_schedule);
  2685. q = rq->q;
  2686. depth = 0;
  2687. - spin_lock(q->queue_lock);
  2688. + spin_lock_irq(q->queue_lock);
  2689. }
  2690. /*
  2691. @@ -3149,8 +3146,6 @@
  2692. */
  2693. if (q)
  2694. queue_unplugged(q, depth, from_schedule);
  2695. -
  2696. - local_irq_restore(flags);
  2697. }
  2698. void blk_finish_plug(struct blk_plug *plug)
  2699. diff -Nur linux-3.18.10.orig/block/blk-ioc.c linux-3.18.10/block/blk-ioc.c
  2700. --- linux-3.18.10.orig/block/blk-ioc.c 2015-03-24 02:05:12.000000000 +0100
  2701. +++ linux-3.18.10/block/blk-ioc.c 2015-03-26 12:42:13.563582336 +0100
  2702. @@ -7,6 +7,7 @@
  2703. #include <linux/bio.h>
  2704. #include <linux/blkdev.h>
  2705. #include <linux/slab.h>
  2706. +#include <linux/delay.h>
  2707. #include "blk.h"
  2708. @@ -109,7 +110,7 @@
  2709. spin_unlock(q->queue_lock);
  2710. } else {
  2711. spin_unlock_irqrestore(&ioc->lock, flags);
  2712. - cpu_relax();
  2713. + cpu_chill();
  2714. spin_lock_irqsave_nested(&ioc->lock, flags, 1);
  2715. }
  2716. }
  2717. @@ -187,7 +188,7 @@
  2718. spin_unlock(icq->q->queue_lock);
  2719. } else {
  2720. spin_unlock_irqrestore(&ioc->lock, flags);
  2721. - cpu_relax();
  2722. + cpu_chill();
  2723. goto retry;
  2724. }
  2725. }
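In the two blk-ioc.c retry loops above, busy-waiting with cpu_relax() can live-lock on PREEMPT_RT because the lock holder may itself be preempted; cpu_chill() (added by this patch set through linux/delay.h) sleeps briefly instead. The shape of such a trylock-and-back-off loop, schematically; outer_lock, inner_lock and take_both() are placeholders, not code from the patch:

	static DEFINE_SPINLOCK(outer_lock);
	static DEFINE_SPINLOCK(inner_lock);

	static void take_both(void)
	{
		unsigned long flags;

	retry:
		spin_lock_irqsave(&outer_lock, flags);
		if (!spin_trylock(&inner_lock)) {
			spin_unlock_irqrestore(&outer_lock, flags);
			cpu_chill();	/* brief sleep, not a busy wait */
			goto retry;
		}
		/* ... both locks held ... */
		spin_unlock(&inner_lock);
		spin_unlock_irqrestore(&outer_lock, flags);
	}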
  2726. diff -Nur linux-3.18.10.orig/block/blk-iopoll.c linux-3.18.10/block/blk-iopoll.c
  2727. --- linux-3.18.10.orig/block/blk-iopoll.c 2015-03-24 02:05:12.000000000 +0100
  2728. +++ linux-3.18.10/block/blk-iopoll.c 2015-03-26 12:42:13.563582336 +0100
  2729. @@ -35,6 +35,7 @@
  2730. list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
  2731. __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
  2732. local_irq_restore(flags);
  2733. + preempt_check_resched_rt();
  2734. }
  2735. EXPORT_SYMBOL(blk_iopoll_sched);
  2736. @@ -132,6 +133,7 @@
  2737. __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
  2738. local_irq_enable();
  2739. + preempt_check_resched_rt();
  2740. }
  2741. /**
  2742. @@ -201,6 +203,7 @@
  2743. this_cpu_ptr(&blk_cpu_iopoll));
  2744. __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
  2745. local_irq_enable();
  2746. + preempt_check_resched_rt();
  2747. }
  2748. return NOTIFY_OK;
  2749. diff -Nur linux-3.18.10.orig/block/blk-mq.c linux-3.18.10/block/blk-mq.c
  2750. --- linux-3.18.10.orig/block/blk-mq.c 2015-03-24 02:05:12.000000000 +0100
  2751. +++ linux-3.18.10/block/blk-mq.c 2015-03-26 12:42:13.563582336 +0100
  2752. @@ -85,7 +85,7 @@
  2753. if (percpu_ref_tryget_live(&q->mq_usage_counter))
  2754. return 0;
  2755. - ret = wait_event_interruptible(q->mq_freeze_wq,
  2756. + ret = swait_event_interruptible(q->mq_freeze_wq,
  2757. !q->mq_freeze_depth || blk_queue_dying(q));
  2758. if (blk_queue_dying(q))
  2759. return -ENODEV;
  2760. @@ -104,7 +104,7 @@
  2761. struct request_queue *q =
  2762. container_of(ref, struct request_queue, mq_usage_counter);
  2763. - wake_up_all(&q->mq_freeze_wq);
  2764. + swait_wake_all(&q->mq_freeze_wq);
  2765. }
  2766. static void blk_mq_freeze_queue_start(struct request_queue *q)
  2767. @@ -123,7 +123,7 @@
  2768. static void blk_mq_freeze_queue_wait(struct request_queue *q)
  2769. {
  2770. - wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
  2771. + swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
  2772. }
  2773. /*
  2774. @@ -146,7 +146,7 @@
  2775. spin_unlock_irq(q->queue_lock);
  2776. if (wake) {
  2777. percpu_ref_reinit(&q->mq_usage_counter);
  2778. - wake_up_all(&q->mq_freeze_wq);
  2779. + swait_wake_all(&q->mq_freeze_wq);
  2780. }
  2781. }
  2782. @@ -194,6 +194,9 @@
  2783. rq->resid_len = 0;
  2784. rq->sense = NULL;
2785. +#ifdef CONFIG_PREEMPT_RT_FULL
  2786. + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
  2787. +#endif
  2788. INIT_LIST_HEAD(&rq->timeout_list);
  2789. rq->timeout = 0;
  2790. @@ -313,6 +316,17 @@
  2791. }
  2792. EXPORT_SYMBOL(blk_mq_end_request);
  2793. +#ifdef CONFIG_PREEMPT_RT_FULL
  2794. +
  2795. +void __blk_mq_complete_request_remote_work(struct work_struct *work)
  2796. +{
  2797. + struct request *rq = container_of(work, struct request, work);
  2798. +
  2799. + rq->q->softirq_done_fn(rq);
  2800. +}
  2801. +
  2802. +#else
  2803. +
  2804. static void __blk_mq_complete_request_remote(void *data)
  2805. {
  2806. struct request *rq = data;
  2807. @@ -320,6 +334,8 @@
  2808. rq->q->softirq_done_fn(rq);
  2809. }
  2810. +#endif
  2811. +
  2812. static void blk_mq_ipi_complete_request(struct request *rq)
  2813. {
  2814. struct blk_mq_ctx *ctx = rq->mq_ctx;
  2815. @@ -331,19 +347,23 @@
  2816. return;
  2817. }
  2818. - cpu = get_cpu();
  2819. + cpu = get_cpu_light();
  2820. if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
  2821. shared = cpus_share_cache(cpu, ctx->cpu);
  2822. if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
2823. +#ifdef CONFIG_PREEMPT_RT_FULL
  2824. + schedule_work_on(ctx->cpu, &rq->work);
  2825. +#else
  2826. rq->csd.func = __blk_mq_complete_request_remote;
  2827. rq->csd.info = rq;
  2828. rq->csd.flags = 0;
  2829. smp_call_function_single_async(ctx->cpu, &rq->csd);
  2830. +#endif
  2831. } else {
  2832. rq->q->softirq_done_fn(rq);
  2833. }
  2834. - put_cpu();
  2835. + put_cpu_light();
  2836. }
  2837. void __blk_mq_complete_request(struct request *rq)
  2838. @@ -814,9 +834,9 @@
  2839. test_bit(BLK_MQ_S_STOPPED, &hctx->state))
  2840. continue;
  2841. - preempt_disable();
  2842. + migrate_disable();
  2843. blk_mq_run_hw_queue(hctx, async);
  2844. - preempt_enable();
  2845. + migrate_enable();
  2846. }
  2847. }
  2848. EXPORT_SYMBOL(blk_mq_run_queues);
  2849. @@ -843,9 +863,9 @@
  2850. {
  2851. clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
  2852. - preempt_disable();
  2853. + migrate_disable();
  2854. blk_mq_run_hw_queue(hctx, false);
  2855. - preempt_enable();
  2856. + migrate_enable();
  2857. }
  2858. EXPORT_SYMBOL(blk_mq_start_hw_queue);
  2859. @@ -870,9 +890,9 @@
  2860. continue;
  2861. clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
  2862. - preempt_disable();
  2863. + migrate_disable();
  2864. blk_mq_run_hw_queue(hctx, async);
  2865. - preempt_enable();
  2866. + migrate_enable();
  2867. }
  2868. }
  2869. EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
  2870. @@ -1494,7 +1514,7 @@
  2871. {
  2872. struct blk_mq_hw_ctx *hctx = data;
  2873. - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
  2874. + if (action == CPU_POST_DEAD)
  2875. return blk_mq_hctx_cpu_offline(hctx, cpu);
  2876. else if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
  2877. return blk_mq_hctx_cpu_online(hctx, cpu);
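The blk-mq hunks above swap the IPI-driven remote completion for a per-request work item on PREEMPT_RT, since running the completion from smp_call_function context does not work there. Schematically, mirroring the INIT_WORK()/schedule_work_on() calls the patch adds; remote_complete_work() is an illustrative name and the fragments are not a complete function:

	static void remote_complete_work(struct work_struct *work)
	{
		struct request *rq = container_of(work, struct request, work);

		rq->q->softirq_done_fn(rq);	/* completion in process context */
	}

	/* at request initialisation time */
	INIT_WORK(&rq->work, remote_complete_work);

	/* on PREEMPT_RT, instead of smp_call_function_single_async() */
	schedule_work_on(ctx->cpu, &rq->work);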
  2878. diff -Nur linux-3.18.10.orig/block/blk-mq-cpu.c linux-3.18.10/block/blk-mq-cpu.c
  2879. --- linux-3.18.10.orig/block/blk-mq-cpu.c 2015-03-24 02:05:12.000000000 +0100
  2880. +++ linux-3.18.10/block/blk-mq-cpu.c 2015-03-26 12:42:13.563582336 +0100
  2881. @@ -16,7 +16,7 @@
  2882. #include "blk-mq.h"
  2883. static LIST_HEAD(blk_mq_cpu_notify_list);
  2884. -static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
  2885. +static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);
  2886. static int blk_mq_main_cpu_notify(struct notifier_block *self,
  2887. unsigned long action, void *hcpu)
  2888. @@ -25,7 +25,10 @@
  2889. struct blk_mq_cpu_notifier *notify;
  2890. int ret = NOTIFY_OK;
  2891. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  2892. + if (action != CPU_POST_DEAD)
  2893. + return NOTIFY_OK;
  2894. +
  2895. + spin_lock(&blk_mq_cpu_notify_lock);
  2896. list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) {
  2897. ret = notify->notify(notify->data, action, cpu);
  2898. @@ -33,7 +36,7 @@
  2899. break;
  2900. }
  2901. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  2902. + spin_unlock(&blk_mq_cpu_notify_lock);
  2903. return ret;
  2904. }
  2905. @@ -41,16 +44,16 @@
  2906. {
  2907. BUG_ON(!notifier->notify);
  2908. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  2909. + spin_lock(&blk_mq_cpu_notify_lock);
  2910. list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
  2911. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  2912. + spin_unlock(&blk_mq_cpu_notify_lock);
  2913. }
  2914. void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
  2915. {
  2916. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  2917. + spin_lock(&blk_mq_cpu_notify_lock);
  2918. list_del(&notifier->list);
  2919. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  2920. + spin_unlock(&blk_mq_cpu_notify_lock);
  2921. }
  2922. void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
  2923. diff -Nur linux-3.18.10.orig/block/blk-mq.h linux-3.18.10/block/blk-mq.h
  2924. --- linux-3.18.10.orig/block/blk-mq.h 2015-03-24 02:05:12.000000000 +0100
  2925. +++ linux-3.18.10/block/blk-mq.h 2015-03-26 12:42:13.563582336 +0100
  2926. @@ -73,7 +73,10 @@
  2927. static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
  2928. unsigned int cpu)
  2929. {
  2930. - return per_cpu_ptr(q->queue_ctx, cpu);
  2931. + struct blk_mq_ctx *ctx;
  2932. +
  2933. + ctx = per_cpu_ptr(q->queue_ctx, cpu);
  2934. + return ctx;
  2935. }
  2936. /*
  2937. @@ -84,12 +87,12 @@
  2938. */
  2939. static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
  2940. {
  2941. - return __blk_mq_get_ctx(q, get_cpu());
  2942. + return __blk_mq_get_ctx(q, get_cpu_light());
  2943. }
  2944. static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
  2945. {
  2946. - put_cpu();
  2947. + put_cpu_light();
  2948. }
  2949. struct blk_mq_alloc_data {
  2950. diff -Nur linux-3.18.10.orig/block/blk-softirq.c linux-3.18.10/block/blk-softirq.c
  2951. --- linux-3.18.10.orig/block/blk-softirq.c 2015-03-24 02:05:12.000000000 +0100
  2952. +++ linux-3.18.10/block/blk-softirq.c 2015-03-26 12:42:13.563582336 +0100
  2953. @@ -51,6 +51,7 @@
  2954. raise_softirq_irqoff(BLOCK_SOFTIRQ);
  2955. local_irq_restore(flags);
  2956. + preempt_check_resched_rt();
  2957. }
  2958. /*
  2959. @@ -93,6 +94,7 @@
  2960. this_cpu_ptr(&blk_cpu_done));
  2961. raise_softirq_irqoff(BLOCK_SOFTIRQ);
  2962. local_irq_enable();
  2963. + preempt_check_resched_rt();
  2964. }
  2965. return NOTIFY_OK;
  2966. @@ -150,6 +152,7 @@
  2967. goto do_local;
  2968. local_irq_restore(flags);
  2969. + preempt_check_resched_rt();
  2970. }
  2971. /**
  2972. diff -Nur linux-3.18.10.orig/block/bounce.c linux-3.18.10/block/bounce.c
  2973. --- linux-3.18.10.orig/block/bounce.c 2015-03-24 02:05:12.000000000 +0100
  2974. +++ linux-3.18.10/block/bounce.c 2015-03-26 12:42:13.567582340 +0100
  2975. @@ -54,11 +54,11 @@
  2976. unsigned long flags;
  2977. unsigned char *vto;
  2978. - local_irq_save(flags);
  2979. + local_irq_save_nort(flags);
  2980. vto = kmap_atomic(to->bv_page);
  2981. memcpy(vto + to->bv_offset, vfrom, to->bv_len);
  2982. kunmap_atomic(vto);
  2983. - local_irq_restore(flags);
  2984. + local_irq_restore_nort(flags);
  2985. }
  2986. #else /* CONFIG_HIGHMEM */
  2987. diff -Nur linux-3.18.10.orig/crypto/algapi.c linux-3.18.10/crypto/algapi.c
  2988. --- linux-3.18.10.orig/crypto/algapi.c 2015-03-24 02:05:12.000000000 +0100
  2989. +++ linux-3.18.10/crypto/algapi.c 2015-03-26 12:42:13.567582340 +0100
  2990. @@ -698,13 +698,13 @@
  2991. int crypto_register_notifier(struct notifier_block *nb)
  2992. {
  2993. - return blocking_notifier_chain_register(&crypto_chain, nb);
  2994. + return srcu_notifier_chain_register(&crypto_chain, nb);
  2995. }
  2996. EXPORT_SYMBOL_GPL(crypto_register_notifier);
  2997. int crypto_unregister_notifier(struct notifier_block *nb)
  2998. {
  2999. - return blocking_notifier_chain_unregister(&crypto_chain, nb);
  3000. + return srcu_notifier_chain_unregister(&crypto_chain, nb);
  3001. }
  3002. EXPORT_SYMBOL_GPL(crypto_unregister_notifier);
  3003. diff -Nur linux-3.18.10.orig/crypto/api.c linux-3.18.10/crypto/api.c
  3004. --- linux-3.18.10.orig/crypto/api.c 2015-03-24 02:05:12.000000000 +0100
  3005. +++ linux-3.18.10/crypto/api.c 2015-03-26 12:42:13.567582340 +0100
  3006. @@ -31,7 +31,7 @@
  3007. DECLARE_RWSEM(crypto_alg_sem);
  3008. EXPORT_SYMBOL_GPL(crypto_alg_sem);
  3009. -BLOCKING_NOTIFIER_HEAD(crypto_chain);
  3010. +SRCU_NOTIFIER_HEAD(crypto_chain);
  3011. EXPORT_SYMBOL_GPL(crypto_chain);
  3012. static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg);
  3013. @@ -236,10 +236,10 @@
  3014. {
  3015. int ok;
  3016. - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
  3017. + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
  3018. if (ok == NOTIFY_DONE) {
  3019. request_module("cryptomgr");
  3020. - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
  3021. + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
  3022. }
  3023. return ok;
  3024. diff -Nur linux-3.18.10.orig/crypto/internal.h linux-3.18.10/crypto/internal.h
  3025. --- linux-3.18.10.orig/crypto/internal.h 2015-03-24 02:05:12.000000000 +0100
  3026. +++ linux-3.18.10/crypto/internal.h 2015-03-26 12:42:13.567582340 +0100
  3027. @@ -48,7 +48,7 @@
  3028. extern struct list_head crypto_alg_list;
  3029. extern struct rw_semaphore crypto_alg_sem;
  3030. -extern struct blocking_notifier_head crypto_chain;
  3031. +extern struct srcu_notifier_head crypto_chain;
  3032. #ifdef CONFIG_PROC_FS
  3033. void __init crypto_init_proc(void);
  3034. @@ -142,7 +142,7 @@
  3035. static inline void crypto_notify(unsigned long val, void *v)
  3036. {
  3037. - blocking_notifier_call_chain(&crypto_chain, val, v);
  3038. + srcu_notifier_call_chain(&crypto_chain, val, v);
  3039. }
  3040. #endif /* _CRYPTO_INTERNAL_H */
  3041. diff -Nur linux-3.18.10.orig/Documentation/hwlat_detector.txt linux-3.18.10/Documentation/hwlat_detector.txt
  3042. --- linux-3.18.10.orig/Documentation/hwlat_detector.txt 1970-01-01 01:00:00.000000000 +0100
  3043. +++ linux-3.18.10/Documentation/hwlat_detector.txt 2015-03-26 12:42:13.555582327 +0100
  3044. @@ -0,0 +1,64 @@
  3045. +Introduction:
  3046. +-------------
  3047. +
  3048. +The module hwlat_detector is a special purpose kernel module that is used to
  3049. +detect large system latencies induced by the behavior of certain underlying
  3050. +hardware or firmware, independent of Linux itself. The code was developed
  3051. +originally to detect SMIs (System Management Interrupts) on x86 systems,
  3052. +however there is nothing x86 specific about this patchset. It was
  3053. +originally written for use by the "RT" patch since the Real Time
  3054. +kernel is highly latency sensitive.
  3055. +
  3056. +SMIs are usually not serviced by the Linux kernel, which typically does not
  3057. +even know that they are occurring. SMIs are instead set up by BIOS code
  3058. +and are serviced by BIOS code, usually for "critical" events such as
  3059. +management of thermal sensors and fans. Sometimes though, SMIs are used for
  3060. +other tasks and those tasks can spend an inordinate amount of time in the
  3061. +handler (sometimes measured in milliseconds). Obviously this is a problem if
  3062. +you are trying to keep event service latencies down in the microsecond range.
  3063. +
  3064. +The hardware latency detector works by hogging all of the CPUs for configurable
  3065. +amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter
  3066. +for some period, then looking for gaps in the TSC data. Any gap indicates a
  3067. +time when the polling was interrupted; since the machine is stopped and
  3068. +interrupts are turned off, the only thing that could cause such a gap is an SMI.
  3069. +
  3070. +Note that the SMI detector should *NEVER* be used in a production environment.
  3071. +It is intended to be run manually to determine if the hardware platform has a
  3072. +problem with long system firmware service routines.
  3073. +
  3074. +Usage:
  3075. +------
  3076. +
  3077. +Loading the module hwlat_detector passing the parameter "enabled=1" (or by
  3078. +setting the "enable" entry in "hwlat_detector" debugfs toggled on) is the only
  3079. +step required to start the hwlat_detector. It is possible to redefine the
  3080. +threshold in microseconds (us) above which latency spikes will be taken
  3081. +into account (parameter "threshold=").
  3082. +
  3083. +Example:
  3084. +
  3085. + # modprobe hwlat_detector enabled=1 threshold=100
  3086. +
  3087. +After the module is loaded, it creates a directory named "hwlat_detector" under
  3088. +the debugfs mountpoint, "/debug/hwlat_detector" for this text. It is necessary
  3089. +to have debugfs mounted, which might be on /sys/debug on your system.
  3090. +
  3091. +The /debug/hwlat_detector interface contains the following files:
  3092. +
  3093. +count - number of latency spikes observed since last reset
  3094. +enable - a global enable/disable toggle (0/1), resets count
  3095. +max - maximum hardware latency actually observed (usecs)
  3096. +sample - a pipe from which to read current raw sample data
  3097. + in the format <timestamp> <latency observed usecs>
  3098. + (can be opened O_NONBLOCK for a single sample)
  3099. +threshold - minimum latency value to be considered (usecs)
  3100. +width - time period to sample with CPUs held (usecs)
  3101. + must be less than the total window size (enforced)
  3102. +window - total period of sampling, width being inside (usecs)
  3103. +
  3104. +By default we will set width to 500,000 and window to 1,000,000, meaning that
  3105. +we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we
  3106. +observe any latencies that exceed the threshold (initially 100 usecs),
  3107. +then we write to a global sample ring buffer of 8K samples, which is
  3108. +consumed by reading from the "sample" (pipe) debugfs file interface.
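A minimal shell sketch of the workflow described above, assuming debugfs is
mounted at /sys/kernel/debug (adjust the path for a /debug mount); the width,
window and threshold values are placeholders chosen for illustration:

 # modprobe hwlat_detector enabled=1 threshold=100
 # cd /sys/kernel/debug/hwlat_detector
 # echo 250000 > width         # hold the CPUs for 250 ms per sample period
 # echo 1000000 > window       # out of every 1 s sampling window
 # cat threshold               # minimum latency (usecs) counted as a spike
 # cat count                   # spikes observed since the last reset
 # cat max                     # worst hardware latency seen so far (usecs)
 # cat sample                  # stream of "<timestamp> <latency usecs>" records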
  3109. diff -Nur linux-3.18.10.orig/Documentation/sysrq.txt linux-3.18.10/Documentation/sysrq.txt
  3110. --- linux-3.18.10.orig/Documentation/sysrq.txt 2015-03-24 02:05:12.000000000 +0100
  3111. +++ linux-3.18.10/Documentation/sysrq.txt 2015-03-26 12:42:13.555582327 +0100
  3112. @@ -59,10 +59,17 @@
  3113. On other - If you know of the key combos for other architectures, please
  3114. let me know so I can add them to this section.
  3115. -On all - write a character to /proc/sysrq-trigger. e.g.:
  3116. -
  3117. +On all - write a character to /proc/sysrq-trigger, e.g.:
  3118. echo t > /proc/sysrq-trigger
  3119. +On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g.
  3120. + echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq
  3121. + Send an ICMP echo request with this pattern plus the particular
  3122. + SysRq command key. Example:
  3123. + # ping -c1 -s57 -p0102030468
  3124. + will trigger the SysRq-H (help) command.
  3125. +
  3126. +
  3127. * What are the 'command' keys?
  3128. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  3129. 'b' - Will immediately reboot the system without syncing or unmounting
  3130. diff -Nur linux-3.18.10.orig/Documentation/trace/histograms.txt linux-3.18.10/Documentation/trace/histograms.txt
  3131. --- linux-3.18.10.orig/Documentation/trace/histograms.txt 1970-01-01 01:00:00.000000000 +0100
  3132. +++ linux-3.18.10/Documentation/trace/histograms.txt 2015-03-26 12:42:13.555582327 +0100
  3133. @@ -0,0 +1,186 @@
  3134. + Using the Linux Kernel Latency Histograms
  3135. +
  3136. +
  3137. +This document gives a short explanation of how to enable, configure and use
  3138. +latency histograms. Latency histograms are primarily relevant in the
  3139. +context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT)
  3140. +and are used in the quality management of the Linux real-time
  3141. +capabilities.
  3142. +
  3143. +
  3144. +* Purpose of latency histograms
  3145. +
  3146. +A latency histogram continuously accumulates the frequencies of latency
  3147. +data. There are two types of histograms:
  3148. +- potential sources of latencies
  3149. +- effective latencies
  3150. +
  3151. +
  3152. +* Potential sources of latencies
  3153. +
  3154. +Potential sources of latencies are code segments where interrupts,
  3155. +preemption or both are disabled (aka critical sections). To create
  3156. +histograms of potential sources of latency, the kernel stores the time
  3157. +stamp at the start of a critical section, determines the time elapsed
  3158. +when the end of the section is reached, and increments the frequency
  3159. +counter of that latency value - irrespective of whether any concurrently
  3160. +running process is affected by latency or not.
  3161. +- Configuration items (in the Kernel hacking/Tracers submenu)
  3162. + CONFIG_INTERRUPT_OFF_LATENCY
  3163. + CONFIG_PREEMPT_OFF_LATENCY
  3164. +
  3165. +
  3166. +* Effective latencies
  3167. +
  3168. +Effective latencies are those actually occurring during the wakeup of a process. To
  3169. +determine effective latencies, the kernel stores the time stamp when a
  3170. +process is scheduled to be woken up, and determines the duration of the
  3171. +wakeup time shortly before control is passed over to this process. Note
  3172. +that the apparent latency in user space may be somewhat longer, since the
  3173. +process may be interrupted after control is passed over to it but before
  3174. +the execution in user space takes place. Simply measuring the interval
  3175. +between enqueuing and wakeup may also not be appropriate in cases when a
  3176. +process is scheduled as a result of a timer expiration. The timer may have
  3177. +missed its deadline, e.g. due to disabled interrupts, but this latency
  3178. +would not be registered. Therefore, the offsets of missed timers are
  3179. +recorded in a separate histogram. If both wakeup latency and missed timer
  3180. +offsets are configured and enabled, a third histogram may be enabled that
  3181. +records the overall latency as a sum of the timer latency, if any, and the
  3182. +wakeup latency. This histogram is called "timerandwakeup".
  3183. +- Configuration items (in the Kernel hacking/Tracers submenu)
  3184. + CONFIG_WAKEUP_LATENCY
  3185. + CONFIG_MISSED_TIMER_OFSETS
  3186. +
  3187. +
  3188. +* Usage
  3189. +
  3190. +The interface to the administration of the latency histograms is located
  3191. +in the debugfs file system. To mount it, either enter
  3192. +
  3193. +mount -t sysfs nodev /sys
  3194. +mount -t debugfs nodev /sys/kernel/debug
  3195. +
  3196. +from shell command line level, or add
  3197. +
  3198. +nodev /sys sysfs defaults 0 0
  3199. +nodev /sys/kernel/debug debugfs defaults 0 0
  3200. +
  3201. +to the file /etc/fstab. All latency histogram related files are then
  3202. +available in the directory /sys/kernel/debug/tracing/latency_hist. A
  3203. +particular histogram type is enabled by writing non-zero to the related
  3204. +variable in the /sys/kernel/debug/tracing/latency_hist/enable directory.
  3205. +Select "preemptirqsoff" for the histograms of potential sources of
  3206. +latencies and "wakeup" for histograms of effective latencies etc. The
  3207. +histogram data - one per CPU - are available in the files
  3208. +
  3209. +/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx
  3210. +/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx
  3211. +/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx
  3212. +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx
  3213. +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx
  3214. +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx
  3215. +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx
  3216. +
  3217. +The histograms are reset by writing non-zero to the file "reset" in a
  3218. +particular latency directory. To reset all latency data, use
  3219. +
  3220. +#!/bin/sh
  3221. +
  3222. +TRACINGDIR=/sys/kernel/debug/tracing
  3223. +HISTDIR=$TRACINGDIR/latency_hist
  3224. +
  3225. +if test -d $HISTDIR
  3226. +then
  3227. + cd $HISTDIR
  3228. + for i in `find . | grep /reset$`
  3229. + do
  3230. + echo 1 >$i
  3231. + done
  3232. +fi
  3233. +
  3234. +
  3235. +* Data format
  3236. +
  3237. +Latency data are stored with a resolution of one microsecond. The
  3238. +maximum latency is 10,240 microseconds. The data are only valid if the
  3239. +overflow register is empty. Every output line contains the latency in
  3240. +microseconds in the first column and the number of samples in the second
  3241. +column. To display only lines with a positive latency count, use, for
  3242. +example,
  3243. +
  3244. +grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0
  3245. +
  3246. +#Minimum latency: 0 microseconds.
  3247. +#Average latency: 0 microseconds.
  3248. +#Maximum latency: 25 microseconds.
  3249. +#Total samples: 3104770694
  3250. +#There are 0 samples greater or equal than 10240 microseconds
  3251. +#usecs samples
  3252. + 0 2984486876
  3253. + 1 49843506
  3254. + 2 58219047
  3255. + 3 5348126
  3256. + 4 2187960
  3257. + 5 3388262
  3258. + 6 959289
  3259. + 7 208294
  3260. + 8 40420
  3261. + 9 4485
  3262. + 10 14918
  3263. + 11 18340
  3264. + 12 25052
  3265. + 13 19455
  3266. + 14 5602
  3267. + 15 969
  3268. + 16 47
  3269. + 17 18
  3270. + 18 14
  3271. + 19 1
  3272. + 20 3
  3273. + 21 2
  3274. + 22 5
  3275. + 23 2
  3276. + 25 1
  3277. +
  3278. +
  3279. +* Wakeup latency of a selected process
  3280. +
  3281. +To only collect wakeup latency data of a particular process, write the
  3282. +PID of the requested process to
  3283. +
  3284. +/sys/kernel/debug/tracing/latency_hist/wakeup/pid
  3285. +
  3286. +PIDs are not considered if this variable is set to 0.
  3287. +
  3288. +
  3289. +* Details of the process with the highest wakeup latency so far
  3290. +
  3291. +Selected data of the process that suffered from the highest wakeup
  3292. +latency that occurred in a particular CPU are available in the file
  3293. +
  3294. +/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx.
  3295. +
  3296. +In addition, other relevant system data at the time when the
  3297. +latency occurred are given.
  3298. +
  3299. +The format of the data is (all in one line):
  3300. +<PID> <Priority> <Latency> (<Timeroffset>) <Command> \
  3301. +<- <PID> <Priority> <Command> <Timestamp>
  3302. +
  3303. +The value of <Timeroffset> is only relevant in the combined timer
  3304. +and wakeup latency recording. In the wakeup recording, it is
  3305. +always 0; in the missed_timer_offsets recording, it is the same
  3306. +as <Latency>.
  3307. +
  3308. +When retrospectively searching for the origin of a latency while
  3309. +tracing was not enabled, it may be helpful to know the name and
  3310. +some basic data of the task that (finally) switched to the
  3311. +late real-time task. In addition to the victim's data, the data
  3312. +of the possible culprit are therefore also displayed after the
  3313. +"<-" symbol.
  3314. +
  3315. +Finally, the timestamp of the time when the latency occurred
  3316. +in <seconds>.<microseconds> after the most recent system boot
  3317. +is provided.
  3318. +
  3319. +These data are also reset when the wakeup histogram is reset.
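A short shell sketch tying the steps above together, assuming debugfs is
mounted at /sys/kernel/debug, the wakeup histogram options are configured in,
and PID 1234 is only a placeholder:

 # cd /sys/kernel/debug/tracing/latency_hist
 # echo 1 > enable/wakeup             # start recording wakeup latencies
 # echo 1234 > wakeup/pid             # optional: only track this PID (0 = all)
 # grep -v " 0$" wakeup/CPU0          # buckets with a non-zero sample count
 # cat wakeup/max_latency-CPU0        # worst wakeup latency on CPU0, with culprit data
 # echo 1 > wakeup/reset              # clear this histogram again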
  3320. diff -Nur linux-3.18.10.orig/drivers/acpi/acpica/acglobal.h linux-3.18.10/drivers/acpi/acpica/acglobal.h
  3321. --- linux-3.18.10.orig/drivers/acpi/acpica/acglobal.h 2015-03-24 02:05:12.000000000 +0100
  3322. +++ linux-3.18.10/drivers/acpi/acpica/acglobal.h 2015-03-26 12:42:13.567582340 +0100
  3323. @@ -112,7 +112,7 @@
  3324. * interrupt level
  3325. */
  3326. ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */
  3327. -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
  3328. +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
  3329. ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock);
  3330. /* Mutex for _OSI support */
  3331. diff -Nur linux-3.18.10.orig/drivers/acpi/acpica/hwregs.c linux-3.18.10/drivers/acpi/acpica/hwregs.c
  3332. --- linux-3.18.10.orig/drivers/acpi/acpica/hwregs.c 2015-03-24 02:05:12.000000000 +0100
  3333. +++ linux-3.18.10/drivers/acpi/acpica/hwregs.c 2015-03-26 12:42:13.567582340 +0100
  3334. @@ -269,14 +269,14 @@
  3335. ACPI_BITMASK_ALL_FIXED_STATUS,
  3336. ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address)));
  3337. - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
  3338. + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
  3339. /* Clear the fixed events in PM1 A/B */
  3340. status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
  3341. ACPI_BITMASK_ALL_FIXED_STATUS);
  3342. - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
  3343. + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
  3344. if (ACPI_FAILURE(status)) {
  3345. goto exit;
  3346. diff -Nur linux-3.18.10.orig/drivers/acpi/acpica/hwxface.c linux-3.18.10/drivers/acpi/acpica/hwxface.c
  3347. --- linux-3.18.10.orig/drivers/acpi/acpica/hwxface.c 2015-03-24 02:05:12.000000000 +0100
  3348. +++ linux-3.18.10/drivers/acpi/acpica/hwxface.c 2015-03-26 12:42:13.567582340 +0100
  3349. @@ -374,7 +374,7 @@
  3350. return_ACPI_STATUS(AE_BAD_PARAMETER);
  3351. }
  3352. - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
  3353. + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
  3354. /*
  3355. * At this point, we know that the parent register is one of the
  3356. @@ -435,7 +435,7 @@
  3357. unlock_and_exit:
  3358. - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
  3359. + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
  3360. return_ACPI_STATUS(status);
  3361. }
  3362. diff -Nur linux-3.18.10.orig/drivers/acpi/acpica/utmutex.c linux-3.18.10/drivers/acpi/acpica/utmutex.c
  3363. --- linux-3.18.10.orig/drivers/acpi/acpica/utmutex.c 2015-03-24 02:05:12.000000000 +0100
  3364. +++ linux-3.18.10/drivers/acpi/acpica/utmutex.c 2015-03-26 12:42:13.567582340 +0100
  3365. @@ -88,7 +88,7 @@
  3366. return_ACPI_STATUS (status);
  3367. }
  3368. - status = acpi_os_create_lock (&acpi_gbl_hardware_lock);
  3369. + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock);
  3370. if (ACPI_FAILURE (status)) {
  3371. return_ACPI_STATUS (status);
  3372. }
  3373. @@ -141,7 +141,7 @@
  3374. /* Delete the spinlocks */
  3375. acpi_os_delete_lock(acpi_gbl_gpe_lock);
  3376. - acpi_os_delete_lock(acpi_gbl_hardware_lock);
  3377. + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock);
  3378. acpi_os_delete_lock(acpi_gbl_reference_count_lock);
  3379. /* Delete the reader/writer lock */
  3380. diff -Nur linux-3.18.10.orig/drivers/ata/libata-sff.c linux-3.18.10/drivers/ata/libata-sff.c
  3381. --- linux-3.18.10.orig/drivers/ata/libata-sff.c 2015-03-24 02:05:12.000000000 +0100
  3382. +++ linux-3.18.10/drivers/ata/libata-sff.c 2015-03-26 12:42:13.567582340 +0100
  3383. @@ -678,9 +678,9 @@
  3384. unsigned long flags;
  3385. unsigned int consumed;
  3386. - local_irq_save(flags);
  3387. + local_irq_save_nort(flags);
  3388. consumed = ata_sff_data_xfer32(dev, buf, buflen, rw);
  3389. - local_irq_restore(flags);
  3390. + local_irq_restore_nort(flags);
  3391. return consumed;
  3392. }
  3393. @@ -719,7 +719,7 @@
  3394. unsigned long flags;
  3395. /* FIXME: use a bounce buffer */
  3396. - local_irq_save(flags);
  3397. + local_irq_save_nort(flags);
  3398. buf = kmap_atomic(page);
  3399. /* do the actual data transfer */
  3400. @@ -727,7 +727,7 @@
  3401. do_write);
  3402. kunmap_atomic(buf);
  3403. - local_irq_restore(flags);
  3404. + local_irq_restore_nort(flags);
  3405. } else {
  3406. buf = page_address(page);
  3407. ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size,
  3408. @@ -864,7 +864,7 @@
  3409. unsigned long flags;
  3410. /* FIXME: use bounce buffer */
  3411. - local_irq_save(flags);
  3412. + local_irq_save_nort(flags);
  3413. buf = kmap_atomic(page);
  3414. /* do the actual data transfer */
  3415. @@ -872,7 +872,7 @@
  3416. count, rw);
  3417. kunmap_atomic(buf);
  3418. - local_irq_restore(flags);
  3419. + local_irq_restore_nort(flags);
  3420. } else {
  3421. buf = page_address(page);
  3422. consumed = ap->ops->sff_data_xfer(dev, buf + offset,
  3423. diff -Nur linux-3.18.10.orig/drivers/char/random.c linux-3.18.10/drivers/char/random.c
  3424. --- linux-3.18.10.orig/drivers/char/random.c 2015-03-24 02:05:12.000000000 +0100
  3425. +++ linux-3.18.10/drivers/char/random.c 2015-03-26 12:42:13.567582340 +0100
  3426. @@ -776,8 +776,6 @@
  3427. } sample;
  3428. long delta, delta2, delta3;
  3429. - preempt_disable();
  3430. -
  3431. sample.jiffies = jiffies;
  3432. sample.cycles = random_get_entropy();
  3433. sample.num = num;
  3434. @@ -818,7 +816,6 @@
  3435. */
  3436. credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
  3437. }
  3438. - preempt_enable();
  3439. }
  3440. void add_input_randomness(unsigned int type, unsigned int code,
  3441. @@ -871,28 +868,27 @@
  3442. return *(ptr + f->reg_idx++);
  3443. }
  3444. -void add_interrupt_randomness(int irq, int irq_flags)
  3445. +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip)
  3446. {
  3447. struct entropy_store *r;
  3448. struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
  3449. - struct pt_regs *regs = get_irq_regs();
  3450. unsigned long now = jiffies;
  3451. cycles_t cycles = random_get_entropy();
  3452. __u32 c_high, j_high;
  3453. - __u64 ip;
  3454. unsigned long seed;
  3455. int credit = 0;
  3456. if (cycles == 0)
  3457. - cycles = get_reg(fast_pool, regs);
  3458. + cycles = get_reg(fast_pool, NULL);
  3459. c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
  3460. j_high = (sizeof(now) > 4) ? now >> 32 : 0;
  3461. fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
  3462. fast_pool->pool[1] ^= now ^ c_high;
  3463. - ip = regs ? instruction_pointer(regs) : _RET_IP_;
  3464. + if (!ip)
  3465. + ip = _RET_IP_;
  3466. fast_pool->pool[2] ^= ip;
  3467. fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 :
  3468. - get_reg(fast_pool, regs);
  3469. + get_reg(fast_pool, NULL);
  3470. fast_mix(fast_pool);
  3471. add_interrupt_bench(cycles);
  3472. diff -Nur linux-3.18.10.orig/drivers/clocksource/tcb_clksrc.c linux-3.18.10/drivers/clocksource/tcb_clksrc.c
  3473. --- linux-3.18.10.orig/drivers/clocksource/tcb_clksrc.c 2015-03-24 02:05:12.000000000 +0100
  3474. +++ linux-3.18.10/drivers/clocksource/tcb_clksrc.c 2015-03-26 12:42:13.567582340 +0100
  3475. @@ -23,8 +23,7 @@
  3476. * this 32 bit free-running counter. the second channel is not used.
  3477. *
  3478. * - The third channel may be used to provide a 16-bit clockevent
  3479. - * source, used in either periodic or oneshot mode. This runs
  3480. - * at 32 KiHZ, and can handle delays of up to two seconds.
  3481. + * source, used in either periodic or oneshot mode.
  3482. *
  3483. * A boot clocksource and clockevent source are also currently needed,
  3484. * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
  3485. @@ -74,6 +73,7 @@
  3486. struct tc_clkevt_device {
  3487. struct clock_event_device clkevt;
  3488. struct clk *clk;
  3489. + u32 freq;
  3490. void __iomem *regs;
  3491. };
  3492. @@ -82,13 +82,6 @@
  3493. return container_of(clkevt, struct tc_clkevt_device, clkevt);
  3494. }
  3495. -/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
  3496. - * because using one of the divided clocks would usually mean the
  3497. - * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
  3498. - *
  3499. - * A divided clock could be good for high resolution timers, since
  3500. - * 30.5 usec resolution can seem "low".
  3501. - */
  3502. static u32 timer_clock;
  3503. static void tc_mode(enum clock_event_mode m, struct clock_event_device *d)
  3504. @@ -111,11 +104,12 @@
  3505. case CLOCK_EVT_MODE_PERIODIC:
  3506. clk_enable(tcd->clk);
  3507. - /* slow clock, count up to RC, then irq and restart */
  3508. + /* count up to RC, then irq and restart */
  3509. __raw_writel(timer_clock
  3510. | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
  3511. regs + ATMEL_TC_REG(2, CMR));
  3512. - __raw_writel((32768 + HZ/2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
  3513. + __raw_writel((tcd->freq + HZ / 2) / HZ,
  3514. + tcaddr + ATMEL_TC_REG(2, RC));
  3515. /* Enable clock and interrupts on RC compare */
  3516. __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
  3517. @@ -128,7 +122,7 @@
  3518. case CLOCK_EVT_MODE_ONESHOT:
  3519. clk_enable(tcd->clk);
  3520. - /* slow clock, count up to RC, then irq and stop */
  3521. + /* count up to RC, then irq and stop */
  3522. __raw_writel(timer_clock | ATMEL_TC_CPCSTOP
  3523. | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
  3524. regs + ATMEL_TC_REG(2, CMR));
  3525. @@ -157,8 +151,12 @@
  3526. .name = "tc_clkevt",
  3527. .features = CLOCK_EVT_FEAT_PERIODIC
  3528. | CLOCK_EVT_FEAT_ONESHOT,
  3529. +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  3530. /* Should be lower than at91rm9200's system timer */
  3531. .rating = 125,
  3532. +#else
  3533. + .rating = 200,
  3534. +#endif
  3535. .set_next_event = tc_next_event,
  3536. .set_mode = tc_mode,
  3537. },
  3538. @@ -178,8 +176,9 @@
  3539. return IRQ_NONE;
  3540. }
  3541. -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
  3542. +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
  3543. {
  3544. + unsigned divisor = atmel_tc_divisors[divisor_idx];
  3545. int ret;
  3546. struct clk *t2_clk = tc->clk[2];
  3547. int irq = tc->irq[2];
  3548. @@ -193,7 +192,11 @@
  3549. clkevt.regs = tc->regs;
  3550. clkevt.clk = t2_clk;
  3551. - timer_clock = clk32k_divisor_idx;
  3552. + timer_clock = divisor_idx;
  3553. + if (!divisor)
  3554. + clkevt.freq = 32768;
  3555. + else
  3556. + clkevt.freq = clk_get_rate(t2_clk) / divisor;
  3557. clkevt.clkevt.cpumask = cpumask_of(0);
  3558. @@ -203,7 +206,7 @@
  3559. return ret;
  3560. }
  3561. - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff);
  3562. + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff);
  3563. return ret;
  3564. }
  3565. @@ -340,7 +343,11 @@
  3566. goto err_disable_t1;
  3567. /* channel 2: periodic and oneshot timer support */
  3568. +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  3569. ret = setup_clkevents(tc, clk32k_divisor_idx);
  3570. +#else
  3571. + ret = setup_clkevents(tc, best_divisor_idx);
  3572. +#endif
  3573. if (ret)
  3574. goto err_unregister_clksrc;
  3575. diff -Nur linux-3.18.10.orig/drivers/clocksource/timer-atmel-pit.c linux-3.18.10/drivers/clocksource/timer-atmel-pit.c
  3576. --- linux-3.18.10.orig/drivers/clocksource/timer-atmel-pit.c 2015-03-24 02:05:12.000000000 +0100
  3577. +++ linux-3.18.10/drivers/clocksource/timer-atmel-pit.c 2015-03-26 12:42:13.567582340 +0100
  3578. @@ -90,6 +90,7 @@
  3579. return elapsed;
  3580. }
  3581. +static struct irqaction at91sam926x_pit_irq;
  3582. /*
  3583. * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16)
  3584. */
  3585. @@ -100,6 +101,8 @@
  3586. switch (mode) {
  3587. case CLOCK_EVT_MODE_PERIODIC:
  3588. + /* Set up irq handler */
  3589. + setup_irq(at91sam926x_pit_irq.irq, &at91sam926x_pit_irq);
  3590. /* update clocksource counter */
  3591. data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR));
  3592. pit_write(data->base, AT91_PIT_MR,
  3593. @@ -113,6 +116,7 @@
  3594. /* disable irq, leaving the clocksource active */
  3595. pit_write(data->base, AT91_PIT_MR,
  3596. (data->cycle - 1) | AT91_PIT_PITEN);
  3597. + remove_irq(at91sam926x_pit_irq.irq, &at91sam926x_pit_irq);
  3598. break;
  3599. case CLOCK_EVT_MODE_RESUME:
  3600. break;
  3601. diff -Nur linux-3.18.10.orig/drivers/gpio/gpio-omap.c linux-3.18.10/drivers/gpio/gpio-omap.c
  3602. --- linux-3.18.10.orig/drivers/gpio/gpio-omap.c 2015-03-24 02:05:12.000000000 +0100
  3603. +++ linux-3.18.10/drivers/gpio/gpio-omap.c 2015-03-26 12:42:13.567582340 +0100
  3604. @@ -57,7 +57,7 @@
  3605. u32 saved_datain;
  3606. u32 level_mask;
  3607. u32 toggle_mask;
  3608. - spinlock_t lock;
  3609. + raw_spinlock_t lock;
  3610. struct gpio_chip chip;
  3611. struct clk *dbck;
  3612. u32 mod_usage;
  3613. @@ -503,19 +503,19 @@
  3614. (type & (IRQ_TYPE_LEVEL_LOW|IRQ_TYPE_LEVEL_HIGH)))
  3615. return -EINVAL;
  3616. - spin_lock_irqsave(&bank->lock, flags);
  3617. + raw_spin_lock_irqsave(&bank->lock, flags);
  3618. offset = GPIO_INDEX(bank, gpio);
  3619. retval = omap_set_gpio_triggering(bank, offset, type);
  3620. if (!LINE_USED(bank->mod_usage, offset)) {
  3621. omap_enable_gpio_module(bank, offset);
  3622. omap_set_gpio_direction(bank, offset, 1);
  3623. } else if (!omap_gpio_is_input(bank, BIT(offset))) {
  3624. - spin_unlock_irqrestore(&bank->lock, flags);
  3625. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3626. return -EINVAL;
  3627. }
  3628. bank->irq_usage |= BIT(GPIO_INDEX(bank, gpio));
  3629. - spin_unlock_irqrestore(&bank->lock, flags);
  3630. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3631. if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
  3632. __irq_set_handler_locked(d->irq, handle_level_irq);
  3633. @@ -633,14 +633,14 @@
  3634. return -EINVAL;
  3635. }
  3636. - spin_lock_irqsave(&bank->lock, flags);
  3637. + raw_spin_lock_irqsave(&bank->lock, flags);
  3638. if (enable)
  3639. bank->context.wake_en |= gpio_bit;
  3640. else
  3641. bank->context.wake_en &= ~gpio_bit;
  3642. writel_relaxed(bank->context.wake_en, bank->base + bank->regs->wkup_en);
  3643. - spin_unlock_irqrestore(&bank->lock, flags);
  3644. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3645. return 0;
  3646. }
  3647. @@ -675,7 +675,7 @@
  3648. if (!BANK_USED(bank))
  3649. pm_runtime_get_sync(bank->dev);
  3650. - spin_lock_irqsave(&bank->lock, flags);
  3651. + raw_spin_lock_irqsave(&bank->lock, flags);
  3652. /* Set trigger to none. You need to enable the desired trigger with
  3653. * request_irq() or set_irq_type(). Only do this if the IRQ line has
  3654. * not already been requested.
  3655. @@ -685,7 +685,7 @@
  3656. omap_enable_gpio_module(bank, offset);
  3657. }
  3658. bank->mod_usage |= BIT(offset);
  3659. - spin_unlock_irqrestore(&bank->lock, flags);
  3660. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3661. return 0;
  3662. }
  3663. @@ -695,11 +695,11 @@
  3664. struct gpio_bank *bank = container_of(chip, struct gpio_bank, chip);
  3665. unsigned long flags;
  3666. - spin_lock_irqsave(&bank->lock, flags);
  3667. + raw_spin_lock_irqsave(&bank->lock, flags);
  3668. bank->mod_usage &= ~(BIT(offset));
  3669. omap_disable_gpio_module(bank, offset);
  3670. omap_reset_gpio(bank, bank->chip.base + offset);
  3671. - spin_unlock_irqrestore(&bank->lock, flags);
  3672. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3673. /*
  3674. * If this is the last gpio to be freed in the bank,
  3675. @@ -799,12 +799,12 @@
  3676. unsigned long flags;
  3677. unsigned offset = GPIO_INDEX(bank, gpio);
  3678. - spin_lock_irqsave(&bank->lock, flags);
  3679. + raw_spin_lock_irqsave(&bank->lock, flags);
  3680. gpio_unlock_as_irq(&bank->chip, offset);
  3681. bank->irq_usage &= ~(BIT(offset));
  3682. omap_disable_gpio_module(bank, offset);
  3683. omap_reset_gpio(bank, gpio);
  3684. - spin_unlock_irqrestore(&bank->lock, flags);
  3685. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3686. /*
  3687. * If this is the last IRQ to be freed in the bank,
  3688. @@ -828,10 +828,10 @@
  3689. unsigned int gpio = omap_irq_to_gpio(bank, d->hwirq);
  3690. unsigned long flags;
  3691. - spin_lock_irqsave(&bank->lock, flags);
  3692. + raw_spin_lock_irqsave(&bank->lock, flags);
  3693. omap_set_gpio_irqenable(bank, gpio, 0);
  3694. omap_set_gpio_triggering(bank, GPIO_INDEX(bank, gpio), IRQ_TYPE_NONE);
  3695. - spin_unlock_irqrestore(&bank->lock, flags);
  3696. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3697. }
  3698. static void omap_gpio_unmask_irq(struct irq_data *d)
  3699. @@ -842,7 +842,7 @@
  3700. u32 trigger = irqd_get_trigger_type(d);
  3701. unsigned long flags;
  3702. - spin_lock_irqsave(&bank->lock, flags);
  3703. + raw_spin_lock_irqsave(&bank->lock, flags);
  3704. if (trigger)
  3705. omap_set_gpio_triggering(bank, GPIO_INDEX(bank, gpio), trigger);
  3706. @@ -854,7 +854,7 @@
  3707. }
  3708. omap_set_gpio_irqenable(bank, gpio, 1);
  3709. - spin_unlock_irqrestore(&bank->lock, flags);
  3710. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3711. }
  3712. /*---------------------------------------------------------------------*/
  3713. @@ -867,9 +867,9 @@
  3714. OMAP_MPUIO_GPIO_MASKIT / bank->stride;
  3715. unsigned long flags;
  3716. - spin_lock_irqsave(&bank->lock, flags);
  3717. + raw_spin_lock_irqsave(&bank->lock, flags);
  3718. writel_relaxed(0xffff & ~bank->context.wake_en, mask_reg);
  3719. - spin_unlock_irqrestore(&bank->lock, flags);
  3720. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3721. return 0;
  3722. }
  3723. @@ -882,9 +882,9 @@
  3724. OMAP_MPUIO_GPIO_MASKIT / bank->stride;
  3725. unsigned long flags;
  3726. - spin_lock_irqsave(&bank->lock, flags);
  3727. + raw_spin_lock_irqsave(&bank->lock, flags);
  3728. writel_relaxed(bank->context.wake_en, mask_reg);
  3729. - spin_unlock_irqrestore(&bank->lock, flags);
  3730. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3731. return 0;
  3732. }
  3733. @@ -930,9 +930,9 @@
  3734. bank = container_of(chip, struct gpio_bank, chip);
  3735. reg = bank->base + bank->regs->direction;
  3736. - spin_lock_irqsave(&bank->lock, flags);
  3737. + raw_spin_lock_irqsave(&bank->lock, flags);
  3738. dir = !!(readl_relaxed(reg) & BIT(offset));
  3739. - spin_unlock_irqrestore(&bank->lock, flags);
  3740. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3741. return dir;
  3742. }
  3743. @@ -942,9 +942,9 @@
  3744. unsigned long flags;
  3745. bank = container_of(chip, struct gpio_bank, chip);
  3746. - spin_lock_irqsave(&bank->lock, flags);
  3747. + raw_spin_lock_irqsave(&bank->lock, flags);
  3748. omap_set_gpio_direction(bank, offset, 1);
  3749. - spin_unlock_irqrestore(&bank->lock, flags);
  3750. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3751. return 0;
  3752. }
  3753. @@ -968,10 +968,10 @@
  3754. unsigned long flags;
  3755. bank = container_of(chip, struct gpio_bank, chip);
  3756. - spin_lock_irqsave(&bank->lock, flags);
  3757. + raw_spin_lock_irqsave(&bank->lock, flags);
  3758. bank->set_dataout(bank, offset, value);
  3759. omap_set_gpio_direction(bank, offset, 0);
  3760. - spin_unlock_irqrestore(&bank->lock, flags);
  3761. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3762. return 0;
  3763. }
  3764. @@ -983,9 +983,9 @@
  3765. bank = container_of(chip, struct gpio_bank, chip);
  3766. - spin_lock_irqsave(&bank->lock, flags);
  3767. + raw_spin_lock_irqsave(&bank->lock, flags);
  3768. omap2_set_gpio_debounce(bank, offset, debounce);
  3769. - spin_unlock_irqrestore(&bank->lock, flags);
  3770. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3771. return 0;
  3772. }
  3773. @@ -996,9 +996,9 @@
  3774. unsigned long flags;
  3775. bank = container_of(chip, struct gpio_bank, chip);
  3776. - spin_lock_irqsave(&bank->lock, flags);
  3777. + raw_spin_lock_irqsave(&bank->lock, flags);
  3778. bank->set_dataout(bank, offset, value);
  3779. - spin_unlock_irqrestore(&bank->lock, flags);
  3780. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3781. }
  3782. /*---------------------------------------------------------------------*/
  3783. @@ -1223,7 +1223,7 @@
  3784. else
  3785. bank->set_dataout = omap_set_gpio_dataout_mask;
  3786. - spin_lock_init(&bank->lock);
  3787. + raw_spin_lock_init(&bank->lock);
  3788. /* Static mapping, never released */
  3789. res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
  3790. @@ -1270,7 +1270,7 @@
  3791. unsigned long flags;
  3792. u32 wake_low, wake_hi;
  3793. - spin_lock_irqsave(&bank->lock, flags);
  3794. + raw_spin_lock_irqsave(&bank->lock, flags);
  3795. /*
  3796. * Only edges can generate a wakeup event to the PRCM.
  3797. @@ -1323,7 +1323,7 @@
  3798. bank->get_context_loss_count(bank->dev);
  3799. omap_gpio_dbck_disable(bank);
  3800. - spin_unlock_irqrestore(&bank->lock, flags);
  3801. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3802. return 0;
  3803. }
  3804. @@ -1338,7 +1338,7 @@
  3805. unsigned long flags;
  3806. int c;
  3807. - spin_lock_irqsave(&bank->lock, flags);
  3808. + raw_spin_lock_irqsave(&bank->lock, flags);
  3809. /*
  3810. * On the first resume during the probe, the context has not
  3811. @@ -1374,14 +1374,14 @@
  3812. if (c != bank->context_loss_count) {
  3813. omap_gpio_restore_context(bank);
  3814. } else {
  3815. - spin_unlock_irqrestore(&bank->lock, flags);
  3816. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3817. return 0;
  3818. }
  3819. }
  3820. }
  3821. if (!bank->workaround_enabled) {
  3822. - spin_unlock_irqrestore(&bank->lock, flags);
  3823. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3824. return 0;
  3825. }
  3826. @@ -1436,7 +1436,7 @@
  3827. }
  3828. bank->workaround_enabled = false;
  3829. - spin_unlock_irqrestore(&bank->lock, flags);
  3830. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  3831. return 0;
  3832. }
  3833. diff -Nur linux-3.18.10.orig/drivers/gpu/drm/i915/i915_gem.c linux-3.18.10/drivers/gpu/drm/i915/i915_gem.c
  3834. --- linux-3.18.10.orig/drivers/gpu/drm/i915/i915_gem.c 2015-03-24 02:05:12.000000000 +0100
  3835. +++ linux-3.18.10/drivers/gpu/drm/i915/i915_gem.c 2015-03-26 12:42:13.567582340 +0100
  3836. @@ -5144,7 +5144,7 @@
  3837. if (!mutex_is_locked(mutex))
  3838. return false;
  3839. -#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES)
  3840. +#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) && !defined(CONFIG_PREEMPT_RT_BASE)
  3841. return mutex->owner == task;
  3842. #else
  3843. /* Since UP may be pre-empted, we cannot assume that we own the lock */
  3844. diff -Nur linux-3.18.10.orig/drivers/gpu/drm/i915/i915_gem_execbuffer.c linux-3.18.10/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  3845. --- linux-3.18.10.orig/drivers/gpu/drm/i915/i915_gem_execbuffer.c 2015-03-24 02:05:12.000000000 +0100
  3846. +++ linux-3.18.10/drivers/gpu/drm/i915/i915_gem_execbuffer.c 2015-03-26 12:42:13.567582340 +0100
  3847. @@ -1170,7 +1170,9 @@
  3848. return ret;
  3849. }
  3850. +#ifndef CONFIG_PREEMPT_RT_BASE
  3851. trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
  3852. +#endif
  3853. i915_gem_execbuffer_move_to_active(vmas, ring);
  3854. i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
  3855. diff -Nur linux-3.18.10.orig/drivers/gpu/drm/radeon/evergreen.c linux-3.18.10/drivers/gpu/drm/radeon/evergreen.c
  3856. --- linux-3.18.10.orig/drivers/gpu/drm/radeon/evergreen.c 2015-03-24 02:05:12.000000000 +0100
  3857. +++ linux-3.18.10/drivers/gpu/drm/radeon/evergreen.c 2015-03-26 12:42:16.827586167 +0100
  3858. @@ -4589,6 +4589,9 @@
  3859. WREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, afmt5);
  3860. WREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, afmt6);
  3861. + /* posting read */
  3862. + RREG32(SRBM_STATUS);
  3863. +
  3864. return 0;
  3865. }
  3866. diff -Nur linux-3.18.10.orig/drivers/gpu/drm/radeon/r600.c linux-3.18.10/drivers/gpu/drm/radeon/r600.c
  3867. --- linux-3.18.10.orig/drivers/gpu/drm/radeon/r600.c 2015-03-24 02:05:12.000000000 +0100
  3868. +++ linux-3.18.10/drivers/gpu/drm/radeon/r600.c 2015-03-26 12:42:18.651588307 +0100
  3869. @@ -3787,6 +3787,9 @@
  3870. WREG32(RV770_CG_THERMAL_INT, thermal_int);
  3871. }
  3872. + /* posting read */
  3873. + RREG32(R_000E50_SRBM_STATUS);
  3874. +
  3875. return 0;
  3876. }
  3877. diff -Nur linux-3.18.10.orig/drivers/gpu/drm/radeon/radeon_fence.c linux-3.18.10/drivers/gpu/drm/radeon/radeon_fence.c
  3878. --- linux-3.18.10.orig/drivers/gpu/drm/radeon/radeon_fence.c 2015-03-24 02:05:12.000000000 +0100
  3879. +++ linux-3.18.10/drivers/gpu/drm/radeon/radeon_fence.c 2015-03-26 12:42:18.651588307 +0100
  3880. @@ -1029,37 +1029,59 @@
  3881. return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
  3882. }
  3883. +struct radeon_wait_cb {
  3884. + struct fence_cb base;
  3885. + struct task_struct *task;
  3886. +};
  3887. +
  3888. +static void
  3889. +radeon_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
  3890. +{
  3891. + struct radeon_wait_cb *wait =
  3892. + container_of(cb, struct radeon_wait_cb, base);
  3893. +
  3894. + wake_up_process(wait->task);
  3895. +}
  3896. +
  3897. static signed long radeon_fence_default_wait(struct fence *f, bool intr,
  3898. signed long t)
  3899. {
  3900. struct radeon_fence *fence = to_radeon_fence(f);
  3901. struct radeon_device *rdev = fence->rdev;
  3902. - bool signaled;
  3903. + struct radeon_wait_cb cb;
  3904. +
  3905. + cb.task = current;
  3906. +
  3907. + if (fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
  3908. + return t;
  3909. - fence_enable_sw_signaling(&fence->base);
  3910. + while (t > 0) {
  3911. + if (intr)
  3912. + set_current_state(TASK_INTERRUPTIBLE);
  3913. + else
  3914. + set_current_state(TASK_UNINTERRUPTIBLE);
  3915. +
  3916. + /*
  3917. + * radeon_test_signaled must be called after
  3918. + * set_current_state to prevent a race with wake_up_process
  3919. + */
  3920. + if (radeon_test_signaled(fence))
  3921. + break;
  3922. +
  3923. + if (rdev->needs_reset) {
  3924. + t = -EDEADLK;
  3925. + break;
  3926. + }
  3927. +
  3928. + t = schedule_timeout(t);
  3929. +
  3930. + if (t > 0 && intr && signal_pending(current))
  3931. + t = -ERESTARTSYS;
  3932. + }
  3933. - /*
  3934. - * This function has to return -EDEADLK, but cannot hold
  3935. - * exclusive_lock during the wait because some callers
  3936. - * may already hold it. This means checking needs_reset without
  3937. - * lock, and not fiddling with any gpu internals.
  3938. - *
  3939. - * The callback installed with fence_enable_sw_signaling will
  3940. - * run before our wait_event_*timeout call, so we will see
  3941. - * both the signaled fence and the changes to needs_reset.
  3942. - */
  3943. -
  3944. - if (intr)
  3945. - t = wait_event_interruptible_timeout(rdev->fence_queue,
  3946. - ((signaled = radeon_test_signaled(fence)) ||
  3947. - rdev->needs_reset), t);
  3948. - else
  3949. - t = wait_event_timeout(rdev->fence_queue,
  3950. - ((signaled = radeon_test_signaled(fence)) ||
  3951. - rdev->needs_reset), t);
  3952. + __set_current_state(TASK_RUNNING);
  3953. + fence_remove_callback(f, &cb.base);
  3954. - if (t > 0 && !signaled)
  3955. - return -EDEADLK;
  3956. return t;
  3957. }
  3958. diff -Nur linux-3.18.10.orig/drivers/gpu/drm/radeon/rs600.c linux-3.18.10/drivers/gpu/drm/radeon/rs600.c
  3959. --- linux-3.18.10.orig/drivers/gpu/drm/radeon/rs600.c 2015-03-24 02:05:12.000000000 +0100
  3960. +++ linux-3.18.10/drivers/gpu/drm/radeon/rs600.c 2015-03-26 12:42:18.651588307 +0100
  3961. @@ -693,6 +693,10 @@
  3962. WREG32(R_007D18_DC_HOT_PLUG_DETECT2_INT_CONTROL, hpd2);
  3963. if (ASIC_IS_DCE2(rdev))
  3964. WREG32(R_007408_HDMI0_AUDIO_PACKET_CONTROL, hdmi0);
  3965. +
  3966. + /* posting read */
  3967. + RREG32(R_000040_GEN_INT_CNTL);
  3968. +
  3969. return 0;
  3970. }
  3971. diff -Nur linux-3.18.10.orig/drivers/gpu/drm/radeon/si.c linux-3.18.10/drivers/gpu/drm/radeon/si.c
  3972. --- linux-3.18.10.orig/drivers/gpu/drm/radeon/si.c 2015-03-24 02:05:12.000000000 +0100
  3973. +++ linux-3.18.10/drivers/gpu/drm/radeon/si.c 2015-03-26 12:42:18.655588312 +0100
  3974. @@ -6192,6 +6192,9 @@
  3975. WREG32(CG_THERMAL_INT, thermal_int);
  3976. + /* posting read */
  3977. + RREG32(SRBM_STATUS);
  3978. +
  3979. return 0;
  3980. }
  3981. diff -Nur linux-3.18.10.orig/drivers/i2c/busses/i2c-omap.c linux-3.18.10/drivers/i2c/busses/i2c-omap.c
  3982. --- linux-3.18.10.orig/drivers/i2c/busses/i2c-omap.c 2015-03-24 02:05:12.000000000 +0100
  3983. +++ linux-3.18.10/drivers/i2c/busses/i2c-omap.c 2015-03-26 12:42:18.655588312 +0100
  3984. @@ -875,15 +875,12 @@
  3985. u16 mask;
  3986. u16 stat;
  3987. - spin_lock(&dev->lock);
  3988. - mask = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
  3989. stat = omap_i2c_read_reg(dev, OMAP_I2C_STAT_REG);
  3990. + mask = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
  3991. if (stat & mask)
  3992. ret = IRQ_WAKE_THREAD;
  3993. - spin_unlock(&dev->lock);
  3994. -
  3995. return ret;
  3996. }
  3997. diff -Nur linux-3.18.10.orig/drivers/ide/alim15x3.c linux-3.18.10/drivers/ide/alim15x3.c
  3998. --- linux-3.18.10.orig/drivers/ide/alim15x3.c 2015-03-24 02:05:12.000000000 +0100
  3999. +++ linux-3.18.10/drivers/ide/alim15x3.c 2015-03-26 12:42:18.655588312 +0100
  4000. @@ -234,7 +234,7 @@
  4001. isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
  4002. - local_irq_save(flags);
  4003. + local_irq_save_nort(flags);
  4004. if (m5229_revision < 0xC2) {
  4005. /*
  4006. @@ -325,7 +325,7 @@
  4007. }
  4008. pci_dev_put(north);
  4009. pci_dev_put(isa_dev);
  4010. - local_irq_restore(flags);
  4011. + local_irq_restore_nort(flags);
  4012. return 0;
  4013. }
  4014. diff -Nur linux-3.18.10.orig/drivers/ide/hpt366.c linux-3.18.10/drivers/ide/hpt366.c
  4015. --- linux-3.18.10.orig/drivers/ide/hpt366.c 2015-03-24 02:05:12.000000000 +0100
  4016. +++ linux-3.18.10/drivers/ide/hpt366.c 2015-03-26 12:42:18.655588312 +0100
  4017. @@ -1241,7 +1241,7 @@
  4018. dma_old = inb(base + 2);
  4019. - local_irq_save(flags);
  4020. + local_irq_save_nort(flags);
  4021. dma_new = dma_old;
  4022. pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma);
  4023. @@ -1252,7 +1252,7 @@
  4024. if (dma_new != dma_old)
  4025. outb(dma_new, base + 2);
  4026. - local_irq_restore(flags);
  4027. + local_irq_restore_nort(flags);
  4028. printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n",
  4029. hwif->name, base, base + 7);
  4030. diff -Nur linux-3.18.10.orig/drivers/ide/ide-io.c linux-3.18.10/drivers/ide/ide-io.c
  4031. --- linux-3.18.10.orig/drivers/ide/ide-io.c 2015-03-24 02:05:12.000000000 +0100
  4032. +++ linux-3.18.10/drivers/ide/ide-io.c 2015-03-26 12:42:18.655588312 +0100
  4033. @@ -659,7 +659,7 @@
  4034. /* disable_irq_nosync ?? */
  4035. disable_irq(hwif->irq);
  4036. /* local CPU only, as if we were handling an interrupt */
  4037. - local_irq_disable();
  4038. + local_irq_disable_nort();
  4039. if (hwif->polling) {
  4040. startstop = handler(drive);
  4041. } else if (drive_is_ready(drive)) {
  4042. diff -Nur linux-3.18.10.orig/drivers/ide/ide-iops.c linux-3.18.10/drivers/ide/ide-iops.c
  4043. --- linux-3.18.10.orig/drivers/ide/ide-iops.c 2015-03-24 02:05:12.000000000 +0100
  4044. +++ linux-3.18.10/drivers/ide/ide-iops.c 2015-03-26 12:42:18.655588312 +0100
  4045. @@ -129,12 +129,12 @@
  4046. if ((stat & ATA_BUSY) == 0)
  4047. break;
  4048. - local_irq_restore(flags);
  4049. + local_irq_restore_nort(flags);
  4050. *rstat = stat;
  4051. return -EBUSY;
  4052. }
  4053. }
  4054. - local_irq_restore(flags);
  4055. + local_irq_restore_nort(flags);
  4056. }
  4057. /*
  4058. * Allow status to settle, then read it again.
  4059. diff -Nur linux-3.18.10.orig/drivers/ide/ide-io-std.c linux-3.18.10/drivers/ide/ide-io-std.c
  4060. --- linux-3.18.10.orig/drivers/ide/ide-io-std.c 2015-03-24 02:05:12.000000000 +0100
  4061. +++ linux-3.18.10/drivers/ide/ide-io-std.c 2015-03-26 12:42:18.655588312 +0100
  4062. @@ -175,7 +175,7 @@
  4063. unsigned long uninitialized_var(flags);
  4064. if ((io_32bit & 2) && !mmio) {
  4065. - local_irq_save(flags);
  4066. + local_irq_save_nort(flags);
  4067. ata_vlb_sync(io_ports->nsect_addr);
  4068. }
  4069. @@ -186,7 +186,7 @@
  4070. insl(data_addr, buf, words);
  4071. if ((io_32bit & 2) && !mmio)
  4072. - local_irq_restore(flags);
  4073. + local_irq_restore_nort(flags);
  4074. if (((len + 1) & 3) < 2)
  4075. return;
  4076. @@ -219,7 +219,7 @@
  4077. unsigned long uninitialized_var(flags);
  4078. if ((io_32bit & 2) && !mmio) {
  4079. - local_irq_save(flags);
  4080. + local_irq_save_nort(flags);
  4081. ata_vlb_sync(io_ports->nsect_addr);
  4082. }
  4083. @@ -230,7 +230,7 @@
  4084. outsl(data_addr, buf, words);
  4085. if ((io_32bit & 2) && !mmio)
  4086. - local_irq_restore(flags);
  4087. + local_irq_restore_nort(flags);
  4088. if (((len + 1) & 3) < 2)
  4089. return;
  4090. diff -Nur linux-3.18.10.orig/drivers/ide/ide-probe.c linux-3.18.10/drivers/ide/ide-probe.c
  4091. --- linux-3.18.10.orig/drivers/ide/ide-probe.c 2015-03-24 02:05:12.000000000 +0100
  4092. +++ linux-3.18.10/drivers/ide/ide-probe.c 2015-03-26 12:42:18.655588312 +0100
  4093. @@ -196,10 +196,10 @@
  4094. int bswap = 1;
  4095. /* local CPU only; some systems need this */
  4096. - local_irq_save(flags);
  4097. + local_irq_save_nort(flags);
  4098. /* read 512 bytes of id info */
  4099. hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE);
  4100. - local_irq_restore(flags);
  4101. + local_irq_restore_nort(flags);
  4102. drive->dev_flags |= IDE_DFLAG_ID_READ;
  4103. #ifdef DEBUG
  4104. diff -Nur linux-3.18.10.orig/drivers/ide/ide-taskfile.c linux-3.18.10/drivers/ide/ide-taskfile.c
  4105. --- linux-3.18.10.orig/drivers/ide/ide-taskfile.c 2015-03-24 02:05:12.000000000 +0100
  4106. +++ linux-3.18.10/drivers/ide/ide-taskfile.c 2015-03-26 12:42:18.655588312 +0100
  4107. @@ -250,7 +250,7 @@
  4108. page_is_high = PageHighMem(page);
  4109. if (page_is_high)
  4110. - local_irq_save(flags);
  4111. + local_irq_save_nort(flags);
  4112. buf = kmap_atomic(page) + offset;
  4113. @@ -271,7 +271,7 @@
  4114. kunmap_atomic(buf);
  4115. if (page_is_high)
  4116. - local_irq_restore(flags);
  4117. + local_irq_restore_nort(flags);
  4118. len -= nr_bytes;
  4119. }
  4120. @@ -414,7 +414,7 @@
  4121. }
  4122. if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
  4123. - local_irq_disable();
  4124. + local_irq_disable_nort();
  4125. ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
  4126. diff -Nur linux-3.18.10.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c linux-3.18.10/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  4127. --- linux-3.18.10.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2015-03-24 02:05:12.000000000 +0100
  4128. +++ linux-3.18.10/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2015-03-26 12:42:18.655588312 +0100
  4129. @@ -796,7 +796,7 @@
  4130. ipoib_mcast_stop_thread(dev, 0);
  4131. - local_irq_save(flags);
  4132. + local_irq_save_nort(flags);
  4133. netif_addr_lock(dev);
  4134. spin_lock(&priv->lock);
  4135. @@ -878,7 +878,7 @@
  4136. spin_unlock(&priv->lock);
  4137. netif_addr_unlock(dev);
  4138. - local_irq_restore(flags);
  4139. + local_irq_restore_nort(flags);
  4140. /* We have to cancel outside of the spinlock */
  4141. list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
  4142. diff -Nur linux-3.18.10.orig/drivers/input/gameport/gameport.c linux-3.18.10/drivers/input/gameport/gameport.c
  4143. --- linux-3.18.10.orig/drivers/input/gameport/gameport.c 2015-03-24 02:05:12.000000000 +0100
  4144. +++ linux-3.18.10/drivers/input/gameport/gameport.c 2015-03-26 12:42:18.655588312 +0100
  4145. @@ -124,12 +124,12 @@
  4146. tx = 1 << 30;
  4147. for(i = 0; i < 50; i++) {
  4148. - local_irq_save(flags);
  4149. + local_irq_save_nort(flags);
  4150. GET_TIME(t1);
  4151. for (t = 0; t < 50; t++) gameport_read(gameport);
  4152. GET_TIME(t2);
  4153. GET_TIME(t3);
  4154. - local_irq_restore(flags);
  4155. + local_irq_restore_nort(flags);
  4156. udelay(i * 10);
  4157. if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t;
  4158. }
  4159. @@ -148,11 +148,11 @@
  4160. tx = 1 << 30;
  4161. for(i = 0; i < 50; i++) {
  4162. - local_irq_save(flags);
  4163. + local_irq_save_nort(flags);
  4164. rdtscl(t1);
  4165. for (t = 0; t < 50; t++) gameport_read(gameport);
  4166. rdtscl(t2);
  4167. - local_irq_restore(flags);
  4168. + local_irq_restore_nort(flags);
  4169. udelay(i * 10);
  4170. if (t2 - t1 < tx) tx = t2 - t1;
  4171. }
  4172. diff -Nur linux-3.18.10.orig/drivers/leds/trigger/Kconfig linux-3.18.10/drivers/leds/trigger/Kconfig
  4173. --- linux-3.18.10.orig/drivers/leds/trigger/Kconfig 2015-03-24 02:05:12.000000000 +0100
  4174. +++ linux-3.18.10/drivers/leds/trigger/Kconfig 2015-03-26 12:42:18.655588312 +0100
  4175. @@ -61,7 +61,7 @@
  4176. config LEDS_TRIGGER_CPU
  4177. bool "LED CPU Trigger"
  4178. - depends on LEDS_TRIGGERS
  4179. + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE
  4180. help
  4181. This allows LEDs to be controlled by active CPUs. This shows
  4182. the active CPUs across an array of LEDs so you can see which
  4183. diff -Nur linux-3.18.10.orig/drivers/md/bcache/Kconfig linux-3.18.10/drivers/md/bcache/Kconfig
  4184. --- linux-3.18.10.orig/drivers/md/bcache/Kconfig 2015-03-24 02:05:12.000000000 +0100
  4185. +++ linux-3.18.10/drivers/md/bcache/Kconfig 2015-03-26 12:42:18.655588312 +0100
  4186. @@ -1,6 +1,7 @@
  4187. config BCACHE
  4188. tristate "Block device as cache"
  4189. + depends on !PREEMPT_RT_FULL
  4190. ---help---
  4191. Allows a block device to be used as cache for other devices; uses
  4192. a btree for indexing and the layout is optimized for SSDs.
  4193. diff -Nur linux-3.18.10.orig/drivers/md/dm.c linux-3.18.10/drivers/md/dm.c
  4194. --- linux-3.18.10.orig/drivers/md/dm.c 2015-03-24 02:05:12.000000000 +0100
  4195. +++ linux-3.18.10/drivers/md/dm.c 2015-03-26 12:42:18.655588312 +0100
  4196. @@ -1898,14 +1898,14 @@
  4197. if (map_request(ti, clone, md))
  4198. goto requeued;
  4199. - BUG_ON(!irqs_disabled());
  4200. + BUG_ON_NONRT(!irqs_disabled());
  4201. spin_lock(q->queue_lock);
  4202. }
  4203. goto out;
  4204. requeued:
  4205. - BUG_ON(!irqs_disabled());
  4206. + BUG_ON_NONRT(!irqs_disabled());
  4207. spin_lock(q->queue_lock);
  4208. delay_and_out:
  4209. diff -Nur linux-3.18.10.orig/drivers/md/raid5.c linux-3.18.10/drivers/md/raid5.c
  4210. --- linux-3.18.10.orig/drivers/md/raid5.c 2015-03-24 02:05:12.000000000 +0100
  4211. +++ linux-3.18.10/drivers/md/raid5.c 2015-03-26 12:42:18.655588312 +0100
  4212. @@ -1649,8 +1649,9 @@
  4213. struct raid5_percpu *percpu;
  4214. unsigned long cpu;
  4215. - cpu = get_cpu();
  4216. + cpu = get_cpu_light();
  4217. percpu = per_cpu_ptr(conf->percpu, cpu);
  4218. + spin_lock(&percpu->lock);
  4219. if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
  4220. ops_run_biofill(sh);
  4221. overlap_clear++;
  4222. @@ -1702,7 +1703,8 @@
  4223. if (test_and_clear_bit(R5_Overlap, &dev->flags))
  4224. wake_up(&sh->raid_conf->wait_for_overlap);
  4225. }
  4226. - put_cpu();
  4227. + spin_unlock(&percpu->lock);
  4228. + put_cpu_light();
  4229. }
  4230. static int grow_one_stripe(struct r5conf *conf, int hash)
  4231. @@ -5708,6 +5710,7 @@
  4232. __func__, cpu);
  4233. break;
  4234. }
  4235. + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
  4236. }
  4237. put_online_cpus();
  4238. diff -Nur linux-3.18.10.orig/drivers/md/raid5.h linux-3.18.10/drivers/md/raid5.h
  4239. --- linux-3.18.10.orig/drivers/md/raid5.h 2015-03-24 02:05:12.000000000 +0100
  4240. +++ linux-3.18.10/drivers/md/raid5.h 2015-03-26 12:42:18.655588312 +0100
  4241. @@ -457,6 +457,7 @@
  4242. int recovery_disabled;
  4243. /* per cpu variables */
  4244. struct raid5_percpu {
  4245. + spinlock_t lock; /* Protection for -RT */
  4246. struct page *spare_page; /* Used when checking P/Q in raid6 */
  4247. void *scribble; /* space for constructing buffer
  4248. * lists and performing address
  4249. diff -Nur linux-3.18.10.orig/drivers/misc/hwlat_detector.c linux-3.18.10/drivers/misc/hwlat_detector.c
  4250. --- linux-3.18.10.orig/drivers/misc/hwlat_detector.c 1970-01-01 01:00:00.000000000 +0100
  4251. +++ linux-3.18.10/drivers/misc/hwlat_detector.c 2015-03-26 12:42:18.655588312 +0100
  4252. @@ -0,0 +1,1240 @@
  4253. +/*
  4254. + * hwlat_detector.c - A simple Hardware Latency detector.
  4255. + *
  4256. + * Use this module to detect large system latencies induced by the behavior of
  4257. + * certain underlying system hardware or firmware, independent of Linux itself.
  4258. + * The code was developed originally to detect the presence of SMIs on Intel
  4259. + * and AMD systems, although there is no dependency upon x86 herein.
  4260. + *
  4261. + * The classical example usage of this module is in detecting the presence of
  4262. + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
  4263. + * somewhat special form of hardware interrupt spawned from earlier CPU debug
  4264. + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
  4265. + * LPC (or other device) to generate a special interrupt under certain
  4266. + * circumstances, for example, upon expiration of a special SMI timer device,
  4267. + * due to certain external thermal readings, on certain I/O address accesses,
  4268. + * and other situations. An SMI hits a special CPU pin, triggers a special
  4269. + * SMI mode (complete with special memory map), and the OS is unaware.
  4270. + *
  4271. + * Although certain hardware-induced latencies are necessary (for example,
  4272. + * a modern system often requires an SMI handler for correct thermal control
  4273. + * and remote management) they can wreak havoc upon any OS-level performance
  4274. + * guarantees toward low-latency, especially when the OS is not even made
  4275. + * aware of the presence of these interrupts. For this reason, we need a
  4276. + * somewhat brute force mechanism to detect these interrupts. In this case,
  4277. + * we do it by hogging all of the CPU(s) for configurable timer intervals,
  4278. + * sampling the built-in CPU timer, looking for discontiguous readings.
  4279. + *
  4280. + * WARNING: This implementation necessarily introduces latencies. Therefore,
  4281. + * you should NEVER use this module in a production environment
  4282. + * requiring any kind of low-latency performance guarantee(s).
  4283. + *
  4284. + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
  4285. + *
  4286. + * Includes useful feedback from Clark Williams <clark@redhat.com>
  4287. + *
  4288. + * This file is licensed under the terms of the GNU General Public
  4289. + * License version 2. This program is licensed "as is" without any
  4290. + * warranty of any kind, whether express or implied.
  4291. + */
  4292. +
  4293. +#include <linux/module.h>
  4294. +#include <linux/init.h>
  4295. +#include <linux/ring_buffer.h>
  4296. +#include <linux/time.h>
  4297. +#include <linux/hrtimer.h>
  4298. +#include <linux/kthread.h>
  4299. +#include <linux/debugfs.h>
  4300. +#include <linux/seq_file.h>
  4301. +#include <linux/uaccess.h>
  4302. +#include <linux/version.h>
  4303. +#include <linux/delay.h>
  4304. +#include <linux/slab.h>
  4305. +#include <linux/trace_clock.h>
  4306. +
  4307. +#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */
  4308. +#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */
  4309. +#define U64STR_SIZE 22 /* 20 digits max */
  4310. +
  4311. +#define VERSION "1.0.0"
  4312. +#define BANNER "hwlat_detector: "
  4313. +#define DRVNAME "hwlat_detector"
  4314. +#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */
  4315. +#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */
  4316. +#define DEFAULT_LAT_THRESHOLD 10 /* 10us */
  4317. +
  4318. +/* Module metadata */
  4319. +
  4320. +MODULE_LICENSE("GPL");
  4321. +MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
  4322. +MODULE_DESCRIPTION("A simple hardware latency detector");
  4323. +MODULE_VERSION(VERSION);
  4324. +
  4325. +/* Module parameters */
  4326. +
  4327. +static int debug;
  4328. +static int enabled;
  4329. +static int threshold;
  4330. +
  4331. +module_param(debug, int, 0); /* enable debug */
  4332. +module_param(enabled, int, 0); /* enable detector */
  4333. +module_param(threshold, int, 0); /* latency threshold */
  4334. +
  4335. +/* Buffering and sampling */
  4336. +
  4337. +static struct ring_buffer *ring_buffer; /* sample buffer */
  4338. +static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */
  4339. +static unsigned long buf_size = BUF_SIZE_DEFAULT;
  4340. +static struct task_struct *kthread; /* sampling thread */
  4341. +
  4342. +/* DebugFS filesystem entries */
  4343. +
  4344. +static struct dentry *debug_dir; /* debugfs directory */
  4345. +static struct dentry *debug_max; /* maximum TSC delta */
  4346. +static struct dentry *debug_count; /* total detect count */
  4347. +static struct dentry *debug_sample_width; /* sample width us */
  4348. +static struct dentry *debug_sample_window; /* sample window us */
  4349. +static struct dentry *debug_sample; /* raw samples us */
  4350. +static struct dentry *debug_threshold; /* threshold us */
  4351. +static struct dentry *debug_enable; /* enable/disable */
  4352. +
  4353. +/* Individual samples and global state */
  4354. +
  4355. +struct sample; /* latency sample */
  4356. +struct data; /* Global state */
  4357. +
  4358. +/* Sampling functions */
  4359. +static int __buffer_add_sample(struct sample *sample);
  4360. +static struct sample *buffer_get_sample(struct sample *sample);
  4361. +
  4362. +/* Threading and state */
  4363. +static int kthread_fn(void *unused);
  4364. +static int start_kthread(void);
  4365. +static int stop_kthread(void);
  4366. +static void __reset_stats(void);
  4367. +static int init_stats(void);
  4368. +
  4369. +/* Debugfs interface */
  4370. +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
  4371. + size_t cnt, loff_t *ppos, const u64 *entry);
  4372. +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
  4373. + size_t cnt, loff_t *ppos, u64 *entry);
  4374. +static int debug_sample_fopen(struct inode *inode, struct file *filp);
  4375. +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
  4376. + size_t cnt, loff_t *ppos);
  4377. +static int debug_sample_release(struct inode *inode, struct file *filp);
  4378. +static int debug_enable_fopen(struct inode *inode, struct file *filp);
  4379. +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
  4380. + size_t cnt, loff_t *ppos);
  4381. +static ssize_t debug_enable_fwrite(struct file *file,
  4382. + const char __user *user_buffer,
  4383. + size_t user_size, loff_t *offset);
  4384. +
  4385. +/* Initialization functions */
  4386. +static int init_debugfs(void);
  4387. +static void free_debugfs(void);
  4388. +static int detector_init(void);
  4389. +static void detector_exit(void);
  4390. +
  4391. +/* Individual latency samples are stored here when detected and packed into
  4392. + * the ring_buffer circular buffer, where they are overwritten when
  4393. + * more than buf_size/sizeof(sample) samples are received. */
  4394. +struct sample {
  4395. + u64 seqnum; /* unique sequence */
  4396. + u64 duration; /* ktime delta */
  4397. + u64 outer_duration; /* ktime delta (outer loop) */
  4398. + struct timespec timestamp; /* wall time */
  4399. + unsigned long lost;
  4400. +};
  4401. +
  4402. +/* keep the global state somewhere. */
  4403. +static struct data {
  4404. +
  4405. + struct mutex lock; /* protect changes */
  4406. +
  4407. + u64 count; /* total since reset */
  4408. + u64 max_sample; /* max hardware latency */
  4409. + u64 threshold; /* sample threshold level */
  4410. +
  4411. + u64 sample_window; /* total sampling window (on+off) */
  4412. + u64 sample_width; /* active sampling portion of window */
  4413. +
  4414. + atomic_t sample_open; /* whether the sample file is open */
  4415. +
  4416. + wait_queue_head_t wq; /* waitqueue for new sample values */
  4417. +
  4418. +} data;
  4419. +
  4420. +/**
  4421. + * __buffer_add_sample - add a new latency sample recording to the ring buffer
  4422. + * @sample: The new latency sample value
  4423. + *
  4424. + * This receives a new latency sample and records it in a global ring buffer.
  4425. + * No additional locking is used in this case.
  4426. + */
  4427. +static int __buffer_add_sample(struct sample *sample)
  4428. +{
  4429. + return ring_buffer_write(ring_buffer,
  4430. + sizeof(struct sample), sample);
  4431. +}
  4432. +
  4433. +/**
  4434. + * buffer_get_sample - remove a hardware latency sample from the ring buffer
  4435. + * @sample: Pre-allocated storage for the sample
  4436. + *
  4437. + * This retrieves a hardware latency sample from the global circular buffer
  4438. + */
  4439. +static struct sample *buffer_get_sample(struct sample *sample)
  4440. +{
  4441. + struct ring_buffer_event *e = NULL;
  4442. + struct sample *s = NULL;
  4443. + unsigned int cpu = 0;
  4444. +
  4445. + if (!sample)
  4446. + return NULL;
  4447. +
  4448. + mutex_lock(&ring_buffer_mutex);
  4449. + for_each_online_cpu(cpu) {
  4450. + e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost);
  4451. + if (e)
  4452. + break;
  4453. + }
  4454. +
  4455. + if (e) {
  4456. + s = ring_buffer_event_data(e);
  4457. + memcpy(sample, s, sizeof(struct sample));
  4458. + } else
  4459. + sample = NULL;
  4460. + mutex_unlock(&ring_buffer_mutex);
  4461. +
  4462. + return sample;
  4463. +}
  4464. +
  4465. +#ifndef CONFIG_TRACING
  4466. +#define time_type ktime_t
  4467. +#define time_get() ktime_get()
  4468. +#define time_to_us(x) ktime_to_us(x)
  4469. +#define time_sub(a, b) ktime_sub(a, b)
  4470. +#define init_time(a, b) (a).tv64 = b
  4471. +#define time_u64(a) ((a).tv64)
  4472. +#else
  4473. +#define time_type u64
  4474. +#define time_get() trace_clock_local()
  4475. +#define time_to_us(x) div_u64(x, 1000)
  4476. +#define time_sub(a, b) ((a) - (b))
  4477. +#define init_time(a, b) (a = b)
  4478. +#define time_u64(a) a
  4479. +#endif
  4480. +/**
  4481. + * get_sample - sample the CPU TSC and look for likely hardware latencies
  4482. + *
  4483. + * Used to repeatedly capture the CPU TSC (or similar), looking for potential
  4484. + * hardware-induced latency. Called with interrupts disabled and with
  4485. + * data.lock held.
  4486. + */
  4487. +static int get_sample(void)
  4488. +{
  4489. + time_type start, t1, t2, last_t2;
  4490. + s64 diff, total = 0;
  4491. + u64 sample = 0;
  4492. + u64 outer_sample = 0;
  4493. + int ret = -1;
  4494. +
  4495. + init_time(last_t2, 0);
  4496. + start = time_get(); /* start timestamp */
  4497. +
  4498. + do {
  4499. +
  4500. + t1 = time_get(); /* we'll look for a discontinuity */
  4501. + t2 = time_get();
  4502. +
  4503. + if (time_u64(last_t2)) {
  4504. + /* Check the delta from outer loop (t2 to next t1) */
  4505. + diff = time_to_us(time_sub(t1, last_t2));
  4506. + /* This shouldn't happen */
  4507. + if (diff < 0) {
  4508. + pr_err(BANNER "time running backwards\n");
  4509. + goto out;
  4510. + }
  4511. + if (diff > outer_sample)
  4512. + outer_sample = diff;
  4513. + }
  4514. + last_t2 = t2;
  4515. +
  4516. + total = time_to_us(time_sub(t2, start)); /* sample width */
  4517. +
  4518. + /* This checks the inner loop (t1 to t2) */
  4519. + diff = time_to_us(time_sub(t2, t1)); /* current diff */
  4520. +
  4521. + /* This shouldn't happen */
  4522. + if (diff < 0) {
  4523. + pr_err(BANNER "time running backwards\n");
  4524. + goto out;
  4525. + }
  4526. +
  4527. + if (diff > sample)
  4528. + sample = diff; /* only want highest value */
  4529. +
  4530. + } while (total <= data.sample_width);
  4531. +
  4532. + ret = 0;
  4533. +
  4534. + /* If we exceed the threshold value, we have found a hardware latency */
  4535. + if (sample > data.threshold || outer_sample > data.threshold) {
  4536. + struct sample s;
  4537. +
  4538. + ret = 1;
  4539. +
  4540. + data.count++;
  4541. + s.seqnum = data.count;
  4542. + s.duration = sample;
  4543. + s.outer_duration = outer_sample;
  4544. + s.timestamp = CURRENT_TIME;
  4545. + __buffer_add_sample(&s);
  4546. +
  4547. + /* Keep a running maximum ever recorded hardware latency */
  4548. + if (sample > data.max_sample)
  4549. + data.max_sample = sample;
  4550. + }
  4551. +
  4552. +out:
  4553. + return ret;
  4554. +}
  4555. +
  4556. +/*
  4557. + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
  4558. + * @unused: A required part of the kthread API.
  4559. + *
  4560. + * Used to periodically sample the CPU TSC via a call to get_sample. We
  4561. + * disable interrupts, which does (intentionally) introduce latency since we
  4562. + * need to ensure nothing else might be running (and thus pre-empting).
  4563. + * Obviously this should never be used in production environments.
  4564. + *
  4565. + * Currently this runs on whichever CPU it was scheduled on, but most
  4566. + * real-world hardware latency situations occur across several CPUs
  4567. + * anyway; we might later generalize this if we find there are actually
  4568. + * any systems with alternate SMI delivery or other hardware latencies.
  4569. + */
  4570. +static int kthread_fn(void *unused)
  4571. +{
  4572. + int ret;
  4573. + u64 interval;
  4574. +
  4575. + while (!kthread_should_stop()) {
  4576. +
  4577. + mutex_lock(&data.lock);
  4578. +
  4579. + local_irq_disable();
  4580. + ret = get_sample();
  4581. + local_irq_enable();
  4582. +
  4583. + if (ret > 0)
  4584. + wake_up(&data.wq); /* wake up reader(s) */
  4585. +
  4586. + interval = data.sample_window - data.sample_width;
  4587. + do_div(interval, USEC_PER_MSEC); /* modifies interval value */
  4588. +
  4589. + mutex_unlock(&data.lock);
  4590. +
  4591. + if (msleep_interruptible(interval))
  4592. + break;
  4593. + }
  4594. +
  4595. + return 0;
  4596. +}
  4597. +
  4598. +/**
  4599. + * start_kthread - Kick off the hardware latency sampling/detector kthread
  4600. + *
  4601. + * This starts a kernel thread that will sit and sample the CPU timestamp
  4602. + * counter (TSC or similar) and look for potential hardware latencies.
  4603. + */
  4604. +static int start_kthread(void)
  4605. +{
  4606. + kthread = kthread_run(kthread_fn, NULL,
  4607. + DRVNAME);
  4608. + if (IS_ERR(kthread)) {
  4609. + pr_err(BANNER "could not start sampling thread\n");
  4610. + enabled = 0;
  4611. + return -ENOMEM;
  4612. + }
  4613. +
  4614. + return 0;
  4615. +}
  4616. +
  4617. +/**
  4618. + * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
  4619. + *
  4620. + * This kicks the running hardware latency sampling/detector kernel thread and
  4621. + * tells it to stop sampling now. Use this on unload and at system shutdown.
  4622. + */
  4623. +static int stop_kthread(void)
  4624. +{
  4625. + int ret;
  4626. +
  4627. + ret = kthread_stop(kthread);
  4628. +
  4629. + return ret;
  4630. +}
  4631. +
  4632. +/**
  4633. + * __reset_stats - Reset statistics for the hardware latency detector
  4634. + *
  4635. + * We use data to store various statistics and global state. We call this
  4636. + * function in order to reset those when "enable" is toggled on or off, and
  4637. + * also at initialization. Should be called with data.lock held.
  4638. + */
  4639. +static void __reset_stats(void)
  4640. +{
  4641. + data.count = 0;
  4642. + data.max_sample = 0;
  4643. + ring_buffer_reset(ring_buffer); /* flush out old sample entries */
  4644. +}
  4645. +
  4646. +/**
  4647. + * init_stats - Setup global state statistics for the hardware latency detector
  4648. + *
  4649. + * We use data to store various statistics and global state. We also use
  4650. + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware
  4651. + * induced system latencies. This function initializes these structures and
  4652. + * allocates the global ring buffer also.
  4653. + */
  4654. +static int init_stats(void)
  4655. +{
  4656. + int ret = -ENOMEM;
  4657. +
  4658. + mutex_init(&data.lock);
  4659. + init_waitqueue_head(&data.wq);
  4660. + atomic_set(&data.sample_open, 0);
  4661. +
  4662. + ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS);
  4663. +
  4664. + if (WARN(!ring_buffer, KERN_ERR BANNER
  4665. + "failed to allocate ring buffer!\n"))
  4666. + goto out;
  4667. +
  4668. + __reset_stats();
  4669. + data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */
  4670. + data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
  4671. + data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */
  4672. +
  4673. + ret = 0;
  4674. +
  4675. +out:
  4676. + return ret;
  4677. +
  4678. +}
  4679. +
  4680. +/*
  4681. + * simple_data_read - Wrapper read function for global state debugfs entries
  4682. + * @filp: The active open file structure for the debugfs "file"
  4683. + * @ubuf: The userspace provided buffer to read value into
  4684. + * @cnt: The maximum number of bytes to read
  4685. + * @ppos: The current "file" position
  4686. + * @entry: The entry to read from
  4687. + *
  4688. + * This function provides a generic read implementation for the global state
  4689. + * "data" structure debugfs filesystem entries. It would be nice to use
  4690. + * simple_attr_read directly, but we need to make sure that the data.lock
  4691. + * is held during the actual read.
  4692. + */
  4693. +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
  4694. + size_t cnt, loff_t *ppos, const u64 *entry)
  4695. +{
  4696. + char buf[U64STR_SIZE];
  4697. + u64 val = 0;
  4698. + int len = 0;
  4699. +
  4700. + memset(buf, 0, sizeof(buf));
  4701. +
  4702. + if (!entry)
  4703. + return -EFAULT;
  4704. +
  4705. + mutex_lock(&data.lock);
  4706. + val = *entry;
  4707. + mutex_unlock(&data.lock);
  4708. +
  4709. + len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val);
  4710. +
  4711. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
  4712. +
  4713. +}
  4714. +
  4715. +/*
  4716. + * simple_data_write - Wrapper write function for global state debugfs entries
  4717. + * @filp: The active open file structure for the debugfs "file"
  4718. + * @ubuf: The userspace provided buffer to write value from
  4719. + * @cnt: The maximum number of bytes to write
  4720. + * @ppos: The current "file" position
  4721. + * @entry: The entry to write to
  4722. + *
  4723. + * This function provides a generic write implementation for the global state
  4724. + * "data" structure debugfs filesystem entries. It would be nice to use
  4725. + * simple_attr_write directly, but we need to make sure that the data.lock
  4726. + * is held during the actual write.
  4727. + */
  4728. +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
  4729. + size_t cnt, loff_t *ppos, u64 *entry)
  4730. +{
  4731. + char buf[U64STR_SIZE];
  4732. + int csize = min(cnt, sizeof(buf));
  4733. + u64 val = 0;
  4734. + int err = 0;
  4735. +
  4736. + memset(buf, '\0', sizeof(buf));
  4737. + if (copy_from_user(buf, ubuf, csize))
  4738. + return -EFAULT;
  4739. +
  4740. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  4741. + err = kstrtoull(buf, 10, &val);
  4742. + if (err)
  4743. + return -EINVAL;
  4744. +
  4745. + mutex_lock(&data.lock);
  4746. + *entry = val;
  4747. + mutex_unlock(&data.lock);
  4748. +
  4749. + return csize;
  4750. +}
  4751. +
  4752. +/**
  4753. + * debug_count_fopen - Open function for "count" debugfs entry
  4754. + * @inode: The in-kernel inode representation of the debugfs "file"
  4755. + * @filp: The active open file structure for the debugfs "file"
  4756. + *
  4757. + * This function provides an open implementation for the "count" debugfs
  4758. + * interface to the hardware latency detector.
  4759. + */
  4760. +static int debug_count_fopen(struct inode *inode, struct file *filp)
  4761. +{
  4762. + return 0;
  4763. +}
  4764. +
  4765. +/**
  4766. + * debug_count_fread - Read function for "count" debugfs entry
  4767. + * @filp: The active open file structure for the debugfs "file"
  4768. + * @ubuf: The userspace provided buffer to read value into
  4769. + * @cnt: The maximum number of bytes to read
  4770. + * @ppos: The current "file" position
  4771. + *
  4772. + * This function provides a read implementation for the "count" debugfs
  4773. + * interface to the hardware latency detector. Can be used to read the
  4774. + * number of latency readings exceeding the configured threshold since
  4775. + * the detector was last reset (e.g. by writing a zero into "count").
  4776. + */
  4777. +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf,
  4778. + size_t cnt, loff_t *ppos)
  4779. +{
  4780. + return simple_data_read(filp, ubuf, cnt, ppos, &data.count);
  4781. +}
  4782. +
  4783. +/**
  4784. + * debug_count_fwrite - Write function for "count" debugfs entry
  4785. + * @filp: The active open file structure for the debugfs "file"
  4786. + * @ubuf: The user buffer that contains the value to write
  4787. + * @cnt: The maximum number of bytes to write to "file"
  4788. + * @ppos: The current position in the debugfs "file"
  4789. + *
  4790. + * This function provides a write implementation for the "count" debugfs
  4791. + * interface to the hardware latency detector. Can be used to write a
  4792. + * desired value, especially to zero the total count.
  4793. + */
  4794. +static ssize_t debug_count_fwrite(struct file *filp,
  4795. + const char __user *ubuf,
  4796. + size_t cnt,
  4797. + loff_t *ppos)
  4798. +{
  4799. + return simple_data_write(filp, ubuf, cnt, ppos, &data.count);
  4800. +}
  4801. +
  4802. +/**
  4803. + * debug_enable_fopen - Dummy open function for "enable" debugfs interface
  4804. + * @inode: The in-kernel inode representation of the debugfs "file"
  4805. + * @filp: The active open file structure for the debugfs "file"
  4806. + *
  4807. + * This function provides an open implementation for the "enable" debugfs
  4808. + * interface to the hardware latency detector.
  4809. + */
  4810. +static int debug_enable_fopen(struct inode *inode, struct file *filp)
  4811. +{
  4812. + return 0;
  4813. +}
  4814. +
  4815. +/**
  4816. + * debug_enable_fread - Read function for "enable" debugfs interface
  4817. + * @filp: The active open file structure for the debugfs "file"
  4818. + * @ubuf: The userspace provided buffer to read value into
  4819. + * @cnt: The maximum number of bytes to read
  4820. + * @ppos: The current "file" position
  4821. + *
  4822. + * This function provides a read implementation for the "enable" debugfs
  4823. + * interface to the hardware latency detector. Can be used to determine
  4824. + * whether the detector is currently enabled ("0\n" or "1\n" returned).
  4825. + */
  4826. +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
  4827. + size_t cnt, loff_t *ppos)
  4828. +{
  4829. + char buf[4];
  4830. +
  4831. + if ((cnt < sizeof(buf)) || (*ppos))
  4832. + return 0;
  4833. +
  4834. + buf[0] = enabled ? '1' : '0';
  4835. + buf[1] = '\n';
  4836. + buf[2] = '\0';
  4837. + if (copy_to_user(ubuf, buf, strlen(buf)))
  4838. + return -EFAULT;
  4839. + return *ppos = strlen(buf);
  4840. +}
  4841. +
  4842. +/**
  4843. + * debug_enable_fwrite - Write function for "enable" debugfs interface
  4844. + * @filp: The active open file structure for the debugfs "file"
  4845. + * @ubuf: The user buffer that contains the value to write
  4846. + * @cnt: The maximum number of bytes to write to "file"
  4847. + * @ppos: The current position in the debugfs "file"
  4848. + *
  4849. + * This function provides a write implementation for the "enable" debugfs
  4850. + * interface to the hardware latency detector. Can be used to enable or
  4851. + * disable the detector, which will have the side-effect of possibly
  4852. + * also resetting the global stats and kicking off the measuring
  4853. + * kthread (on an enable) or the converse (upon a disable).
  4854. + */
  4855. +static ssize_t debug_enable_fwrite(struct file *filp,
  4856. + const char __user *ubuf,
  4857. + size_t cnt,
  4858. + loff_t *ppos)
  4859. +{
  4860. + char buf[4];
  4861. + int csize = min(cnt, sizeof(buf));
  4862. + long val = 0;
  4863. + int err = 0;
  4864. +
  4865. + memset(buf, '\0', sizeof(buf));
  4866. + if (copy_from_user(buf, ubuf, csize))
  4867. + return -EFAULT;
  4868. +
  4869. + buf[sizeof(buf)-1] = '\0'; /* just in case */
  4870. + err = kstrtoul(buf, 10, &val);
  4871. + if (0 != err)
  4872. + return -EINVAL;
  4873. +
  4874. + if (val) {
  4875. + if (enabled)
  4876. + goto unlock;
  4877. + enabled = 1;
  4878. + __reset_stats();
  4879. + if (start_kthread())
  4880. + return -EFAULT;
  4881. + } else {
  4882. + if (!enabled)
  4883. + goto unlock;
  4884. + enabled = 0;
  4885. + err = stop_kthread();
  4886. + if (err) {
  4887. + pr_err(BANNER "cannot stop kthread\n");
  4888. + return -EFAULT;
  4889. + }
  4890. + wake_up(&data.wq); /* reader(s) should return */
  4891. + }
  4892. +unlock:
  4893. + return csize;
  4894. +}
  4895. +
  4896. +/**
  4897. + * debug_max_fopen - Open function for "max" debugfs entry
  4898. + * @inode: The in-kernel inode representation of the debugfs "file"
  4899. + * @filp: The active open file structure for the debugfs "file"
  4900. + *
  4901. + * This function provides an open implementation for the "max" debugfs
  4902. + * interface to the hardware latency detector.
  4903. + */
  4904. +static int debug_max_fopen(struct inode *inode, struct file *filp)
  4905. +{
  4906. + return 0;
  4907. +}
  4908. +
  4909. +/**
  4910. + * debug_max_fread - Read function for "max" debugfs entry
  4911. + * @filp: The active open file structure for the debugfs "file"
  4912. + * @ubuf: The userspace provided buffer to read value into
  4913. + * @cnt: The maximum number of bytes to read
  4914. + * @ppos: The current "file" position
  4915. + *
  4916. + * This function provides a read implementation for the "max" debugfs
  4917. + * interface to the hardware latency detector. Can be used to determine
  4918. + * the maximum latency value observed since it was last reset.
  4919. + */
  4920. +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
  4921. + size_t cnt, loff_t *ppos)
  4922. +{
  4923. + return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample);
  4924. +}
  4925. +
  4926. +/**
  4927. + * debug_max_fwrite - Write function for "max" debugfs entry
  4928. + * @filp: The active open file structure for the debugfs "file"
  4929. + * @ubuf: The user buffer that contains the value to write
  4930. + * @cnt: The maximum number of bytes to write to "file"
  4931. + * @ppos: The current position in the debugfs "file"
  4932. + *
  4933. + * This function provides a write implementation for the "max" debugfs
  4934. + * interface to the hardware latency detector. Can be used to reset the
  4935. + * maximum or set it to some other desired value - if, then, subsequent
  4936. + * measurements exceed this value, the maximum will be updated.
  4937. + */
  4938. +static ssize_t debug_max_fwrite(struct file *filp,
  4939. + const char __user *ubuf,
  4940. + size_t cnt,
  4941. + loff_t *ppos)
  4942. +{
  4943. + return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample);
  4944. +}
  4945. +
  4946. +
  4947. +/**
  4948. + * debug_sample_fopen - An open function for "sample" debugfs interface
  4949. + * @inode: The in-kernel inode representation of this debugfs "file"
  4950. + * @filp: The active open file structure for the debugfs "file"
  4951. + *
  4952. + * This function handles opening the "sample" file within the hardware
  4953. + * latency detector debugfs directory interface. This file is used to read
  4954. + * raw samples from the global ring_buffer and allows the user to see a
  4955. + * running latency history. Can be opened blocking or non-blocking,
  4956. + * affecting whether it behaves as a buffer read pipe, or does not.
  4957. + * Implements simple locking to prevent multiple simultaneous use.
  4958. + */
  4959. +static int debug_sample_fopen(struct inode *inode, struct file *filp)
  4960. +{
  4961. + if (!atomic_add_unless(&data.sample_open, 1, 1))
  4962. + return -EBUSY;
  4963. + else
  4964. + return 0;
  4965. +}
  4966. +
  4967. +/**
  4968. + * debug_sample_fread - A read function for "sample" debugfs interface
  4969. + * @filp: The active open file structure for the debugfs "file"
  4970. + * @ubuf: The user buffer that will contain the samples read
  4971. + * @cnt: The maximum bytes to read from the debugfs "file"
  4972. + * @ppos: The current position in the debugfs "file"
  4973. + *
  4974. + * This function handles reading from the "sample" file within the hardware
  4975. + * latency detector debugfs directory interface. This file is used to read
  4976. + * raw samples from the global ring_buffer and allows the user to see a
  4977. + * running latency history. By default this will block pending a new
  4978. + * value written into the sample buffer, unless there are already a
  4979. + * number of value(s) waiting in the buffer, or the sample file was
  4980. + * previously opened in a non-blocking mode of operation.
  4981. + */
  4982. +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
  4983. + size_t cnt, loff_t *ppos)
  4984. +{
  4985. + int len = 0;
  4986. + char buf[64];
  4987. + struct sample *sample = NULL;
  4988. +
  4989. + if (!enabled)
  4990. + return 0;
  4991. +
  4992. + sample = kzalloc(sizeof(struct sample), GFP_KERNEL);
  4993. + if (!sample)
  4994. + return -ENOMEM;
  4995. +
  4996. + while (!buffer_get_sample(sample)) {
  4997. +
  4998. + DEFINE_WAIT(wait);
  4999. +
  5000. + if (filp->f_flags & O_NONBLOCK) {
  5001. + len = -EAGAIN;
  5002. + goto out;
  5003. + }
  5004. +
  5005. + prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE);
  5006. + schedule();
  5007. + finish_wait(&data.wq, &wait);
  5008. +
  5009. + if (signal_pending(current)) {
  5010. + len = -EINTR;
  5011. + goto out;
  5012. + }
  5013. +
  5014. + if (!enabled) { /* enable was toggled */
  5015. + len = 0;
  5016. + goto out;
  5017. + }
  5018. + }
  5019. +
  5020. + len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n",
  5021. + sample->timestamp.tv_sec,
  5022. + sample->timestamp.tv_nsec,
  5023. + sample->duration,
  5024. + sample->outer_duration);
  5025. +
  5026. +
  5027. + /* handling partial reads is more trouble than it's worth */
  5028. + if (len > cnt)
  5029. + goto out;
  5030. +
  5031. + if (copy_to_user(ubuf, buf, len))
  5032. + len = -EFAULT;
  5033. +
  5034. +out:
  5035. + kfree(sample);
  5036. + return len;
  5037. +}
  5038. +
  5039. +/**
  5040. + * debug_sample_release - Release function for "sample" debugfs interface
  5041. + * @inode: The in-kernel inode representation of the debugfs "file"
  5042. + * @filp: The active open file structure for the debugfs "file"
  5043. + *
  5044. + * This function completes the close of the debugfs interface "sample" file.
  5045. + * Frees the sample_open "lock" so that other users may open the interface.
  5046. + */
  5047. +static int debug_sample_release(struct inode *inode, struct file *filp)
  5048. +{
  5049. + atomic_dec(&data.sample_open);
  5050. +
  5051. + return 0;
  5052. +}
  5053. +
  5054. +/**
  5055. + * debug_threshold_fopen - Open function for "threshold" debugfs entry
  5056. + * @inode: The in-kernel inode representation of the debugfs "file"
  5057. + * @filp: The active open file structure for the debugfs "file"
  5058. + *
  5059. + * This function provides an open implementation for the "threshold" debugfs
  5060. + * interface to the hardware latency detector.
  5061. + */
  5062. +static int debug_threshold_fopen(struct inode *inode, struct file *filp)
  5063. +{
  5064. + return 0;
  5065. +}
  5066. +
  5067. +/**
  5068. + * debug_threshold_fread - Read function for "threshold" debugfs entry
  5069. + * @filp: The active open file structure for the debugfs "file"
  5070. + * @ubuf: The userspace provided buffer to read value into
  5071. + * @cnt: The maximum number of bytes to read
  5072. + * @ppos: The current "file" position
  5073. + *
  5074. + * This function provides a read implementation for the "threshold" debugfs
  5075. + * interface to the hardware latency detector. It can be used to determine
  5076. + * the current threshold level at which a latency will be recorded in the
  5077. + * global ring buffer, typically on the order of 10us.
  5078. + */
  5079. +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf,
  5080. + size_t cnt, loff_t *ppos)
  5081. +{
  5082. + return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold);
  5083. +}
  5084. +
  5085. +/**
  5086. + * debug_threshold_fwrite - Write function for "threshold" debugfs entry
  5087. + * @filp: The active open file structure for the debugfs "file"
  5088. + * @ubuf: The user buffer that contains the value to write
  5089. + * @cnt: The maximum number of bytes to write to "file"
  5090. + * @ppos: The current position in the debugfs "file"
  5091. + *
  5092. + * This function provides a write implementation for the "threshold" debugfs
  5093. + * interface to the hardware latency detector. It can be used to configure
  5094. + * the threshold level at which any subsequently detected latencies will
  5095. + * be recorded into the global ring buffer.
  5096. + */
  5097. +static ssize_t debug_threshold_fwrite(struct file *filp,
  5098. + const char __user *ubuf,
  5099. + size_t cnt,
  5100. + loff_t *ppos)
  5101. +{
  5102. + int ret;
  5103. +
  5104. + ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold);
  5105. +
  5106. + if (enabled)
  5107. + wake_up_process(kthread);
  5108. +
  5109. + return ret;
  5110. +}
  5111. +
  5112. +/**
  5113. + * debug_width_fopen - Open function for "width" debugfs entry
  5114. + * @inode: The in-kernel inode representation of the debugfs "file"
  5115. + * @filp: The active open file structure for the debugfs "file"
  5116. + *
  5117. + * This function provides an open implementation for the "width" debugfs
  5118. + * interface to the hardware latency detector.
  5119. + */
  5120. +static int debug_width_fopen(struct inode *inode, struct file *filp)
  5121. +{
  5122. + return 0;
  5123. +}
  5124. +
  5125. +/**
  5126. + * debug_width_fread - Read function for "width" debugfs entry
  5127. + * @filp: The active open file structure for the debugfs "file"
  5128. + * @ubuf: The userspace provided buffer to read value into
  5129. + * @cnt: The maximum number of bytes to read
  5130. + * @ppos: The current "file" position
  5131. + *
  5132. + * This function provides a read implementation for the "width" debugfs
  5133. + * interface to the hardware latency detector. It can be used to determine
  5134. + * for how many us of the total window we will actively sample for any
  5135. + * hardware-induced latency periods. Obviously, it is not possible to
  5136. + * sample constantly and still have the system respond to a sample reader,
  5137. + * or, worse, to have the system appear to have gone out to lunch.
  5138. + */
  5139. +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf,
  5140. + size_t cnt, loff_t *ppos)
  5141. +{
  5142. + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width);
  5143. +}
  5144. +
  5145. +/**
  5146. + * debug_width_fwrite - Write function for "width" debugfs entry
  5147. + * @filp: The active open file structure for the debugfs "file"
  5148. + * @ubuf: The user buffer that contains the value to write
  5149. + * @cnt: The maximum number of bytes to write to "file"
  5150. + * @ppos: The current position in the debugfs "file"
  5151. + *
  5152. + * This function provides a write implementation for the "width" debugfs
  5153. + * interface to the hardware latency detector. It can be used to configure
  5154. + * for how many us of the total window we will actively sample for any
  5155. + * hardware-induced latency periods. Obviously, it is not possible to
  5156. + * sample constantly and still have the system respond to a sample reader,
  5157. + * or, worse, to have the system appear to have gone out to lunch. It
  5158. + * is enforced that the width is less than the total window size.
  5159. + */
  5160. +static ssize_t debug_width_fwrite(struct file *filp,
  5161. + const char __user *ubuf,
  5162. + size_t cnt,
  5163. + loff_t *ppos)
  5164. +{
  5165. + char buf[U64STR_SIZE];
  5166. + int csize = min(cnt, sizeof(buf));
  5167. + u64 val = 0;
  5168. + int err = 0;
  5169. +
  5170. + memset(buf, '\0', sizeof(buf));
  5171. + if (copy_from_user(buf, ubuf, csize))
  5172. + return -EFAULT;
  5173. +
  5174. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  5175. + err = kstrtoull(buf, 10, &val);
  5176. + if (0 != err)
  5177. + return -EINVAL;
  5178. +
  5179. + mutex_lock(&data.lock);
  5180. + if (val < data.sample_window)
  5181. + data.sample_width = val;
  5182. + else {
  5183. + mutex_unlock(&data.lock);
  5184. + return -EINVAL;
  5185. + }
  5186. + mutex_unlock(&data.lock);
  5187. +
  5188. + if (enabled)
  5189. + wake_up_process(kthread);
  5190. +
  5191. + return csize;
  5192. +}
  5193. +
  5194. +/**
  5195. + * debug_window_fopen - Open function for "window" debugfs entry
  5196. + * @inode: The in-kernel inode representation of the debugfs "file"
  5197. + * @filp: The active open file structure for the debugfs "file"
  5198. + *
  5199. + * This function provides an open implementation for the "window" debugfs
  5200. + * interface to the hardware latency detector. The window is the total time
  5201. + * in us that will be considered one sample period. Conceptually, windows
  5202. + * occur back-to-back and contain a sample width period during which
  5203. + * actual sampling occurs.
  5204. + */
  5205. +static int debug_window_fopen(struct inode *inode, struct file *filp)
  5206. +{
  5207. + return 0;
  5208. +}
  5209. +
  5210. +/**
  5211. + * debug_window_fread - Read function for "window" debugfs entry
  5212. + * @filp: The active open file structure for the debugfs "file"
  5213. + * @ubuf: The userspace provided buffer to read value into
  5214. + * @cnt: The maximum number of bytes to read
  5215. + * @ppos: The current "file" position
  5216. + *
  5217. + * This function provides a read implementation for the "window" debugfs
  5218. + * interface to the hardware latency detector. The window is the total time
  5219. + * in us that will be considered one sample period. Conceptually, windows
  5220. + * occur back-to-back and contain a sample width period during which
  5221. + * actual sampling occurs. Can be used to read the total window size.
  5222. + */
  5223. +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf,
  5224. + size_t cnt, loff_t *ppos)
  5225. +{
  5226. + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window);
  5227. +}
  5228. +
  5229. +/**
  5230. + * debug_window_fwrite - Write function for "window" debugfs entry
  5231. + * @filp: The active open file structure for the debugfs "file"
  5232. + * @ubuf: The user buffer that contains the value to write
  5233. + * @cnt: The maximum number of bytes to write to "file"
  5234. + * @ppos: The current position in the debugfs "file"
  5235. + *
  5236. + * This function provides a write implementation for the "window" debugfs
  5237. + * interface to the hardware latency detector. The window is the total time
  5238. + * in us that will be considered one sample period. Conceptually, windows
  5239. + * occur back-to-back and contain a sample width period during which
  5240. + * actual sampling occurs. Can be used to write a new total window size. It
  5241. + * is enforced that any value written must be greater than the sample width
  5242. + * size, or an error results.
  5243. + */
  5244. +static ssize_t debug_window_fwrite(struct file *filp,
  5245. + const char __user *ubuf,
  5246. + size_t cnt,
  5247. + loff_t *ppos)
  5248. +{
  5249. + char buf[U64STR_SIZE];
  5250. + int csize = min(cnt, sizeof(buf));
  5251. + u64 val = 0;
  5252. + int err = 0;
  5253. +
  5254. + memset(buf, '\0', sizeof(buf));
  5255. + if (copy_from_user(buf, ubuf, csize))
  5256. + return -EFAULT;
  5257. +
  5258. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  5259. + err = kstrtoull(buf, 10, &val);
  5260. + if (0 != err)
  5261. + return -EINVAL;
  5262. +
  5263. + mutex_lock(&data.lock);
  5264. + if (data.sample_width < val)
  5265. + data.sample_window = val;
  5266. + else {
  5267. + mutex_unlock(&data.lock);
  5268. + return -EINVAL;
  5269. + }
  5270. + mutex_unlock(&data.lock);
  5271. +
  5272. + return csize;
  5273. +}
  5274. +
  5275. +/*
  5276. + * Function pointers for the "count" debugfs file operations
  5277. + */
  5278. +static const struct file_operations count_fops = {
  5279. + .open = debug_count_fopen,
  5280. + .read = debug_count_fread,
  5281. + .write = debug_count_fwrite,
  5282. + .owner = THIS_MODULE,
  5283. +};
  5284. +
  5285. +/*
  5286. + * Function pointers for the "enable" debugfs file operations
  5287. + */
  5288. +static const struct file_operations enable_fops = {
  5289. + .open = debug_enable_fopen,
  5290. + .read = debug_enable_fread,
  5291. + .write = debug_enable_fwrite,
  5292. + .owner = THIS_MODULE,
  5293. +};
  5294. +
  5295. +/*
  5296. + * Function pointers for the "max" debugfs file operations
  5297. + */
  5298. +static const struct file_operations max_fops = {
  5299. + .open = debug_max_fopen,
  5300. + .read = debug_max_fread,
  5301. + .write = debug_max_fwrite,
  5302. + .owner = THIS_MODULE,
  5303. +};
  5304. +
  5305. +/*
  5306. + * Function pointers for the "sample" debugfs file operations
  5307. + */
  5308. +static const struct file_operations sample_fops = {
  5309. + .open = debug_sample_fopen,
  5310. + .read = debug_sample_fread,
  5311. + .release = debug_sample_release,
  5312. + .owner = THIS_MODULE,
  5313. +};
  5314. +
  5315. +/*
  5316. + * Function pointers for the "threshold" debugfs file operations
  5317. + */
  5318. +static const struct file_operations threshold_fops = {
  5319. + .open = debug_threshold_fopen,
  5320. + .read = debug_threshold_fread,
  5321. + .write = debug_threshold_fwrite,
  5322. + .owner = THIS_MODULE,
  5323. +};
  5324. +
  5325. +/*
  5326. + * Function pointers for the "width" debugfs file operations
  5327. + */
  5328. +static const struct file_operations width_fops = {
  5329. + .open = debug_width_fopen,
  5330. + .read = debug_width_fread,
  5331. + .write = debug_width_fwrite,
  5332. + .owner = THIS_MODULE,
  5333. +};
  5334. +
  5335. +/*
  5336. + * Function pointers for the "window" debugfs file operations
  5337. + */
  5338. +static const struct file_operations window_fops = {
  5339. + .open = debug_window_fopen,
  5340. + .read = debug_window_fread,
  5341. + .write = debug_window_fwrite,
  5342. + .owner = THIS_MODULE,
  5343. +};
  5344. +
  5345. +/**
  5346. + * init_debugfs - A function to initialize the debugfs interface files
  5347. + *
  5348. + * This function creates entries in debugfs for "hwlat_detector", including
  5349. + * files to read values from the detector, current samples, and the
  5350. + * maximum sample that has been captured since the hardware latency
  5351. + * detector was started.
  5352. + */
  5353. +static int init_debugfs(void)
  5354. +{
  5355. + int ret = -ENOMEM;
  5356. +
  5357. + debug_dir = debugfs_create_dir(DRVNAME, NULL);
  5358. + if (!debug_dir)
  5359. + goto err_debug_dir;
  5360. +
  5361. + debug_sample = debugfs_create_file("sample", 0444,
  5362. + debug_dir, NULL,
  5363. + &sample_fops);
  5364. + if (!debug_sample)
  5365. + goto err_sample;
  5366. +
  5367. + debug_count = debugfs_create_file("count", 0444,
  5368. + debug_dir, NULL,
  5369. + &count_fops);
  5370. + if (!debug_count)
  5371. + goto err_count;
  5372. +
  5373. + debug_max = debugfs_create_file("max", 0444,
  5374. + debug_dir, NULL,
  5375. + &max_fops);
  5376. + if (!debug_max)
  5377. + goto err_max;
  5378. +
  5379. + debug_sample_window = debugfs_create_file("window", 0644,
  5380. + debug_dir, NULL,
  5381. + &window_fops);
  5382. + if (!debug_sample_window)
  5383. + goto err_window;
  5384. +
  5385. + debug_sample_width = debugfs_create_file("width", 0644,
  5386. + debug_dir, NULL,
  5387. + &width_fops);
  5388. + if (!debug_sample_width)
  5389. + goto err_width;
  5390. +
  5391. + debug_threshold = debugfs_create_file("threshold", 0644,
  5392. + debug_dir, NULL,
  5393. + &threshold_fops);
  5394. + if (!debug_threshold)
  5395. + goto err_threshold;
  5396. +
  5397. + debug_enable = debugfs_create_file("enable", 0644,
  5398. + debug_dir, &enabled,
  5399. + &enable_fops);
  5400. + if (!debug_enable)
  5401. + goto err_enable;
  5402. +
  5403. + else {
  5404. + ret = 0;
  5405. + goto out;
  5406. + }
  5407. +
  5408. +err_enable:
  5409. + debugfs_remove(debug_threshold);
  5410. +err_threshold:
  5411. + debugfs_remove(debug_sample_width);
  5412. +err_width:
  5413. + debugfs_remove(debug_sample_window);
  5414. +err_window:
  5415. + debugfs_remove(debug_max);
  5416. +err_max:
  5417. + debugfs_remove(debug_count);
  5418. +err_count:
  5419. + debugfs_remove(debug_sample);
  5420. +err_sample:
  5421. + debugfs_remove(debug_dir);
  5422. +err_debug_dir:
  5423. +out:
  5424. + return ret;
  5425. +}
  5426. +
  5427. +/**
  5428. + * free_debugfs - A function to cleanup the debugfs file interface
  5429. + */
  5430. +static void free_debugfs(void)
  5431. +{
  5432. + /* could also use a debugfs_remove_recursive */
  5433. + debugfs_remove(debug_enable);
  5434. + debugfs_remove(debug_threshold);
  5435. + debugfs_remove(debug_sample_width);
  5436. + debugfs_remove(debug_sample_window);
  5437. + debugfs_remove(debug_max);
  5438. + debugfs_remove(debug_count);
  5439. + debugfs_remove(debug_sample);
  5440. + debugfs_remove(debug_dir);
  5441. +}
  5442. +
  5443. +/**
  5444. + * detector_init - Standard module initialization code
  5445. + */
  5446. +static int detector_init(void)
  5447. +{
  5448. + int ret = -ENOMEM;
  5449. +
  5450. + pr_info(BANNER "version %s\n", VERSION);
  5451. +
  5452. + ret = init_stats();
  5453. + if (0 != ret)
  5454. + goto out;
  5455. +
  5456. + ret = init_debugfs();
  5457. + if (0 != ret)
  5458. + goto err_stats;
  5459. +
  5460. + if (enabled)
  5461. + ret = start_kthread();
  5462. +
  5463. + goto out;
  5464. +
  5465. +err_stats:
  5466. + ring_buffer_free(ring_buffer);
  5467. +out:
  5468. + return ret;
  5469. +
  5470. +}
  5471. +
  5472. +/**
  5473. + * detector_exit - Standard module cleanup code
  5474. + */
  5475. +static void detector_exit(void)
  5476. +{
  5477. + int err;
  5478. +
  5479. + if (enabled) {
  5480. + enabled = 0;
  5481. + err = stop_kthread();
  5482. + if (err)
  5483. + pr_err(BANNER "cannot stop kthread\n");
  5484. + }
  5485. +
  5486. + free_debugfs();
  5487. + ring_buffer_free(ring_buffer); /* free up the ring buffer */
  5488. +
  5489. +}
  5490. +
  5491. +module_init(detector_init);
  5492. +module_exit(detector_exit);
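A minimal userspace sketch of how the debugfs interface created by this file is typically exercised, assuming debugfs is mounted at /sys/kernel/debug and the hwlat_detector module has been loaded (illustrative only, not part of the patch to be applied):

/* hwlat_read.c - enable the detector and print one sample (hypothetical helper) */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char line[128];
	ssize_t n;
	int fd;

	/* Start sampling: equivalent to "echo 1 > .../hwlat_detector/enable" */
	fd = open("/sys/kernel/debug/hwlat_detector/enable", O_WRONLY);
	if (fd < 0) {
		perror("open enable");
		return 1;
	}
	if (write(fd, "1", 1) != 1) {
		perror("write enable");
		close(fd);
		return 1;
	}
	close(fd);

	/*
	 * Blocks until a latency above the threshold (10us by default) is
	 * recorded; the line format matches debug_sample_fread():
	 * seconds.nanoseconds <tab> inner-us <tab> outer-us
	 */
	fd = open("/sys/kernel/debug/hwlat_detector/sample", O_RDONLY);
	if (fd < 0) {
		perror("open sample");
		return 1;
	}
	n = read(fd, line, sizeof(line) - 1);
	if (n > 0) {
		line[n] = '\0';
		printf("hwlat sample: %s", line);
	}
	close(fd);
	return 0;
}

The same files can also be driven from a shell; the threshold, width and window entries accept decimal microsecond values as described in the comments above.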
  5493. diff -Nur linux-3.18.10.orig/drivers/misc/Kconfig linux-3.18.10/drivers/misc/Kconfig
  5494. --- linux-3.18.10.orig/drivers/misc/Kconfig 2015-03-24 02:05:12.000000000 +0100
  5495. +++ linux-3.18.10/drivers/misc/Kconfig 2015-03-26 12:42:18.655588312 +0100
  5496. @@ -54,6 +54,7 @@
  5497. config ATMEL_TCLIB
  5498. bool "Atmel AT32/AT91 Timer/Counter Library"
  5499. depends on (AVR32 || ARCH_AT91)
  5500. + default y if PREEMPT_RT_FULL
  5501. help
  5502. Select this if you want a library to allocate the Timer/Counter
  5503. blocks found on many Atmel processors. This facilitates using
  5504. @@ -69,8 +70,7 @@
  5505. are combined to make a single 32-bit timer.
  5506. When GENERIC_CLOCKEVENTS is defined, the third timer channel
  5507. - may be used as a clock event device supporting oneshot mode
  5508. - (delays of up to two seconds) based on the 32 KiHz clock.
  5509. + may be used as a clock event device supporting oneshot mode.
  5510. config ATMEL_TCB_CLKSRC_BLOCK
  5511. int
  5512. @@ -84,6 +84,15 @@
  5513. TC can be used for other purposes, such as PWM generation and
  5514. interval timing.
  5515. +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  5516. + bool "TC Block use 32 KiHz clock"
  5517. + depends on ATMEL_TCB_CLKSRC
  5518. + default y if !PREEMPT_RT_FULL
  5519. + help
  5520. + Select this to use 32 KiHz base clock rate as TC block clock
  5521. + source for clock events.
  5522. +
  5523. +
  5524. config DUMMY_IRQ
  5525. tristate "Dummy IRQ handler"
  5526. default n
  5527. @@ -113,6 +122,35 @@
  5528. for information on the specific driver level and support statement
  5529. for your IBM server.
  5530. +config HWLAT_DETECTOR
  5531. + tristate "Testing module to detect hardware-induced latencies"
  5532. + depends on DEBUG_FS
  5533. + depends on RING_BUFFER
  5534. + default m
  5535. + ---help---
  5536. + A simple hardware latency detector. Use this module to detect
  5537. + large latencies introduced by the behavior of the underlying
  4538. + system firmware external to Linux. We do this by periodically
  4539. + hogging a CPU with interrupts disabled and measuring
  4540. + for unexplainable gaps in the CPU timestamp counter(s). By
  5541. + default, the module is not enabled until the "enable" file
  5542. + within the "hwlat_detector" debugfs directory is toggled.
  5543. +
  5544. + This module is often used to detect SMI (System Management
  5545. + Interrupts) on x86 systems, though is not x86 specific. To
  5546. + this end, we default to using a sample window of 1 second,
  5547. + during which we will sample for 0.5 seconds. If an SMI or
  5548. + similar event occurs during that time, it is recorded
  4549. + into an 8K-sample global ring buffer until retrieved.
  5550. +
  5551. + WARNING: This software should never be enabled (it can be built
  5552. + but should not be turned on after it is loaded) in a production
  5553. + environment where high latencies are a concern since the
  5554. + sampling mechanism actually introduces latencies for
  5555. + regular tasks while the CPU(s) are being held.
  5556. +
  5557. + If unsure, say N
  5558. +
  5559. config PHANTOM
  5560. tristate "Sensable PHANToM (PCI)"
  5561. depends on PCI
  5562. diff -Nur linux-3.18.10.orig/drivers/misc/Makefile linux-3.18.10/drivers/misc/Makefile
  5563. --- linux-3.18.10.orig/drivers/misc/Makefile 2015-03-24 02:05:12.000000000 +0100
  5564. +++ linux-3.18.10/drivers/misc/Makefile 2015-03-26 12:42:18.655588312 +0100
  5565. @@ -38,6 +38,7 @@
  5566. obj-$(CONFIG_HMC6352) += hmc6352.o
  5567. obj-y += eeprom/
  5568. obj-y += cb710/
  5569. +obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o
  5570. obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o
  5571. obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o
  5572. obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o
  5573. diff -Nur linux-3.18.10.orig/drivers/mmc/host/mmci.c linux-3.18.10/drivers/mmc/host/mmci.c
  5574. --- linux-3.18.10.orig/drivers/mmc/host/mmci.c 2015-03-24 02:05:12.000000000 +0100
  5575. +++ linux-3.18.10/drivers/mmc/host/mmci.c 2015-03-26 12:42:18.655588312 +0100
  5576. @@ -1153,15 +1153,12 @@
  5577. struct sg_mapping_iter *sg_miter = &host->sg_miter;
  5578. struct variant_data *variant = host->variant;
  5579. void __iomem *base = host->base;
  5580. - unsigned long flags;
  5581. u32 status;
  5582. status = readl(base + MMCISTATUS);
  5583. dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status);
  5584. - local_irq_save(flags);
  5585. -
  5586. do {
  5587. unsigned int remain, len;
  5588. char *buffer;
  5589. @@ -1201,8 +1198,6 @@
  5590. sg_miter_stop(sg_miter);
  5591. - local_irq_restore(flags);
  5592. -
  5593. /*
  5594. * If we have less than the fifo 'half-full' threshold to transfer,
  5595. * trigger a PIO interrupt as soon as any data is available.
  5596. diff -Nur linux-3.18.10.orig/drivers/mmc/host/sdhci.c linux-3.18.10/drivers/mmc/host/sdhci.c
  5597. --- linux-3.18.10.orig/drivers/mmc/host/sdhci.c 2015-03-24 02:05:12.000000000 +0100
  5598. +++ linux-3.18.10/drivers/mmc/host/sdhci.c 2015-03-26 12:42:18.655588312 +0100
  5599. @@ -2565,6 +2565,31 @@
  5600. return isr ? IRQ_HANDLED : IRQ_NONE;
  5601. }
  5602. +#ifdef CONFIG_PREEMPT_RT_BASE
  5603. +static irqreturn_t sdhci_rt_irq(int irq, void *dev_id)
  5604. +{
  5605. + irqreturn_t ret;
  5606. +
  5607. + local_bh_disable();
  5608. + ret = sdhci_irq(irq, dev_id);
  5609. + local_bh_enable();
  5610. + if (ret == IRQ_WAKE_THREAD)
  5611. + ret = sdhci_thread_irq(irq, dev_id);
  5612. + return ret;
  5613. +}
  5614. +#endif
  5615. +
  5616. +static int sdhci_req_irq(struct sdhci_host *host)
  5617. +{
  5618. +#ifdef CONFIG_PREEMPT_RT_BASE
  5619. + return request_threaded_irq(host->irq, NULL, sdhci_rt_irq,
  5620. + IRQF_SHARED, mmc_hostname(host->mmc), host);
  5621. +#else
  5622. + return request_threaded_irq(host->irq, sdhci_irq, sdhci_thread_irq,
  5623. + IRQF_SHARED, mmc_hostname(host->mmc), host);
  5624. +#endif
  5625. +}
  5626. +
  5627. /*****************************************************************************\
  5628. * *
  5629. * Suspend/resume *
  5630. @@ -2632,9 +2657,7 @@
  5631. }
  5632. if (!device_may_wakeup(mmc_dev(host->mmc))) {
  5633. - ret = request_threaded_irq(host->irq, sdhci_irq,
  5634. - sdhci_thread_irq, IRQF_SHARED,
  5635. - mmc_hostname(host->mmc), host);
  5636. + ret = sdhci_req_irq(host);
  5637. if (ret)
  5638. return ret;
  5639. } else {
  5640. @@ -3253,8 +3276,7 @@
  5641. sdhci_init(host, 0);
  5642. - ret = request_threaded_irq(host->irq, sdhci_irq, sdhci_thread_irq,
  5643. - IRQF_SHARED, mmc_hostname(mmc), host);
  5644. + ret = sdhci_req_irq(host);
  5645. if (ret) {
  5646. pr_err("%s: Failed to request IRQ %d: %d\n",
  5647. mmc_hostname(mmc), host->irq, ret);
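sdhci_req_irq() above keeps the stock primary/threaded handler split on non-RT kernels; on PREEMPT_RT_BASE it registers only a threaded handler that runs the primary with bottom halves disabled and then, if asked to, the thread function. A hedged sketch of the same wrapper shape for a hypothetical driver, with foo_irq()/foo_thread_irq() standing in for its normal handlers:

#include <linux/interrupt.h>

#ifdef CONFIG_PREEMPT_RT_BASE
static irqreturn_t foo_rt_irq(int irq, void *dev_id)
{
	irqreturn_t ret;

	local_bh_disable();			/* preserve the handler's BH assumptions */
	ret = foo_irq(irq, dev_id);		/* hypothetical primary handler */
	local_bh_enable();
	if (ret == IRQ_WAKE_THREAD)		/* primary deferred work to the thread */
		ret = foo_thread_irq(irq, dev_id);
	return ret;
}
#endif

static int foo_request_irq(int irq, void *dev_id)
{
#ifdef CONFIG_PREEMPT_RT_BASE
	return request_threaded_irq(irq, NULL, foo_rt_irq,
				    IRQF_SHARED, "foo", dev_id);
#else
	return request_threaded_irq(irq, foo_irq, foo_thread_irq,
				    IRQF_SHARED, "foo", dev_id);
#endif
}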
  5648. diff -Nur linux-3.18.10.orig/drivers/net/ethernet/3com/3c59x.c linux-3.18.10/drivers/net/ethernet/3com/3c59x.c
  5649. --- linux-3.18.10.orig/drivers/net/ethernet/3com/3c59x.c 2015-03-24 02:05:12.000000000 +0100
  5650. +++ linux-3.18.10/drivers/net/ethernet/3com/3c59x.c 2015-03-26 12:42:18.655588312 +0100
  5651. @@ -842,9 +842,9 @@
  5652. {
  5653. struct vortex_private *vp = netdev_priv(dev);
  5654. unsigned long flags;
  5655. - local_irq_save(flags);
  5656. + local_irq_save_nort(flags);
  5657. (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
  5658. - local_irq_restore(flags);
  5659. + local_irq_restore_nort(flags);
  5660. }
  5661. #endif
  5662. @@ -1916,12 +1916,12 @@
  5663. * Block interrupts because vortex_interrupt does a bare spin_lock()
  5664. */
  5665. unsigned long flags;
  5666. - local_irq_save(flags);
  5667. + local_irq_save_nort(flags);
  5668. if (vp->full_bus_master_tx)
  5669. boomerang_interrupt(dev->irq, dev);
  5670. else
  5671. vortex_interrupt(dev->irq, dev);
  5672. - local_irq_restore(flags);
  5673. + local_irq_restore_nort(flags);
  5674. }
  5675. }
  5676. diff -Nur linux-3.18.10.orig/drivers/net/ethernet/atheros/atl1c/atl1c_main.c linux-3.18.10/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
  5677. --- linux-3.18.10.orig/drivers/net/ethernet/atheros/atl1c/atl1c_main.c 2015-03-24 02:05:12.000000000 +0100
  5678. +++ linux-3.18.10/drivers/net/ethernet/atheros/atl1c/atl1c_main.c 2015-03-26 12:42:18.659588317 +0100
  5679. @@ -2213,11 +2213,7 @@
  5680. }
  5681. tpd_req = atl1c_cal_tpd_req(skb);
  5682. - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) {
  5683. - if (netif_msg_pktdata(adapter))
  5684. - dev_info(&adapter->pdev->dev, "tx locked\n");
  5685. - return NETDEV_TX_LOCKED;
  5686. - }
  5687. + spin_lock_irqsave(&adapter->tx_lock, flags);
  5688. if (atl1c_tpd_avail(adapter, type) < tpd_req) {
  5689. /* no enough descriptor, just stop queue */
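This hunk and the atl1e, cxgb, s2io, pch_gbe, tehuti and rionet hunks below all make the same conversion: spinlocks may sleep on PREEMPT_RT, so the spin_trylock()-and-return-NETDEV_TX_LOCKED collision handling in ->ndo_start_xmit() is replaced by an unconditional spin_lock_irqsave(). A hedged sketch of the resulting transmit path, with hypothetical foo_* names:

static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct foo_priv *priv = netdev_priv(dev);	/* hypothetical private data */
	unsigned long flags;

	/* May sleep on RT instead of spinning; either way the core no
	 * longer needs a NETDEV_TX_LOCKED retry path. */
	spin_lock_irqsave(&priv->tx_lock, flags);

	if (!foo_tx_descriptors_available(priv)) {	/* hypothetical helper */
		netif_stop_queue(dev);
		spin_unlock_irqrestore(&priv->tx_lock, flags);
		return NETDEV_TX_BUSY;
	}

	foo_queue_skb(priv, skb);			/* hypothetical helper */
	spin_unlock_irqrestore(&priv->tx_lock, flags);
	return NETDEV_TX_OK;
}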
  5690. diff -Nur linux-3.18.10.orig/drivers/net/ethernet/atheros/atl1e/atl1e_main.c linux-3.18.10/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
  5691. --- linux-3.18.10.orig/drivers/net/ethernet/atheros/atl1e/atl1e_main.c 2015-03-24 02:05:12.000000000 +0100
  5692. +++ linux-3.18.10/drivers/net/ethernet/atheros/atl1e/atl1e_main.c 2015-03-26 12:42:18.659588317 +0100
  5693. @@ -1880,8 +1880,7 @@
  5694. return NETDEV_TX_OK;
  5695. }
  5696. tpd_req = atl1e_cal_tdp_req(skb);
  5697. - if (!spin_trylock_irqsave(&adapter->tx_lock, flags))
  5698. - return NETDEV_TX_LOCKED;
  5699. + spin_lock_irqsave(&adapter->tx_lock, flags);
  5700. if (atl1e_tpd_avail(adapter) < tpd_req) {
  5701. /* no enough descriptor, just stop queue */
  5702. diff -Nur linux-3.18.10.orig/drivers/net/ethernet/chelsio/cxgb/sge.c linux-3.18.10/drivers/net/ethernet/chelsio/cxgb/sge.c
  5703. --- linux-3.18.10.orig/drivers/net/ethernet/chelsio/cxgb/sge.c 2015-03-24 02:05:12.000000000 +0100
  5704. +++ linux-3.18.10/drivers/net/ethernet/chelsio/cxgb/sge.c 2015-03-26 12:42:18.659588317 +0100
  5705. @@ -1663,8 +1663,7 @@
  5706. struct cmdQ *q = &sge->cmdQ[qid];
  5707. unsigned int credits, pidx, genbit, count, use_sched_skb = 0;
  5708. - if (!spin_trylock(&q->lock))
  5709. - return NETDEV_TX_LOCKED;
  5710. + spin_lock(&q->lock);
  5711. reclaim_completed_tx(sge, q);
  5712. diff -Nur linux-3.18.10.orig/drivers/net/ethernet/freescale/gianfar.c linux-3.18.10/drivers/net/ethernet/freescale/gianfar.c
  5713. --- linux-3.18.10.orig/drivers/net/ethernet/freescale/gianfar.c 2015-03-24 02:05:12.000000000 +0100
  5714. +++ linux-3.18.10/drivers/net/ethernet/freescale/gianfar.c 2015-03-26 12:42:18.659588317 +0100
  5715. @@ -1483,7 +1483,7 @@
  5716. if (netif_running(ndev)) {
  5717. - local_irq_save(flags);
  5718. + local_irq_save_nort(flags);
  5719. lock_tx_qs(priv);
  5720. gfar_halt_nodisable(priv);
  5721. @@ -1499,7 +1499,7 @@
  5722. gfar_write(&regs->maccfg1, tempval);
  5723. unlock_tx_qs(priv);
  5724. - local_irq_restore(flags);
  5725. + local_irq_restore_nort(flags);
  5726. disable_napi(priv);
  5727. @@ -1541,7 +1541,7 @@
  5728. /* Disable Magic Packet mode, in case something
  5729. * else woke us up.
  5730. */
  5731. - local_irq_save(flags);
  5732. + local_irq_save_nort(flags);
  5733. lock_tx_qs(priv);
  5734. tempval = gfar_read(&regs->maccfg2);
  5735. @@ -1551,7 +1551,7 @@
  5736. gfar_start(priv);
  5737. unlock_tx_qs(priv);
  5738. - local_irq_restore(flags);
  5739. + local_irq_restore_nort(flags);
  5740. netif_device_attach(ndev);
  5741. @@ -3307,14 +3307,14 @@
  5742. dev->stats.tx_dropped++;
  5743. atomic64_inc(&priv->extra_stats.tx_underrun);
  5744. - local_irq_save(flags);
  5745. + local_irq_save_nort(flags);
  5746. lock_tx_qs(priv);
  5747. /* Reactivate the Tx Queues */
  5748. gfar_write(&regs->tstat, gfargrp->tstat);
  5749. unlock_tx_qs(priv);
  5750. - local_irq_restore(flags);
  5751. + local_irq_restore_nort(flags);
  5752. }
  5753. netif_dbg(priv, tx_err, dev, "Transmit Error\n");
  5754. }
  5755. diff -Nur linux-3.18.10.orig/drivers/net/ethernet/neterion/s2io.c linux-3.18.10/drivers/net/ethernet/neterion/s2io.c
  5756. --- linux-3.18.10.orig/drivers/net/ethernet/neterion/s2io.c 2015-03-24 02:05:12.000000000 +0100
  5757. +++ linux-3.18.10/drivers/net/ethernet/neterion/s2io.c 2015-03-26 12:42:18.659588317 +0100
  5758. @@ -4084,12 +4084,7 @@
  5759. [skb->priority & (MAX_TX_FIFOS - 1)];
  5760. fifo = &mac_control->fifos[queue];
  5761. - if (do_spin_lock)
  5762. - spin_lock_irqsave(&fifo->tx_lock, flags);
  5763. - else {
  5764. - if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags)))
  5765. - return NETDEV_TX_LOCKED;
  5766. - }
  5767. + spin_lock_irqsave(&fifo->tx_lock, flags);
  5768. if (sp->config.multiq) {
  5769. if (__netif_subqueue_stopped(dev, fifo->fifo_no)) {
  5770. diff -Nur linux-3.18.10.orig/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c linux-3.18.10/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
  5771. --- linux-3.18.10.orig/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c 2015-03-24 02:05:12.000000000 +0100
  5772. +++ linux-3.18.10/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c 2015-03-26 12:42:18.659588317 +0100
  5773. @@ -2137,10 +2137,8 @@
  5774. struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring;
  5775. unsigned long flags;
  5776. - if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) {
  5777. - /* Collision - tell upper layer to requeue */
  5778. - return NETDEV_TX_LOCKED;
  5779. - }
  5780. + spin_lock_irqsave(&tx_ring->tx_lock, flags);
  5781. +
  5782. if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) {
  5783. netif_stop_queue(netdev);
  5784. spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
  5785. diff -Nur linux-3.18.10.orig/drivers/net/ethernet/realtek/8139too.c linux-3.18.10/drivers/net/ethernet/realtek/8139too.c
  5786. --- linux-3.18.10.orig/drivers/net/ethernet/realtek/8139too.c 2015-03-24 02:05:12.000000000 +0100
  5787. +++ linux-3.18.10/drivers/net/ethernet/realtek/8139too.c 2015-03-26 12:42:18.659588317 +0100
  5788. @@ -2215,7 +2215,7 @@
  5789. struct rtl8139_private *tp = netdev_priv(dev);
  5790. const int irq = tp->pci_dev->irq;
  5791. - disable_irq(irq);
  5792. + disable_irq_nosync(irq);
  5793. rtl8139_interrupt(irq, dev);
  5794. enable_irq(irq);
  5795. }
  5796. diff -Nur linux-3.18.10.orig/drivers/net/ethernet/tehuti/tehuti.c linux-3.18.10/drivers/net/ethernet/tehuti/tehuti.c
  5797. --- linux-3.18.10.orig/drivers/net/ethernet/tehuti/tehuti.c 2015-03-24 02:05:12.000000000 +0100
  5798. +++ linux-3.18.10/drivers/net/ethernet/tehuti/tehuti.c 2015-03-26 12:42:18.659588317 +0100
  5799. @@ -1629,13 +1629,8 @@
  5800. unsigned long flags;
  5801. ENTER;
  5802. - local_irq_save(flags);
  5803. - if (!spin_trylock(&priv->tx_lock)) {
  5804. - local_irq_restore(flags);
  5805. - DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n",
  5806. - BDX_DRV_NAME, ndev->name);
  5807. - return NETDEV_TX_LOCKED;
  5808. - }
  5809. +
  5810. + spin_lock_irqsave(&priv->tx_lock, flags);
  5811. /* build tx descriptor */
  5812. BDX_ASSERT(f->m.wptr >= f->m.memsz); /* started with valid wptr */
  5813. diff -Nur linux-3.18.10.orig/drivers/net/rionet.c linux-3.18.10/drivers/net/rionet.c
  5814. --- linux-3.18.10.orig/drivers/net/rionet.c 2015-03-24 02:05:12.000000000 +0100
  5815. +++ linux-3.18.10/drivers/net/rionet.c 2015-03-26 12:42:18.659588317 +0100
  5816. @@ -174,11 +174,7 @@
  5817. unsigned long flags;
  5818. int add_num = 1;
  5819. - local_irq_save(flags);
  5820. - if (!spin_trylock(&rnet->tx_lock)) {
  5821. - local_irq_restore(flags);
  5822. - return NETDEV_TX_LOCKED;
  5823. - }
  5824. + spin_lock_irqsave(&rnet->tx_lock, flags);
  5825. if (is_multicast_ether_addr(eth->h_dest))
  5826. add_num = nets[rnet->mport->id].nact;
  5827. diff -Nur linux-3.18.10.orig/drivers/net/wireless/orinoco/orinoco_usb.c linux-3.18.10/drivers/net/wireless/orinoco/orinoco_usb.c
  5828. --- linux-3.18.10.orig/drivers/net/wireless/orinoco/orinoco_usb.c 2015-03-24 02:05:12.000000000 +0100
  5829. +++ linux-3.18.10/drivers/net/wireless/orinoco/orinoco_usb.c 2015-03-26 12:42:18.659588317 +0100
  5830. @@ -699,7 +699,7 @@
  5831. while (!ctx->done.done && msecs--)
  5832. udelay(1000);
  5833. } else {
  5834. - wait_event_interruptible(ctx->done.wait,
  5835. + swait_event_interruptible(ctx->done.wait,
  5836. ctx->done.done);
  5837. }
  5838. break;
  5839. diff -Nur linux-3.18.10.orig/drivers/pci/access.c linux-3.18.10/drivers/pci/access.c
  5840. --- linux-3.18.10.orig/drivers/pci/access.c 2015-03-24 02:05:12.000000000 +0100
  5841. +++ linux-3.18.10/drivers/pci/access.c 2015-03-26 12:42:18.659588317 +0100
  5842. @@ -434,7 +434,7 @@
  5843. WARN_ON(!dev->block_cfg_access);
  5844. dev->block_cfg_access = 0;
  5845. - wake_up_all(&pci_cfg_wait);
  5846. + wake_up_all_locked(&pci_cfg_wait);
  5847. raw_spin_unlock_irqrestore(&pci_lock, flags);
  5848. }
  5849. EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
  5850. diff -Nur linux-3.18.10.orig/drivers/scsi/fcoe/fcoe.c linux-3.18.10/drivers/scsi/fcoe/fcoe.c
  5851. --- linux-3.18.10.orig/drivers/scsi/fcoe/fcoe.c 2015-03-24 02:05:12.000000000 +0100
  5852. +++ linux-3.18.10/drivers/scsi/fcoe/fcoe.c 2015-03-26 12:42:18.659588317 +0100
  5853. @@ -1286,7 +1286,7 @@
  5854. struct sk_buff *skb;
  5855. #ifdef CONFIG_SMP
  5856. struct fcoe_percpu_s *p0;
  5857. - unsigned targ_cpu = get_cpu();
  5858. + unsigned targ_cpu = get_cpu_light();
  5859. #endif /* CONFIG_SMP */
  5860. FCOE_DBG("Destroying receive thread for CPU %d\n", cpu);
  5861. @@ -1342,7 +1342,7 @@
  5862. kfree_skb(skb);
  5863. spin_unlock_bh(&p->fcoe_rx_list.lock);
  5864. }
  5865. - put_cpu();
  5866. + put_cpu_light();
  5867. #else
  5868. /*
  5869. * This a non-SMP scenario where the singular Rx thread is
  5870. @@ -1566,11 +1566,11 @@
  5871. static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
  5872. {
  5873. struct fcoe_percpu_s *fps;
  5874. - int rc;
  5875. + int rc, cpu = get_cpu_light();
  5876. - fps = &get_cpu_var(fcoe_percpu);
  5877. + fps = &per_cpu(fcoe_percpu, cpu);
  5878. rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
  5879. - put_cpu_var(fcoe_percpu);
  5880. + put_cpu_light();
  5881. return rc;
  5882. }
  5883. @@ -1768,11 +1768,11 @@
  5884. return 0;
  5885. }
  5886. - stats = per_cpu_ptr(lport->stats, get_cpu());
  5887. + stats = per_cpu_ptr(lport->stats, get_cpu_light());
  5888. stats->InvalidCRCCount++;
  5889. if (stats->InvalidCRCCount < 5)
  5890. printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
  5891. - put_cpu();
  5892. + put_cpu_light();
  5893. return -EINVAL;
  5894. }
  5895. @@ -1848,13 +1848,13 @@
  5896. goto drop;
  5897. if (!fcoe_filter_frames(lport, fp)) {
  5898. - put_cpu();
  5899. + put_cpu_light();
  5900. fc_exch_recv(lport, fp);
  5901. return;
  5902. }
  5903. drop:
  5904. stats->ErrorFrames++;
  5905. - put_cpu();
  5906. + put_cpu_light();
  5907. kfree_skb(skb);
  5908. }
  5909. diff -Nur linux-3.18.10.orig/drivers/scsi/fcoe/fcoe_ctlr.c linux-3.18.10/drivers/scsi/fcoe/fcoe_ctlr.c
  5910. --- linux-3.18.10.orig/drivers/scsi/fcoe/fcoe_ctlr.c 2015-03-24 02:05:12.000000000 +0100
  5911. +++ linux-3.18.10/drivers/scsi/fcoe/fcoe_ctlr.c 2015-03-26 12:42:18.659588317 +0100
  5912. @@ -831,7 +831,7 @@
  5913. INIT_LIST_HEAD(&del_list);
  5914. - stats = per_cpu_ptr(fip->lp->stats, get_cpu());
  5915. + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
  5916. list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
  5917. deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
  5918. @@ -867,7 +867,7 @@
  5919. sel_time = fcf->time;
  5920. }
  5921. }
  5922. - put_cpu();
  5923. + put_cpu_light();
  5924. list_for_each_entry_safe(fcf, next, &del_list, list) {
  5925. /* Removes fcf from current list */
  5926. diff -Nur linux-3.18.10.orig/drivers/scsi/libfc/fc_exch.c linux-3.18.10/drivers/scsi/libfc/fc_exch.c
  5927. --- linux-3.18.10.orig/drivers/scsi/libfc/fc_exch.c 2015-03-24 02:05:12.000000000 +0100
  5928. +++ linux-3.18.10/drivers/scsi/libfc/fc_exch.c 2015-03-26 12:42:18.659588317 +0100
  5929. @@ -816,10 +816,10 @@
  5930. }
  5931. memset(ep, 0, sizeof(*ep));
  5932. - cpu = get_cpu();
  5933. + cpu = get_cpu_light();
  5934. pool = per_cpu_ptr(mp->pool, cpu);
  5935. spin_lock_bh(&pool->lock);
  5936. - put_cpu();
  5937. + put_cpu_light();
  5938. /* peek cache of free slot */
  5939. if (pool->left != FC_XID_UNKNOWN) {
  5940. diff -Nur linux-3.18.10.orig/drivers/scsi/libsas/sas_ata.c linux-3.18.10/drivers/scsi/libsas/sas_ata.c
  5941. --- linux-3.18.10.orig/drivers/scsi/libsas/sas_ata.c 2015-03-24 02:05:12.000000000 +0100
  5942. +++ linux-3.18.10/drivers/scsi/libsas/sas_ata.c 2015-03-26 12:42:18.659588317 +0100
  5943. @@ -191,7 +191,7 @@
  5944. /* TODO: audit callers to ensure they are ready for qc_issue to
  5945. * unconditionally re-enable interrupts
  5946. */
  5947. - local_irq_save(flags);
  5948. + local_irq_save_nort(flags);
  5949. spin_unlock(ap->lock);
  5950. /* If the device fell off, no sense in issuing commands */
  5951. @@ -261,7 +261,7 @@
  5952. out:
  5953. spin_lock(ap->lock);
  5954. - local_irq_restore(flags);
  5955. + local_irq_restore_nort(flags);
  5956. return ret;
  5957. }
  5958. diff -Nur linux-3.18.10.orig/drivers/scsi/qla2xxx/qla_inline.h linux-3.18.10/drivers/scsi/qla2xxx/qla_inline.h
  5959. --- linux-3.18.10.orig/drivers/scsi/qla2xxx/qla_inline.h 2015-03-24 02:05:12.000000000 +0100
  5960. +++ linux-3.18.10/drivers/scsi/qla2xxx/qla_inline.h 2015-03-26 12:42:18.659588317 +0100
  5961. @@ -59,12 +59,12 @@
  5962. {
  5963. unsigned long flags;
  5964. struct qla_hw_data *ha = rsp->hw;
  5965. - local_irq_save(flags);
  5966. + local_irq_save_nort(flags);
  5967. if (IS_P3P_TYPE(ha))
  5968. qla82xx_poll(0, rsp);
  5969. else
  5970. ha->isp_ops->intr_handler(0, rsp);
  5971. - local_irq_restore(flags);
  5972. + local_irq_restore_nort(flags);
  5973. }
  5974. static inline uint8_t *
  5975. diff -Nur linux-3.18.10.orig/drivers/thermal/x86_pkg_temp_thermal.c linux-3.18.10/drivers/thermal/x86_pkg_temp_thermal.c
  5976. --- linux-3.18.10.orig/drivers/thermal/x86_pkg_temp_thermal.c 2015-03-24 02:05:12.000000000 +0100
  5977. +++ linux-3.18.10/drivers/thermal/x86_pkg_temp_thermal.c 2015-03-26 12:42:18.659588317 +0100
  5978. @@ -29,6 +29,7 @@
  5979. #include <linux/pm.h>
  5980. #include <linux/thermal.h>
  5981. #include <linux/debugfs.h>
  5982. +#include <linux/work-simple.h>
  5983. #include <asm/cpu_device_id.h>
  5984. #include <asm/mce.h>
  5985. @@ -352,7 +353,7 @@
  5986. }
  5987. }
  5988. -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  5989. +static void platform_thermal_notify_work(struct swork_event *event)
  5990. {
  5991. unsigned long flags;
  5992. int cpu = smp_processor_id();
  5993. @@ -369,7 +370,7 @@
  5994. pkg_work_scheduled[phy_id]) {
  5995. disable_pkg_thres_interrupt();
  5996. spin_unlock_irqrestore(&pkg_work_lock, flags);
  5997. - return -EINVAL;
  5998. + return;
  5999. }
  6000. pkg_work_scheduled[phy_id] = 1;
  6001. spin_unlock_irqrestore(&pkg_work_lock, flags);
  6002. @@ -378,9 +379,48 @@
  6003. schedule_delayed_work_on(cpu,
  6004. &per_cpu(pkg_temp_thermal_threshold_work, cpu),
  6005. msecs_to_jiffies(notify_delay_ms));
  6006. +}
  6007. +
  6008. +#ifdef CONFIG_PREEMPT_RT_FULL
  6009. +static struct swork_event notify_work;
  6010. +
  6011. +static int thermal_notify_work_init(void)
  6012. +{
  6013. + int err;
  6014. +
  6015. + err = swork_get();
  6016. + if (err)
  6017. + return err;
  6018. +
  6019. + INIT_SWORK(&notify_work, platform_thermal_notify_work);
  6020. return 0;
  6021. }
  6022. +static void thermal_notify_work_cleanup(void)
  6023. +{
  6024. + swork_put();
  6025. +}
  6026. +
  6027. +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  6028. +{
  6029. + swork_queue(&notify_work);
  6030. + return 0;
  6031. +}
  6032. +
  6033. +#else /* !CONFIG_PREEMPT_RT_FULL */
  6034. +
  6035. +static int thermal_notify_work_init(void) { return 0; }
  6036. +
  6037. +static void thermal_notify_work_cleanup(void) { }
  6038. +
  6039. +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  6040. +{
  6041. + platform_thermal_notify_work(NULL);
  6042. +
  6043. + return 0;
  6044. +}
  6045. +#endif /* CONFIG_PREEMPT_RT_FULL */
  6046. +
  6047. static int find_siblings_cpu(int cpu)
  6048. {
  6049. int i;
  6050. @@ -584,6 +624,9 @@
  6051. if (!x86_match_cpu(pkg_temp_thermal_ids))
  6052. return -ENODEV;
  6053. + if (thermal_notify_work_init())
  6054. + return -ENODEV;
  6055. +
  6056. spin_lock_init(&pkg_work_lock);
  6057. platform_thermal_package_notify =
  6058. pkg_temp_thermal_platform_thermal_notify;
  6059. @@ -608,7 +651,7 @@
  6060. kfree(pkg_work_scheduled);
  6061. platform_thermal_package_notify = NULL;
  6062. platform_thermal_package_rate_control = NULL;
  6063. -
  6064. + thermal_notify_work_cleanup();
  6065. return -ENODEV;
  6066. }
  6067. @@ -633,6 +676,7 @@
  6068. mutex_unlock(&phy_dev_list_mutex);
  6069. platform_thermal_package_notify = NULL;
  6070. platform_thermal_package_rate_control = NULL;
  6071. + thermal_notify_work_cleanup();
  6072. for_each_online_cpu(i)
  6073. cancel_delayed_work_sync(
  6074. &per_cpu(pkg_temp_thermal_threshold_work, i));
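The conversion above is a straightforward use of the "simple work" (swork) API that this patch introduces: swork_get() brings up (or takes a reference on) the worker thread, INIT_SWORK() binds an event to a handler, swork_queue() defers that handler from contexts that must not sleep, and swork_put() drops the reference again. A hedged, stripped-down sketch of that lifecycle with hypothetical foo_* names:

#include <linux/work-simple.h>

static struct swork_event foo_event;

static void foo_work_fn(struct swork_event *ev)
{
	/* runs in the swork kernel thread and may sleep */
}

static int foo_init(void)
{
	int err = swork_get();		/* create/ref the worker thread */

	if (err)
		return err;
	INIT_SWORK(&foo_event, foo_work_fn);
	return 0;
}

static void foo_notify(void)		/* called from atomic context */
{
	swork_queue(&foo_event);
}

static void foo_exit(void)
{
	swork_put();			/* drop the worker reference */
}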
  6075. diff -Nur linux-3.18.10.orig/drivers/tty/serial/8250/8250_core.c linux-3.18.10/drivers/tty/serial/8250/8250_core.c
  6076. --- linux-3.18.10.orig/drivers/tty/serial/8250/8250_core.c 2015-03-24 02:05:12.000000000 +0100
  6077. +++ linux-3.18.10/drivers/tty/serial/8250/8250_core.c 2015-03-26 12:42:18.659588317 +0100
  6078. @@ -37,6 +37,7 @@
  6079. #include <linux/nmi.h>
  6080. #include <linux/mutex.h>
  6081. #include <linux/slab.h>
  6082. +#include <linux/kdb.h>
  6083. #include <linux/uaccess.h>
  6084. #include <linux/pm_runtime.h>
  6085. #ifdef CONFIG_SPARC
  6086. @@ -81,7 +82,16 @@
  6087. #define DEBUG_INTR(fmt...) do { } while (0)
  6088. #endif
  6089. -#define PASS_LIMIT 512
  6090. +/*
  6091. + * On -rt we can legitimately see more and longer delays, so
  6092. + * don't drop work spuriously and spam the
  6093. + * syslog:
  6094. + */
  6095. +#ifdef CONFIG_PREEMPT_RT_FULL
  6096. +# define PASS_LIMIT 1000000
  6097. +#else
  6098. +# define PASS_LIMIT 512
  6099. +#endif
  6100. #define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
  6101. @@ -3197,7 +3207,7 @@
  6102. serial8250_rpm_get(up);
  6103. - if (port->sysrq || oops_in_progress)
  6104. + if (port->sysrq || oops_in_progress || in_kdb_printk())
  6105. locked = spin_trylock_irqsave(&port->lock, flags);
  6106. else
  6107. spin_lock_irqsave(&port->lock, flags);
  6108. diff -Nur linux-3.18.10.orig/drivers/tty/serial/amba-pl011.c linux-3.18.10/drivers/tty/serial/amba-pl011.c
  6109. --- linux-3.18.10.orig/drivers/tty/serial/amba-pl011.c 2015-03-24 02:05:12.000000000 +0100
  6110. +++ linux-3.18.10/drivers/tty/serial/amba-pl011.c 2015-03-26 12:42:18.659588317 +0100
  6111. @@ -1935,13 +1935,19 @@
  6112. clk_enable(uap->clk);
  6113. - local_irq_save(flags);
  6114. + /*
  6115. + * local_irq_save(flags);
  6116. + *
  6117. + * This local_irq_save() is nonsense. If we come in via sysrq
  6118. + * handling then interrupts are already disabled. Aside from
  6119. + * that, the port.sysrq check is racy on SMP regardless.
  6120. + */
  6121. if (uap->port.sysrq)
  6122. locked = 0;
  6123. else if (oops_in_progress)
  6124. - locked = spin_trylock(&uap->port.lock);
  6125. + locked = spin_trylock_irqsave(&uap->port.lock, flags);
  6126. else
  6127. - spin_lock(&uap->port.lock);
  6128. + spin_lock_irqsave(&uap->port.lock, flags);
  6129. /*
  6130. * First save the CR then disable the interrupts
  6131. @@ -1963,8 +1969,7 @@
  6132. writew(old_cr, uap->port.membase + UART011_CR);
  6133. if (locked)
  6134. - spin_unlock(&uap->port.lock);
  6135. - local_irq_restore(flags);
  6136. + spin_unlock_irqrestore(&uap->port.lock, flags);
  6137. clk_disable(uap->clk);
  6138. }
  6139. diff -Nur linux-3.18.10.orig/drivers/tty/serial/omap-serial.c linux-3.18.10/drivers/tty/serial/omap-serial.c
  6140. --- linux-3.18.10.orig/drivers/tty/serial/omap-serial.c 2015-03-24 02:05:12.000000000 +0100
  6141. +++ linux-3.18.10/drivers/tty/serial/omap-serial.c 2015-03-26 12:42:18.663588322 +0100
  6142. @@ -1270,13 +1270,10 @@
  6143. pm_runtime_get_sync(up->dev);
  6144. - local_irq_save(flags);
  6145. - if (up->port.sysrq)
  6146. - locked = 0;
  6147. - else if (oops_in_progress)
  6148. - locked = spin_trylock(&up->port.lock);
  6149. + if (up->port.sysrq || oops_in_progress)
  6150. + locked = spin_trylock_irqsave(&up->port.lock, flags);
  6151. else
  6152. - spin_lock(&up->port.lock);
  6153. + spin_lock_irqsave(&up->port.lock, flags);
  6154. /*
  6155. * First save the IER then disable the interrupts
  6156. @@ -1305,8 +1302,7 @@
  6157. pm_runtime_mark_last_busy(up->dev);
  6158. pm_runtime_put_autosuspend(up->dev);
  6159. if (locked)
  6160. - spin_unlock(&up->port.lock);
  6161. - local_irq_restore(flags);
  6162. + spin_unlock_irqrestore(&up->port.lock, flags);
  6163. }
  6164. static int __init
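The amba-pl011 and omap-serial console write paths above converge on a very similar locking shape: take the port lock with interrupts saved, but only try it when a sysrq or oops may already hold it. A hedged sketch of the resulting pattern, mirroring the omap variant (simplified: the real callbacks receive a struct console and look up their port from it):

#include <linux/serial_core.h>

static void foo_console_write_port(struct uart_port *port,
				   const char *s, unsigned int count)
{
	unsigned long flags;
	int locked = 1;

	if (port->sysrq || oops_in_progress)
		locked = spin_trylock_irqsave(&port->lock, flags);
	else
		spin_lock_irqsave(&port->lock, flags);

	/* ... emit the characters, e.g. via uart_console_write() ... */

	if (locked)
		spin_unlock_irqrestore(&port->lock, flags);
}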
  6165. diff -Nur linux-3.18.10.orig/drivers/usb/core/hcd.c linux-3.18.10/drivers/usb/core/hcd.c
  6166. --- linux-3.18.10.orig/drivers/usb/core/hcd.c 2015-03-24 02:05:12.000000000 +0100
  6167. +++ linux-3.18.10/drivers/usb/core/hcd.c 2015-03-26 12:42:18.663588322 +0100
  6168. @@ -1681,9 +1681,9 @@
  6169. * and no one may trigger the above deadlock situation when
  6170. * running complete() in tasklet.
  6171. */
  6172. - local_irq_save(flags);
  6173. + local_irq_save_nort(flags);
  6174. urb->complete(urb);
  6175. - local_irq_restore(flags);
  6176. + local_irq_restore_nort(flags);
  6177. usb_anchor_resume_wakeups(anchor);
  6178. atomic_dec(&urb->use_count);
  6179. diff -Nur linux-3.18.10.orig/drivers/usb/gadget/function/f_fs.c linux-3.18.10/drivers/usb/gadget/function/f_fs.c
  6180. --- linux-3.18.10.orig/drivers/usb/gadget/function/f_fs.c 2015-03-24 02:05:12.000000000 +0100
  6181. +++ linux-3.18.10/drivers/usb/gadget/function/f_fs.c 2015-03-26 12:42:18.663588322 +0100
  6182. @@ -1428,7 +1428,7 @@
  6183. pr_info("%s(): freeing\n", __func__);
  6184. ffs_data_clear(ffs);
  6185. BUG_ON(waitqueue_active(&ffs->ev.waitq) ||
  6186. - waitqueue_active(&ffs->ep0req_completion.wait));
  6187. + swaitqueue_active(&ffs->ep0req_completion.wait));
  6188. kfree(ffs->dev_name);
  6189. kfree(ffs);
  6190. }
  6191. diff -Nur linux-3.18.10.orig/drivers/usb/gadget/legacy/inode.c linux-3.18.10/drivers/usb/gadget/legacy/inode.c
  6192. --- linux-3.18.10.orig/drivers/usb/gadget/legacy/inode.c 2015-03-24 02:05:12.000000000 +0100
  6193. +++ linux-3.18.10/drivers/usb/gadget/legacy/inode.c 2015-03-26 12:42:18.663588322 +0100
  6194. @@ -339,7 +339,7 @@
  6195. spin_unlock_irq (&epdata->dev->lock);
  6196. if (likely (value == 0)) {
  6197. - value = wait_event_interruptible (done.wait, done.done);
  6198. + value = swait_event_interruptible (done.wait, done.done);
  6199. if (value != 0) {
  6200. spin_lock_irq (&epdata->dev->lock);
  6201. if (likely (epdata->ep != NULL)) {
  6202. @@ -348,7 +348,7 @@
  6203. usb_ep_dequeue (epdata->ep, epdata->req);
  6204. spin_unlock_irq (&epdata->dev->lock);
  6205. - wait_event (done.wait, done.done);
  6206. + swait_event (done.wait, done.done);
  6207. if (epdata->status == -ECONNRESET)
  6208. epdata->status = -EINTR;
  6209. } else {
  6210. diff -Nur linux-3.18.10.orig/fs/aio.c linux-3.18.10/fs/aio.c
  6211. --- linux-3.18.10.orig/fs/aio.c 2015-03-24 02:05:12.000000000 +0100
  6212. +++ linux-3.18.10/fs/aio.c 2015-03-26 12:42:18.663588322 +0100
  6213. @@ -40,6 +40,7 @@
  6214. #include <linux/ramfs.h>
  6215. #include <linux/percpu-refcount.h>
  6216. #include <linux/mount.h>
  6217. +#include <linux/work-simple.h>
  6218. #include <asm/kmap_types.h>
  6219. #include <asm/uaccess.h>
  6220. @@ -110,7 +111,7 @@
  6221. struct page **ring_pages;
  6222. long nr_pages;
  6223. - struct work_struct free_work;
  6224. + struct swork_event free_work;
  6225. /*
  6226. * signals when all in-flight requests are done
  6227. @@ -226,6 +227,7 @@
  6228. .mount = aio_mount,
  6229. .kill_sb = kill_anon_super,
  6230. };
  6231. + BUG_ON(swork_get());
  6232. aio_mnt = kern_mount(&aio_fs);
  6233. if (IS_ERR(aio_mnt))
  6234. panic("Failed to create aio fs mount.");
  6235. @@ -505,9 +507,9 @@
  6236. return cancel(kiocb);
  6237. }
  6238. -static void free_ioctx(struct work_struct *work)
  6239. +static void free_ioctx(struct swork_event *sev)
  6240. {
  6241. - struct kioctx *ctx = container_of(work, struct kioctx, free_work);
  6242. + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
  6243. pr_debug("freeing %p\n", ctx);
  6244. @@ -526,8 +528,8 @@
  6245. if (ctx->requests_done)
  6246. complete(ctx->requests_done);
  6247. - INIT_WORK(&ctx->free_work, free_ioctx);
  6248. - schedule_work(&ctx->free_work);
  6249. + INIT_SWORK(&ctx->free_work, free_ioctx);
  6250. + swork_queue(&ctx->free_work);
  6251. }
  6252. /*
  6253. @@ -535,9 +537,9 @@
  6254. * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
  6255. * now it's safe to cancel any that need to be.
  6256. */
  6257. -static void free_ioctx_users(struct percpu_ref *ref)
  6258. +static void free_ioctx_users_work(struct swork_event *sev)
  6259. {
  6260. - struct kioctx *ctx = container_of(ref, struct kioctx, users);
  6261. + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
  6262. struct kiocb *req;
  6263. spin_lock_irq(&ctx->ctx_lock);
  6264. @@ -556,6 +558,14 @@
  6265. percpu_ref_put(&ctx->reqs);
  6266. }
  6267. +static void free_ioctx_users(struct percpu_ref *ref)
  6268. +{
  6269. + struct kioctx *ctx = container_of(ref, struct kioctx, users);
  6270. +
  6271. + INIT_SWORK(&ctx->free_work, free_ioctx_users_work);
  6272. + swork_queue(&ctx->free_work);
  6273. +}
  6274. +
  6275. static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
  6276. {
  6277. unsigned i, new_nr;
  6278. diff -Nur linux-3.18.10.orig/fs/autofs4/autofs_i.h linux-3.18.10/fs/autofs4/autofs_i.h
  6279. --- linux-3.18.10.orig/fs/autofs4/autofs_i.h 2015-03-24 02:05:12.000000000 +0100
  6280. +++ linux-3.18.10/fs/autofs4/autofs_i.h 2015-03-26 12:42:18.663588322 +0100
  6281. @@ -34,6 +34,7 @@
  6282. #include <linux/sched.h>
  6283. #include <linux/mount.h>
  6284. #include <linux/namei.h>
  6285. +#include <linux/delay.h>
  6286. #include <asm/current.h>
  6287. #include <asm/uaccess.h>
  6288. diff -Nur linux-3.18.10.orig/fs/autofs4/expire.c linux-3.18.10/fs/autofs4/expire.c
  6289. --- linux-3.18.10.orig/fs/autofs4/expire.c 2015-03-24 02:05:12.000000000 +0100
  6290. +++ linux-3.18.10/fs/autofs4/expire.c 2015-03-26 12:42:18.663588322 +0100
  6291. @@ -151,7 +151,7 @@
  6292. parent = p->d_parent;
  6293. if (!spin_trylock(&parent->d_lock)) {
  6294. spin_unlock(&p->d_lock);
  6295. - cpu_relax();
  6296. + cpu_chill();
  6297. goto relock;
  6298. }
  6299. spin_unlock(&p->d_lock);
  6300. diff -Nur linux-3.18.10.orig/fs/buffer.c linux-3.18.10/fs/buffer.c
  6301. --- linux-3.18.10.orig/fs/buffer.c 2015-03-24 02:05:12.000000000 +0100
  6302. +++ linux-3.18.10/fs/buffer.c 2015-03-26 12:42:18.663588322 +0100
  6303. @@ -301,8 +301,7 @@
  6304. * decide that the page is now completely done.
  6305. */
  6306. first = page_buffers(page);
  6307. - local_irq_save(flags);
  6308. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  6309. + flags = bh_uptodate_lock_irqsave(first);
  6310. clear_buffer_async_read(bh);
  6311. unlock_buffer(bh);
  6312. tmp = bh;
  6313. @@ -315,8 +314,7 @@
  6314. }
  6315. tmp = tmp->b_this_page;
  6316. } while (tmp != bh);
  6317. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6318. - local_irq_restore(flags);
  6319. + bh_uptodate_unlock_irqrestore(first, flags);
  6320. /*
  6321. * If none of the buffers had errors and they are all
  6322. @@ -328,9 +326,7 @@
  6323. return;
  6324. still_busy:
  6325. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6326. - local_irq_restore(flags);
  6327. - return;
  6328. + bh_uptodate_unlock_irqrestore(first, flags);
  6329. }
  6330. /*
  6331. @@ -358,8 +354,7 @@
  6332. }
  6333. first = page_buffers(page);
  6334. - local_irq_save(flags);
  6335. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  6336. + flags = bh_uptodate_lock_irqsave(first);
  6337. clear_buffer_async_write(bh);
  6338. unlock_buffer(bh);
  6339. @@ -371,15 +366,12 @@
  6340. }
  6341. tmp = tmp->b_this_page;
  6342. }
  6343. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6344. - local_irq_restore(flags);
  6345. + bh_uptodate_unlock_irqrestore(first, flags);
  6346. end_page_writeback(page);
  6347. return;
  6348. still_busy:
  6349. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6350. - local_irq_restore(flags);
  6351. - return;
  6352. + bh_uptodate_unlock_irqrestore(first, flags);
  6353. }
  6354. EXPORT_SYMBOL(end_buffer_async_write);
  6355. @@ -3325,6 +3317,7 @@
  6356. struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
  6357. if (ret) {
  6358. INIT_LIST_HEAD(&ret->b_assoc_buffers);
  6359. + buffer_head_init_locks(ret);
  6360. preempt_disable();
  6361. __this_cpu_inc(bh_accounting.nr);
  6362. recalc_bh_state();
  6363. diff -Nur linux-3.18.10.orig/fs/dcache.c linux-3.18.10/fs/dcache.c
  6364. --- linux-3.18.10.orig/fs/dcache.c 2015-03-24 02:05:12.000000000 +0100
  6365. +++ linux-3.18.10/fs/dcache.c 2015-03-26 12:42:18.663588322 +0100
  6366. @@ -19,6 +19,7 @@
  6367. #include <linux/mm.h>
  6368. #include <linux/fs.h>
  6369. #include <linux/fsnotify.h>
  6370. +#include <linux/delay.h>
  6371. #include <linux/slab.h>
  6372. #include <linux/init.h>
  6373. #include <linux/hash.h>
  6374. @@ -552,7 +553,7 @@
  6375. failed:
  6376. spin_unlock(&dentry->d_lock);
  6377. - cpu_relax();
  6378. + cpu_chill();
  6379. return dentry; /* try again with same dentry */
  6380. }
  6381. @@ -2285,7 +2286,7 @@
  6382. if (dentry->d_lockref.count == 1) {
  6383. if (!spin_trylock(&inode->i_lock)) {
  6384. spin_unlock(&dentry->d_lock);
  6385. - cpu_relax();
  6386. + cpu_chill();
  6387. goto again;
  6388. }
  6389. dentry->d_flags &= ~DCACHE_CANT_MOUNT;
  6390. diff -Nur linux-3.18.10.orig/fs/eventpoll.c linux-3.18.10/fs/eventpoll.c
  6391. --- linux-3.18.10.orig/fs/eventpoll.c 2015-03-24 02:05:12.000000000 +0100
  6392. +++ linux-3.18.10/fs/eventpoll.c 2015-03-26 12:42:18.663588322 +0100
  6393. @@ -505,12 +505,12 @@
  6394. */
  6395. static void ep_poll_safewake(wait_queue_head_t *wq)
  6396. {
  6397. - int this_cpu = get_cpu();
  6398. + int this_cpu = get_cpu_light();
  6399. ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
  6400. ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
  6401. - put_cpu();
  6402. + put_cpu_light();
  6403. }
  6404. static void ep_remove_wait_queue(struct eppoll_entry *pwq)
  6405. diff -Nur linux-3.18.10.orig/fs/exec.c linux-3.18.10/fs/exec.c
  6406. --- linux-3.18.10.orig/fs/exec.c 2015-03-24 02:05:12.000000000 +0100
  6407. +++ linux-3.18.10/fs/exec.c 2015-03-26 12:42:18.663588322 +0100
  6408. @@ -841,12 +841,14 @@
  6409. }
  6410. }
  6411. task_lock(tsk);
  6412. + preempt_disable_rt();
  6413. active_mm = tsk->active_mm;
  6414. tsk->mm = mm;
  6415. tsk->active_mm = mm;
  6416. activate_mm(active_mm, mm);
  6417. tsk->mm->vmacache_seqnum = 0;
  6418. vmacache_flush(tsk);
  6419. + preempt_enable_rt();
  6420. task_unlock(tsk);
  6421. if (old_mm) {
  6422. up_read(&old_mm->mmap_sem);
  6423. diff -Nur linux-3.18.10.orig/fs/jbd/checkpoint.c linux-3.18.10/fs/jbd/checkpoint.c
  6424. --- linux-3.18.10.orig/fs/jbd/checkpoint.c 2015-03-24 02:05:12.000000000 +0100
  6425. +++ linux-3.18.10/fs/jbd/checkpoint.c 2015-03-26 12:42:18.663588322 +0100
  6426. @@ -129,6 +129,8 @@
  6427. if (journal->j_flags & JFS_ABORT)
  6428. return;
  6429. spin_unlock(&journal->j_state_lock);
  6430. + if (current->plug)
  6431. + io_schedule();
  6432. mutex_lock(&journal->j_checkpoint_mutex);
  6433. /*
  6434. diff -Nur linux-3.18.10.orig/fs/jbd2/checkpoint.c linux-3.18.10/fs/jbd2/checkpoint.c
  6435. --- linux-3.18.10.orig/fs/jbd2/checkpoint.c 2015-03-24 02:05:12.000000000 +0100
  6436. +++ linux-3.18.10/fs/jbd2/checkpoint.c 2015-03-26 12:42:18.663588322 +0100
  6437. @@ -116,6 +116,8 @@
  6438. nblocks = jbd2_space_needed(journal);
  6439. while (jbd2_log_space_left(journal) < nblocks) {
  6440. write_unlock(&journal->j_state_lock);
  6441. + if (current->plug)
  6442. + io_schedule();
  6443. mutex_lock(&journal->j_checkpoint_mutex);
  6444. /*
  6445. diff -Nur linux-3.18.10.orig/fs/namespace.c linux-3.18.10/fs/namespace.c
  6446. --- linux-3.18.10.orig/fs/namespace.c 2015-03-24 02:05:12.000000000 +0100
  6447. +++ linux-3.18.10/fs/namespace.c 2015-03-26 12:42:18.663588322 +0100
  6448. @@ -14,6 +14,7 @@
  6449. #include <linux/mnt_namespace.h>
  6450. #include <linux/user_namespace.h>
  6451. #include <linux/namei.h>
  6452. +#include <linux/delay.h>
  6453. #include <linux/security.h>
  6454. #include <linux/idr.h>
  6455. #include <linux/init.h> /* init_rootfs */
  6456. @@ -344,8 +345,11 @@
  6457. * incremented count after it has set MNT_WRITE_HOLD.
  6458. */
  6459. smp_mb();
  6460. - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
  6461. - cpu_relax();
  6462. + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
  6463. + preempt_enable();
  6464. + cpu_chill();
  6465. + preempt_disable();
  6466. + }
  6467. /*
  6468. * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
  6469. * be set to match its requirements. So we must not load that until
  6470. diff -Nur linux-3.18.10.orig/fs/ntfs/aops.c linux-3.18.10/fs/ntfs/aops.c
  6471. --- linux-3.18.10.orig/fs/ntfs/aops.c 2015-03-24 02:05:12.000000000 +0100
  6472. +++ linux-3.18.10/fs/ntfs/aops.c 2015-03-26 12:42:18.663588322 +0100
  6473. @@ -107,8 +107,7 @@
  6474. "0x%llx.", (unsigned long long)bh->b_blocknr);
  6475. }
  6476. first = page_buffers(page);
  6477. - local_irq_save(flags);
  6478. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  6479. + flags = bh_uptodate_lock_irqsave(first);
  6480. clear_buffer_async_read(bh);
  6481. unlock_buffer(bh);
  6482. tmp = bh;
  6483. @@ -123,8 +122,7 @@
  6484. }
  6485. tmp = tmp->b_this_page;
  6486. } while (tmp != bh);
  6487. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6488. - local_irq_restore(flags);
  6489. + bh_uptodate_unlock_irqrestore(first, flags);
  6490. /*
  6491. * If none of the buffers had errors then we can set the page uptodate,
  6492. * but we first have to perform the post read mst fixups, if the
  6493. @@ -145,13 +143,13 @@
  6494. recs = PAGE_CACHE_SIZE / rec_size;
  6495. /* Should have been verified before we got here... */
  6496. BUG_ON(!recs);
  6497. - local_irq_save(flags);
  6498. + local_irq_save_nort(flags);
  6499. kaddr = kmap_atomic(page);
  6500. for (i = 0; i < recs; i++)
  6501. post_read_mst_fixup((NTFS_RECORD*)(kaddr +
  6502. i * rec_size), rec_size);
  6503. kunmap_atomic(kaddr);
  6504. - local_irq_restore(flags);
  6505. + local_irq_restore_nort(flags);
  6506. flush_dcache_page(page);
  6507. if (likely(page_uptodate && !PageError(page)))
  6508. SetPageUptodate(page);
  6509. @@ -159,9 +157,7 @@
  6510. unlock_page(page);
  6511. return;
  6512. still_busy:
  6513. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6514. - local_irq_restore(flags);
  6515. - return;
  6516. + bh_uptodate_unlock_irqrestore(first, flags);
  6517. }
  6518. /**
  6519. diff -Nur linux-3.18.10.orig/fs/timerfd.c linux-3.18.10/fs/timerfd.c
  6520. --- linux-3.18.10.orig/fs/timerfd.c 2015-03-24 02:05:12.000000000 +0100
  6521. +++ linux-3.18.10/fs/timerfd.c 2015-03-26 12:42:18.663588322 +0100
  6522. @@ -449,7 +449,10 @@
  6523. break;
  6524. }
  6525. spin_unlock_irq(&ctx->wqh.lock);
  6526. - cpu_relax();
  6527. + if (isalarm(ctx))
  6528. + hrtimer_wait_for_timer(&ctx->t.alarm.timer);
  6529. + else
  6530. + hrtimer_wait_for_timer(&ctx->t.tmr);
  6531. }
  6532. /*
  6533. diff -Nur linux-3.18.10.orig/include/acpi/platform/aclinux.h linux-3.18.10/include/acpi/platform/aclinux.h
  6534. --- linux-3.18.10.orig/include/acpi/platform/aclinux.h 2015-03-24 02:05:12.000000000 +0100
  6535. +++ linux-3.18.10/include/acpi/platform/aclinux.h 2015-03-26 12:42:18.663588322 +0100
  6536. @@ -123,6 +123,7 @@
  6537. #define acpi_cache_t struct kmem_cache
  6538. #define acpi_spinlock spinlock_t *
  6539. +#define acpi_raw_spinlock raw_spinlock_t *
  6540. #define acpi_cpu_flags unsigned long
  6541. /* Use native linux version of acpi_os_allocate_zeroed */
  6542. @@ -141,6 +142,20 @@
  6543. #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id
  6544. #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock
  6545. +#define acpi_os_create_raw_lock(__handle) \
  6546. +({ \
  6547. + raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \
  6548. + \
  6549. + if (lock) { \
  6550. + *(__handle) = lock; \
  6551. + raw_spin_lock_init(*(__handle)); \
  6552. + } \
  6553. + lock ? AE_OK : AE_NO_MEMORY; \
  6554. + })
  6555. +
  6556. +#define acpi_os_delete_raw_lock(__handle) kfree(__handle)
  6557. +
  6558. +
  6559. /*
  6560. * OSL interfaces used by debugger/disassembler
  6561. */
  6562. diff -Nur linux-3.18.10.orig/include/asm-generic/bug.h linux-3.18.10/include/asm-generic/bug.h
  6563. --- linux-3.18.10.orig/include/asm-generic/bug.h 2015-03-24 02:05:12.000000000 +0100
  6564. +++ linux-3.18.10/include/asm-generic/bug.h 2015-03-26 12:42:18.663588322 +0100
  6565. @@ -206,6 +206,20 @@
  6566. # define WARN_ON_SMP(x) ({0;})
  6567. #endif
  6568. +#ifdef CONFIG_PREEMPT_RT_BASE
  6569. +# define BUG_ON_RT(c) BUG_ON(c)
  6570. +# define BUG_ON_NONRT(c) do { } while (0)
  6571. +# define WARN_ON_RT(condition) WARN_ON(condition)
  6572. +# define WARN_ON_NONRT(condition) do { } while (0)
  6573. +# define WARN_ON_ONCE_NONRT(condition) do { } while (0)
  6574. +#else
  6575. +# define BUG_ON_RT(c) do { } while (0)
  6576. +# define BUG_ON_NONRT(c) BUG_ON(c)
  6577. +# define WARN_ON_RT(condition) do { } while (0)
  6578. +# define WARN_ON_NONRT(condition) WARN_ON(condition)
  6579. +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition)
  6580. +#endif
  6581. +
  6582. #endif /* __ASSEMBLY__ */
  6583. #endif
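These helpers let code shared between both preemption models keep assertions that are only meaningful on one of them; for example, a check that interrupts are hard-disabled only holds on non-RT, where spinlocks still disable interrupts. A hedged usage sketch with a hypothetical function:

#include <linux/bug.h>
#include <linux/irqflags.h>

static void foo_update_stats(void)
{
	/* Warns on non-RT if called with interrupts enabled; compiled
	 * out on PREEMPT_RT_BASE, where the lock no longer disables IRQs. */
	WARN_ON_NONRT(!irqs_disabled());

	/* ... per-CPU accounting that non-RT protects by disabling IRQs ... */
}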
  6584. diff -Nur linux-3.18.10.orig/include/linux/blkdev.h linux-3.18.10/include/linux/blkdev.h
  6585. --- linux-3.18.10.orig/include/linux/blkdev.h 2015-03-24 02:05:12.000000000 +0100
  6586. +++ linux-3.18.10/include/linux/blkdev.h 2015-03-26 12:42:18.663588322 +0100
  6587. @@ -101,6 +101,7 @@
  6588. struct list_head queuelist;
  6589. union {
  6590. struct call_single_data csd;
  6591. + struct work_struct work;
  6592. unsigned long fifo_time;
  6593. };
  6594. @@ -478,7 +479,7 @@
  6595. struct throtl_data *td;
  6596. #endif
  6597. struct rcu_head rcu_head;
  6598. - wait_queue_head_t mq_freeze_wq;
  6599. + struct swait_head mq_freeze_wq;
  6600. struct percpu_ref mq_usage_counter;
  6601. struct list_head all_q_node;
  6602. diff -Nur linux-3.18.10.orig/include/linux/blk-mq.h linux-3.18.10/include/linux/blk-mq.h
  6603. --- linux-3.18.10.orig/include/linux/blk-mq.h 2015-03-24 02:05:12.000000000 +0100
  6604. +++ linux-3.18.10/include/linux/blk-mq.h 2015-03-26 12:42:18.663588322 +0100
  6605. @@ -169,6 +169,7 @@
  6606. struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
  6607. struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
  6608. +void __blk_mq_complete_request_remote_work(struct work_struct *work);
  6609. void blk_mq_start_request(struct request *rq);
  6610. void blk_mq_end_request(struct request *rq, int error);
  6611. diff -Nur linux-3.18.10.orig/include/linux/bottom_half.h linux-3.18.10/include/linux/bottom_half.h
  6612. --- linux-3.18.10.orig/include/linux/bottom_half.h 2015-03-24 02:05:12.000000000 +0100
  6613. +++ linux-3.18.10/include/linux/bottom_half.h 2015-03-26 12:42:18.663588322 +0100
  6614. @@ -4,6 +4,17 @@
  6615. #include <linux/preempt.h>
  6616. #include <linux/preempt_mask.h>
  6617. +#ifdef CONFIG_PREEMPT_RT_FULL
  6618. +
  6619. +extern void local_bh_disable(void);
  6620. +extern void _local_bh_enable(void);
  6621. +extern void local_bh_enable(void);
  6622. +extern void local_bh_enable_ip(unsigned long ip);
  6623. +extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
  6624. +extern void __local_bh_enable_ip(unsigned long ip, unsigned int cnt);
  6625. +
  6626. +#else
  6627. +
  6628. #ifdef CONFIG_TRACE_IRQFLAGS
  6629. extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
  6630. #else
  6631. @@ -31,5 +42,6 @@
  6632. {
  6633. __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
  6634. }
  6635. +#endif
  6636. #endif /* _LINUX_BH_H */
  6637. diff -Nur linux-3.18.10.orig/include/linux/buffer_head.h linux-3.18.10/include/linux/buffer_head.h
  6638. --- linux-3.18.10.orig/include/linux/buffer_head.h 2015-03-24 02:05:12.000000000 +0100
  6639. +++ linux-3.18.10/include/linux/buffer_head.h 2015-03-26 12:42:18.663588322 +0100
  6640. @@ -75,8 +75,52 @@
  6641. struct address_space *b_assoc_map; /* mapping this buffer is
  6642. associated with */
  6643. atomic_t b_count; /* users using this buffer_head */
  6644. +#ifdef CONFIG_PREEMPT_RT_BASE
  6645. + spinlock_t b_uptodate_lock;
  6646. +#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
  6647. + defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
  6648. + spinlock_t b_state_lock;
  6649. + spinlock_t b_journal_head_lock;
  6650. +#endif
  6651. +#endif
  6652. };
  6653. +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
  6654. +{
  6655. + unsigned long flags;
  6656. +
  6657. +#ifndef CONFIG_PREEMPT_RT_BASE
  6658. + local_irq_save(flags);
  6659. + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
  6660. +#else
  6661. + spin_lock_irqsave(&bh->b_uptodate_lock, flags);
  6662. +#endif
  6663. + return flags;
  6664. +}
  6665. +
  6666. +static inline void
  6667. +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
  6668. +{
  6669. +#ifndef CONFIG_PREEMPT_RT_BASE
  6670. + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
  6671. + local_irq_restore(flags);
  6672. +#else
  6673. + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
  6674. +#endif
  6675. +}
  6676. +
  6677. +static inline void buffer_head_init_locks(struct buffer_head *bh)
  6678. +{
  6679. +#ifdef CONFIG_PREEMPT_RT_BASE
  6680. + spin_lock_init(&bh->b_uptodate_lock);
  6681. +#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
  6682. + defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
  6683. + spin_lock_init(&bh->b_state_lock);
  6684. + spin_lock_init(&bh->b_journal_head_lock);
  6685. +#endif
  6686. +#endif
  6687. +}
  6688. +
  6689. /*
  6690. * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
  6691. * and buffer_foo() functions.
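The fs/buffer.c and fs/ntfs/aops.c hunks earlier in the patch are the intended consumers of these helpers; the caller-side pattern they converge on is roughly the following hedged sketch (the body is elided):

#include <linux/buffer_head.h>

static void foo_end_io(struct buffer_head *bh, struct page *page)
{
	struct buffer_head *first = page_buffers(page);
	unsigned long flags;

	flags = bh_uptodate_lock_irqsave(first);
	/* ... walk the first->b_this_page ring and update buffer state ... */
	bh_uptodate_unlock_irqrestore(first, flags);
}

buffer_head_init_locks() has to run once per buffer head at allocation time, which is what the alloc_buffer_head() hunk in fs/buffer.c above takes care of.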
  6692. diff -Nur linux-3.18.10.orig/include/linux/cgroup.h linux-3.18.10/include/linux/cgroup.h
  6693. --- linux-3.18.10.orig/include/linux/cgroup.h 2015-03-24 02:05:12.000000000 +0100
  6694. +++ linux-3.18.10/include/linux/cgroup.h 2015-03-26 12:42:18.663588322 +0100
  6695. @@ -22,6 +22,7 @@
  6696. #include <linux/seq_file.h>
  6697. #include <linux/kernfs.h>
  6698. #include <linux/wait.h>
  6699. +#include <linux/work-simple.h>
  6700. #ifdef CONFIG_CGROUPS
  6701. @@ -91,6 +92,7 @@
  6702. /* percpu_ref killing and RCU release */
  6703. struct rcu_head rcu_head;
  6704. struct work_struct destroy_work;
  6705. + struct swork_event destroy_swork;
  6706. };
  6707. /* bits in struct cgroup_subsys_state flags field */
  6708. diff -Nur linux-3.18.10.orig/include/linux/completion.h linux-3.18.10/include/linux/completion.h
  6709. --- linux-3.18.10.orig/include/linux/completion.h 2015-03-24 02:05:12.000000000 +0100
  6710. +++ linux-3.18.10/include/linux/completion.h 2015-03-26 12:42:18.663588322 +0100
  6711. @@ -7,8 +7,7 @@
  6712. * Atomic wait-for-completion handler data structures.
  6713. * See kernel/sched/completion.c for details.
  6714. */
  6715. -
  6716. -#include <linux/wait.h>
  6717. +#include <linux/wait-simple.h>
  6718. /*
  6719. * struct completion - structure used to maintain state for a "completion"
  6720. @@ -24,11 +23,11 @@
  6721. */
  6722. struct completion {
  6723. unsigned int done;
  6724. - wait_queue_head_t wait;
  6725. + struct swait_head wait;
  6726. };
  6727. #define COMPLETION_INITIALIZER(work) \
  6728. - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
  6729. + { 0, SWAIT_HEAD_INITIALIZER((work).wait) }
  6730. #define COMPLETION_INITIALIZER_ONSTACK(work) \
  6731. ({ init_completion(&work); work; })
  6732. @@ -73,7 +72,7 @@
  6733. static inline void init_completion(struct completion *x)
  6734. {
  6735. x->done = 0;
  6736. - init_waitqueue_head(&x->wait);
  6737. + init_swait_head(&x->wait);
  6738. }
  6739. /**
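Users of the completion API are unaffected by the switch from a wait_queue_head_t to a swait_head; only code that touches completion->wait directly (as in the USB gadget and orinoco hunks above) has to move to the swait_* helpers. A hedged caller-side sketch with hypothetical names:

#include <linux/completion.h>
#include <linux/interrupt.h>

static DECLARE_COMPLETION(foo_done);

static irqreturn_t foo_irq(int irq, void *dev_id)
{
	complete(&foo_done);		/* wakes waiters via the swait_head */
	return IRQ_HANDLED;
}

static int foo_wait_for_hw(void)
{
	return wait_for_completion_interruptible(&foo_done);
}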
  6740. diff -Nur linux-3.18.10.orig/include/linux/cpu.h linux-3.18.10/include/linux/cpu.h
  6741. --- linux-3.18.10.orig/include/linux/cpu.h 2015-03-24 02:05:12.000000000 +0100
  6742. +++ linux-3.18.10/include/linux/cpu.h 2015-03-26 12:42:18.663588322 +0100
  6743. @@ -217,6 +217,8 @@
  6744. extern void put_online_cpus(void);
  6745. extern void cpu_hotplug_disable(void);
  6746. extern void cpu_hotplug_enable(void);
  6747. +extern void pin_current_cpu(void);
  6748. +extern void unpin_current_cpu(void);
  6749. #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
  6750. #define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri)
  6751. #define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
  6752. @@ -235,6 +237,8 @@
  6753. #define put_online_cpus() do { } while (0)
  6754. #define cpu_hotplug_disable() do { } while (0)
  6755. #define cpu_hotplug_enable() do { } while (0)
  6756. +static inline void pin_current_cpu(void) { }
  6757. +static inline void unpin_current_cpu(void) { }
  6758. #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
  6759. #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
  6760. /* These aren't inline functions due to a GCC bug. */
  6761. diff -Nur linux-3.18.10.orig/include/linux/delay.h linux-3.18.10/include/linux/delay.h
  6762. --- linux-3.18.10.orig/include/linux/delay.h 2015-03-24 02:05:12.000000000 +0100
  6763. +++ linux-3.18.10/include/linux/delay.h 2015-03-26 12:42:18.663588322 +0100
  6764. @@ -52,4 +52,10 @@
  6765. msleep(seconds * 1000);
  6766. }
  6767. +#ifdef CONFIG_PREEMPT_RT_FULL
  6768. +extern void cpu_chill(void);
  6769. +#else
  6770. +# define cpu_chill() cpu_relax()
  6771. +#endif
  6772. +
  6773. #endif /* defined(_LINUX_DELAY_H) */
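cpu_chill() is what the dcache, autofs4, namespace and timerfd hunks above substitute for cpu_relax() in trylock retry loops: on RT the lock holder may itself be preempted, so the waiter sleeps briefly instead of burning the CPU. A hedged sketch of the retry shape, with hypothetical locks:

#include <linux/delay.h>
#include <linux/spinlock.h>

static void foo_lock_both(spinlock_t *lock_a, spinlock_t *lock_b)
{
again:
	spin_lock(lock_a);
	if (!spin_trylock(lock_b)) {
		spin_unlock(lock_a);
		cpu_chill();	/* brief sleep on RT, cpu_relax() otherwise */
		goto again;
	}
	/* ... both locks held; do the work, then unlock both ... */
	spin_unlock(lock_b);
	spin_unlock(lock_a);
}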
  6774. diff -Nur linux-3.18.10.orig/include/linux/ftrace_event.h linux-3.18.10/include/linux/ftrace_event.h
  6775. --- linux-3.18.10.orig/include/linux/ftrace_event.h 2015-03-24 02:05:12.000000000 +0100
  6776. +++ linux-3.18.10/include/linux/ftrace_event.h 2015-03-26 12:42:18.663588322 +0100
  6777. @@ -61,6 +61,9 @@
  6778. unsigned char flags;
  6779. unsigned char preempt_count;
  6780. int pid;
  6781. + unsigned short migrate_disable;
  6782. + unsigned short padding;
  6783. + unsigned char preempt_lazy_count;
  6784. };
  6785. #define FTRACE_MAX_EVENT \
  6786. diff -Nur linux-3.18.10.orig/include/linux/highmem.h linux-3.18.10/include/linux/highmem.h
  6787. --- linux-3.18.10.orig/include/linux/highmem.h 2015-03-24 02:05:12.000000000 +0100
  6788. +++ linux-3.18.10/include/linux/highmem.h 2015-03-26 12:42:18.663588322 +0100
  6789. @@ -7,6 +7,7 @@
  6790. #include <linux/mm.h>
  6791. #include <linux/uaccess.h>
  6792. #include <linux/hardirq.h>
  6793. +#include <linux/sched.h>
  6794. #include <asm/cacheflush.h>
  6795. @@ -85,32 +86,51 @@
  6796. #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
  6797. +#ifndef CONFIG_PREEMPT_RT_FULL
  6798. DECLARE_PER_CPU(int, __kmap_atomic_idx);
  6799. +#endif
  6800. static inline int kmap_atomic_idx_push(void)
  6801. {
  6802. +#ifndef CONFIG_PREEMPT_RT_FULL
  6803. int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1;
  6804. -#ifdef CONFIG_DEBUG_HIGHMEM
  6805. +# ifdef CONFIG_DEBUG_HIGHMEM
  6806. WARN_ON_ONCE(in_irq() && !irqs_disabled());
  6807. BUG_ON(idx >= KM_TYPE_NR);
  6808. -#endif
  6809. +# endif
  6810. return idx;
  6811. +#else
  6812. + current->kmap_idx++;
  6813. + BUG_ON(current->kmap_idx > KM_TYPE_NR);
  6814. + return current->kmap_idx - 1;
  6815. +#endif
  6816. }
  6817. static inline int kmap_atomic_idx(void)
  6818. {
  6819. +#ifndef CONFIG_PREEMPT_RT_FULL
  6820. return __this_cpu_read(__kmap_atomic_idx) - 1;
  6821. +#else
  6822. + return current->kmap_idx - 1;
  6823. +#endif
  6824. }
  6825. static inline void kmap_atomic_idx_pop(void)
  6826. {
  6827. -#ifdef CONFIG_DEBUG_HIGHMEM
  6828. +#ifndef CONFIG_PREEMPT_RT_FULL
  6829. +# ifdef CONFIG_DEBUG_HIGHMEM
  6830. int idx = __this_cpu_dec_return(__kmap_atomic_idx);
  6831. BUG_ON(idx < 0);
  6832. -#else
  6833. +# else
  6834. __this_cpu_dec(__kmap_atomic_idx);
  6835. +# endif
  6836. +#else
  6837. + current->kmap_idx--;
  6838. +# ifdef CONFIG_DEBUG_HIGHMEM
  6839. + BUG_ON(current->kmap_idx < 0);
  6840. +# endif
  6841. #endif
  6842. }
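Moving the kmap_atomic nesting index from per-CPU data into task_struct is what allows a kmap_atomic section to be preempted on PREEMPT_RT_FULL; callers keep the usual API unchanged. A hedged sketch of ordinary caller-side usage:

#include <linux/highmem.h>
#include <linux/string.h>

static void foo_copy_to_page(struct page *page, size_t offset,
			     const void *src, size_t len)
{
	char *kaddr = kmap_atomic(page);	/* only the index bookkeeping differs on RT */

	memcpy(kaddr + offset, src, len);
	kunmap_atomic(kaddr);
}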
  6843. diff -Nur linux-3.18.10.orig/include/linux/hrtimer.h linux-3.18.10/include/linux/hrtimer.h
  6844. --- linux-3.18.10.orig/include/linux/hrtimer.h 2015-03-24 02:05:12.000000000 +0100
  6845. +++ linux-3.18.10/include/linux/hrtimer.h 2015-03-26 12:42:18.667588326 +0100
  6846. @@ -111,6 +111,11 @@
  6847. enum hrtimer_restart (*function)(struct hrtimer *);
  6848. struct hrtimer_clock_base *base;
  6849. unsigned long state;
  6850. + struct list_head cb_entry;
  6851. + int irqsafe;
  6852. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  6853. + ktime_t praecox;
  6854. +#endif
  6855. #ifdef CONFIG_TIMER_STATS
  6856. int start_pid;
  6857. void *start_site;
  6858. @@ -147,6 +152,7 @@
  6859. int index;
  6860. clockid_t clockid;
  6861. struct timerqueue_head active;
  6862. + struct list_head expired;
  6863. ktime_t resolution;
  6864. ktime_t (*get_time)(void);
  6865. ktime_t softirq_time;
  6866. @@ -192,6 +198,9 @@
  6867. unsigned long nr_hangs;
  6868. ktime_t max_hang_time;
  6869. #endif
  6870. +#ifdef CONFIG_PREEMPT_RT_BASE
  6871. + wait_queue_head_t wait;
  6872. +#endif
  6873. struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
  6874. };
  6875. @@ -379,6 +388,13 @@
  6876. return hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
  6877. }
  6878. +/* Softirq preemption could deadlock timer removal */
  6879. +#ifdef CONFIG_PREEMPT_RT_BASE
  6880. + extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
  6881. +#else
  6882. +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
  6883. +#endif
  6884. +
  6885. /* Query timers: */
  6886. extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
  6887. extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
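hrtimer_wait_for_timer() is what the timerfd hunk above uses in place of cpu_relax(): on RT a callback can be running in the preemptible softirq thread, so spinning for it to finish can livelock a higher-priority waiter, and the waiter sleeps on the base's wait queue instead. A hedged sketch of a cancel loop built on it:

#include <linux/hrtimer.h>

static void foo_cancel_timer(struct hrtimer *timer)
{
	/* -1 means the callback is currently running somewhere */
	while (hrtimer_try_to_cancel(timer) < 0)
		hrtimer_wait_for_timer(timer);	/* cpu_relax() on non-RT */
}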
  6888. diff -Nur linux-3.18.10.orig/include/linux/idr.h linux-3.18.10/include/linux/idr.h
  6889. --- linux-3.18.10.orig/include/linux/idr.h 2015-03-24 02:05:12.000000000 +0100
  6890. +++ linux-3.18.10/include/linux/idr.h 2015-03-26 12:42:18.667588326 +0100
  6891. @@ -95,10 +95,14 @@
  6892. * Each idr_preload() should be matched with an invocation of this
  6893. * function. See idr_preload() for details.
  6894. */
  6895. +#ifdef CONFIG_PREEMPT_RT_FULL
  6896. +void idr_preload_end(void);
  6897. +#else
  6898. static inline void idr_preload_end(void)
  6899. {
  6900. preempt_enable();
  6901. }
  6902. +#endif
  6903. /**
  6904. * idr_find - return pointer for given id
  6905. diff -Nur linux-3.18.10.orig/include/linux/init_task.h linux-3.18.10/include/linux/init_task.h
  6906. --- linux-3.18.10.orig/include/linux/init_task.h 2015-03-24 02:05:12.000000000 +0100
  6907. +++ linux-3.18.10/include/linux/init_task.h 2015-03-26 12:42:18.667588326 +0100
  6908. @@ -147,9 +147,16 @@
  6909. # define INIT_PERF_EVENTS(tsk)
  6910. #endif
  6911. +#ifdef CONFIG_PREEMPT_RT_BASE
  6912. +# define INIT_TIMER_LIST .posix_timer_list = NULL,
  6913. +#else
  6914. +# define INIT_TIMER_LIST
  6915. +#endif
  6916. +
  6917. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  6918. # define INIT_VTIME(tsk) \
  6919. - .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \
  6920. + .vtime_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.vtime_lock), \
  6921. + .vtime_seq = SEQCNT_ZERO(tsk.vtime_seq), \
  6922. .vtime_snap = 0, \
  6923. .vtime_snap_whence = VTIME_SYS,
  6924. #else
  6925. @@ -219,6 +226,7 @@
  6926. .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
  6927. .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
  6928. .timer_slack_ns = 50000, /* 50 usec default slack */ \
  6929. + INIT_TIMER_LIST \
  6930. .pids = { \
  6931. [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
  6932. [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
  6933. diff -Nur linux-3.18.10.orig/include/linux/interrupt.h linux-3.18.10/include/linux/interrupt.h
  6934. --- linux-3.18.10.orig/include/linux/interrupt.h 2015-03-24 02:05:12.000000000 +0100
  6935. +++ linux-3.18.10/include/linux/interrupt.h 2015-03-26 12:42:18.667588326 +0100
  6936. @@ -57,6 +57,7 @@
  6937. * IRQF_NO_THREAD - Interrupt cannot be threaded
  6938. * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device
  6939. * resume time.
  6940. + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT)
  6941. */
  6942. #define IRQF_DISABLED 0x00000020
  6943. #define IRQF_SHARED 0x00000080
  6944. @@ -70,6 +71,7 @@
  6945. #define IRQF_FORCE_RESUME 0x00008000
  6946. #define IRQF_NO_THREAD 0x00010000
  6947. #define IRQF_EARLY_RESUME 0x00020000
  6948. +#define IRQF_NO_SOFTIRQ_CALL 0x00080000
  6949. #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
  6950. @@ -180,7 +182,7 @@
  6951. #ifdef CONFIG_LOCKDEP
  6952. # define local_irq_enable_in_hardirq() do { } while (0)
  6953. #else
  6954. -# define local_irq_enable_in_hardirq() local_irq_enable()
  6955. +# define local_irq_enable_in_hardirq() local_irq_enable_nort()
  6956. #endif
  6957. extern void disable_irq_nosync(unsigned int irq);
  6958. @@ -210,6 +212,7 @@
  6959. unsigned int irq;
  6960. struct kref kref;
  6961. struct work_struct work;
  6962. + struct list_head list;
  6963. void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
  6964. void (*release)(struct kref *ref);
  6965. };
  6966. @@ -358,9 +361,13 @@
  6967. #ifdef CONFIG_IRQ_FORCED_THREADING
  6968. +# ifndef CONFIG_PREEMPT_RT_BASE
  6969. extern bool force_irqthreads;
  6970. +# else
  6971. +# define force_irqthreads (true)
  6972. +# endif
  6973. #else
  6974. -#define force_irqthreads (0)
  6975. +#define force_irqthreads (false)
  6976. #endif
  6977. #ifndef __ARCH_SET_SOFTIRQ_PENDING
  6978. @@ -416,9 +423,10 @@
  6979. void (*action)(struct softirq_action *);
  6980. };
  6981. +#ifndef CONFIG_PREEMPT_RT_FULL
  6982. asmlinkage void do_softirq(void);
  6983. asmlinkage void __do_softirq(void);
  6984. -
  6985. +static inline void thread_do_softirq(void) { do_softirq(); }
  6986. #ifdef __ARCH_HAS_DO_SOFTIRQ
  6987. void do_softirq_own_stack(void);
  6988. #else
  6989. @@ -427,6 +435,9 @@
  6990. __do_softirq();
  6991. }
  6992. #endif
  6993. +#else
  6994. +extern void thread_do_softirq(void);
  6995. +#endif
  6996. extern void open_softirq(int nr, void (*action)(struct softirq_action *));
  6997. extern void softirq_init(void);
  6998. @@ -434,6 +445,7 @@
  6999. extern void raise_softirq_irqoff(unsigned int nr);
  7000. extern void raise_softirq(unsigned int nr);
  7001. +extern void softirq_check_pending_idle(void);
  7002. DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
  7003. @@ -455,8 +467,9 @@
  7004. to be executed on some cpu at least once after this.
  7005. * If the tasklet is already scheduled, but its execution is still not
  7006. started, it will be executed only once.
  7007. - * If this tasklet is already running on another CPU (or schedule is called
  7008. - from tasklet itself), it is rescheduled for later.
  7009. + * If this tasklet is already running on another CPU, it is rescheduled
  7010. + for later.
  7011. + * Schedule must not be called from the tasklet itself (a lockup occurs)
  7012. * Tasklet is strictly serialized wrt itself, but not
  7013. wrt another tasklets. If client needs some intertask synchronization,
  7014. he makes it with spinlocks.
  7015. @@ -481,27 +494,36 @@
  7016. enum
  7017. {
  7018. TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */
  7019. - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */
  7020. + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */
  7021. + TASKLET_STATE_PENDING /* Tasklet is pending */
  7022. };
  7023. -#ifdef CONFIG_SMP
  7024. +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED)
  7025. +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN)
  7026. +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING)
  7027. +
  7028. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  7029. static inline int tasklet_trylock(struct tasklet_struct *t)
  7030. {
  7031. return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
  7032. }
  7033. +static inline int tasklet_tryunlock(struct tasklet_struct *t)
  7034. +{
  7035. + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN;
  7036. +}
  7037. +
  7038. static inline void tasklet_unlock(struct tasklet_struct *t)
  7039. {
  7040. smp_mb__before_atomic();
  7041. clear_bit(TASKLET_STATE_RUN, &(t)->state);
  7042. }
  7043. -static inline void tasklet_unlock_wait(struct tasklet_struct *t)
  7044. -{
  7045. - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
  7046. -}
  7047. +extern void tasklet_unlock_wait(struct tasklet_struct *t);
  7048. +
  7049. #else
  7050. #define tasklet_trylock(t) 1
  7051. +#define tasklet_tryunlock(t) 1
  7052. #define tasklet_unlock_wait(t) do { } while (0)
  7053. #define tasklet_unlock(t) do { } while (0)
  7054. #endif
  7055. @@ -550,17 +572,8 @@
  7056. smp_mb();
  7057. }
  7058. -static inline void tasklet_enable(struct tasklet_struct *t)
  7059. -{
  7060. - smp_mb__before_atomic();
  7061. - atomic_dec(&t->count);
  7062. -}
  7063. -
  7064. -static inline void tasklet_hi_enable(struct tasklet_struct *t)
  7065. -{
  7066. - smp_mb__before_atomic();
  7067. - atomic_dec(&t->count);
  7068. -}
  7069. +extern void tasklet_enable(struct tasklet_struct *t);
  7070. +extern void tasklet_hi_enable(struct tasklet_struct *t);
  7071. extern void tasklet_kill(struct tasklet_struct *t);
  7072. extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
  7073. @@ -592,6 +605,12 @@
  7074. tasklet_kill(&ttimer->tasklet);
  7075. }
  7076. +#ifdef CONFIG_PREEMPT_RT_FULL
  7077. +extern void softirq_early_init(void);
  7078. +#else
  7079. +static inline void softirq_early_init(void) { }
  7080. +#endif
  7081. +
  7082. /*
  7083. * Autoprobing for irqs:
  7084. *
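
A sketch (not part of the patch) of the retry pattern the new tasklet_trylock()/tasklet_tryunlock() pair supports, assuming the stock tasklet_struct layout. The runner name is hypothetical, and the real RT softirq code additionally uses TASKLET_STATE_PENDING to defer disabled tasklets, which this simplified version ignores.

/*
 * Illustrative sketch, not part of the patch: tasklet_tryunlock() only
 * succeeds while RUN is the sole state bit, so a tasklet re-scheduled
 * during its handler is run again by the same owner instead of being
 * lost. Disable/PENDING handling is omitted; names other than the
 * tasklet helpers are hypothetical.
 */
static void my_run_tasklet(struct tasklet_struct *t)
{
	if (!tasklet_trylock(t))
		return;				/* running on another CPU */

	do {
		if (!atomic_read(&t->count) &&
		    test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
			t->func(t->data);
	} while (!tasklet_tryunlock(t));	/* retry if SCHED was set again */
}
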
  7085. diff -Nur linux-3.18.10.orig/include/linux/irqdesc.h linux-3.18.10/include/linux/irqdesc.h
  7086. --- linux-3.18.10.orig/include/linux/irqdesc.h 2015-03-24 02:05:12.000000000 +0100
  7087. +++ linux-3.18.10/include/linux/irqdesc.h 2015-03-26 12:42:18.667588326 +0100
  7088. @@ -63,6 +63,7 @@
  7089. unsigned int irqs_unhandled;
  7090. atomic_t threads_handled;
  7091. int threads_handled_last;
  7092. + u64 random_ip;
  7093. raw_spinlock_t lock;
  7094. struct cpumask *percpu_enabled;
  7095. #ifdef CONFIG_SMP
  7096. diff -Nur linux-3.18.10.orig/include/linux/irqflags.h linux-3.18.10/include/linux/irqflags.h
  7097. --- linux-3.18.10.orig/include/linux/irqflags.h 2015-03-24 02:05:12.000000000 +0100
  7098. +++ linux-3.18.10/include/linux/irqflags.h 2015-03-26 12:42:18.667588326 +0100
  7099. @@ -25,8 +25,6 @@
  7100. # define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
  7101. # define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
  7102. # define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
  7103. -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
  7104. -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
  7105. # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
  7106. #else
  7107. # define trace_hardirqs_on() do { } while (0)
  7108. @@ -39,9 +37,15 @@
  7109. # define trace_softirqs_enabled(p) 0
  7110. # define trace_hardirq_enter() do { } while (0)
  7111. # define trace_hardirq_exit() do { } while (0)
  7112. +# define INIT_TRACE_IRQFLAGS
  7113. +#endif
  7114. +
  7115. +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL)
  7116. +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
  7117. +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
  7118. +#else
  7119. # define lockdep_softirq_enter() do { } while (0)
  7120. # define lockdep_softirq_exit() do { } while (0)
  7121. -# define INIT_TRACE_IRQFLAGS
  7122. #endif
  7123. #if defined(CONFIG_IRQSOFF_TRACER) || \
  7124. @@ -147,4 +151,23 @@
  7125. #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
  7126. +/*
  7127. + * local_irq* variants depending on RT/!RT
  7128. + */
  7129. +#ifdef CONFIG_PREEMPT_RT_FULL
  7130. +# define local_irq_disable_nort() do { } while (0)
  7131. +# define local_irq_enable_nort() do { } while (0)
  7132. +# define local_irq_save_nort(flags) local_save_flags(flags)
  7133. +# define local_irq_restore_nort(flags) (void)(flags)
  7134. +# define local_irq_disable_rt() local_irq_disable()
  7135. +# define local_irq_enable_rt() local_irq_enable()
  7136. +#else
  7137. +# define local_irq_disable_nort() local_irq_disable()
  7138. +# define local_irq_enable_nort() local_irq_enable()
  7139. +# define local_irq_save_nort(flags) local_irq_save(flags)
  7140. +# define local_irq_restore_nort(flags) local_irq_restore(flags)
  7141. +# define local_irq_disable_rt() do { } while (0)
  7142. +# define local_irq_enable_rt() do { } while (0)
  7143. +#endif
  7144. +
  7145. #endif
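
A sketch (not part of the patch) of the intended conversion pattern for the _nort variants; lock, data and function names are hypothetical. Paths whose interrupt disabling only matters on !RT keep that behaviour there while staying preemptible on RT.

/*
 * Illustrative sketch, not part of the patch. On !RT the irq disable
 * also excludes the hard interrupt handler, exactly as before; on RT
 * the handler is threaded and the (sleeping) spinlock is the real
 * protection, so the _nort variants keep this section preemptible.
 */
static DEFINE_SPINLOCK(my_lock);
static unsigned long my_shared_count;

static void my_update(void)
{
	unsigned long flags;

	local_irq_save_nort(flags);
	spin_lock(&my_lock);
	my_shared_count++;
	spin_unlock(&my_lock);
	local_irq_restore_nort(flags);
}
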
  7146. diff -Nur linux-3.18.10.orig/include/linux/irq.h linux-3.18.10/include/linux/irq.h
  7147. --- linux-3.18.10.orig/include/linux/irq.h 2015-03-24 02:05:12.000000000 +0100
  7148. +++ linux-3.18.10/include/linux/irq.h 2015-03-26 12:42:18.667588326 +0100
  7149. @@ -73,6 +73,7 @@
  7150. * IRQ_IS_POLLED - Always polled by another interrupt. Exclude
  7151. * it from the spurious interrupt detection
  7152. * mechanism and from core side polling.
  7153. + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT)
  7154. */
  7155. enum {
  7156. IRQ_TYPE_NONE = 0x00000000,
  7157. @@ -98,13 +99,14 @@
  7158. IRQ_NOTHREAD = (1 << 16),
  7159. IRQ_PER_CPU_DEVID = (1 << 17),
  7160. IRQ_IS_POLLED = (1 << 18),
  7161. + IRQ_NO_SOFTIRQ_CALL = (1 << 19),
  7162. };
  7163. #define IRQF_MODIFY_MASK \
  7164. (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
  7165. IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
  7166. IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \
  7167. - IRQ_IS_POLLED)
  7168. + IRQ_IS_POLLED | IRQ_NO_SOFTIRQ_CALL)
  7169. #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
  7170. diff -Nur linux-3.18.10.orig/include/linux/irq_work.h linux-3.18.10/include/linux/irq_work.h
  7171. --- linux-3.18.10.orig/include/linux/irq_work.h 2015-03-24 02:05:12.000000000 +0100
  7172. +++ linux-3.18.10/include/linux/irq_work.h 2015-03-26 12:42:18.667588326 +0100
  7173. @@ -16,6 +16,7 @@
  7174. #define IRQ_WORK_BUSY 2UL
  7175. #define IRQ_WORK_FLAGS 3UL
  7176. #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */
  7177. +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */
  7178. struct irq_work {
  7179. unsigned long flags;
  7180. diff -Nur linux-3.18.10.orig/include/linux/jbd_common.h linux-3.18.10/include/linux/jbd_common.h
  7181. --- linux-3.18.10.orig/include/linux/jbd_common.h 2015-03-24 02:05:12.000000000 +0100
  7182. +++ linux-3.18.10/include/linux/jbd_common.h 2015-03-26 12:42:18.667588326 +0100
  7183. @@ -15,32 +15,56 @@
  7184. static inline void jbd_lock_bh_state(struct buffer_head *bh)
  7185. {
  7186. +#ifndef CONFIG_PREEMPT_RT_BASE
  7187. bit_spin_lock(BH_State, &bh->b_state);
  7188. +#else
  7189. + spin_lock(&bh->b_state_lock);
  7190. +#endif
  7191. }
  7192. static inline int jbd_trylock_bh_state(struct buffer_head *bh)
  7193. {
  7194. +#ifndef CONFIG_PREEMPT_RT_BASE
  7195. return bit_spin_trylock(BH_State, &bh->b_state);
  7196. +#else
  7197. + return spin_trylock(&bh->b_state_lock);
  7198. +#endif
  7199. }
  7200. static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
  7201. {
  7202. +#ifndef CONFIG_PREEMPT_RT_BASE
  7203. return bit_spin_is_locked(BH_State, &bh->b_state);
  7204. +#else
  7205. + return spin_is_locked(&bh->b_state_lock);
  7206. +#endif
  7207. }
  7208. static inline void jbd_unlock_bh_state(struct buffer_head *bh)
  7209. {
  7210. +#ifndef CONFIG_PREEMPT_RT_BASE
  7211. bit_spin_unlock(BH_State, &bh->b_state);
  7212. +#else
  7213. + spin_unlock(&bh->b_state_lock);
  7214. +#endif
  7215. }
  7216. static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
  7217. {
  7218. +#ifndef CONFIG_PREEMPT_RT_BASE
  7219. bit_spin_lock(BH_JournalHead, &bh->b_state);
  7220. +#else
  7221. + spin_lock(&bh->b_journal_head_lock);
  7222. +#endif
  7223. }
  7224. static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
  7225. {
  7226. +#ifndef CONFIG_PREEMPT_RT_BASE
  7227. bit_spin_unlock(BH_JournalHead, &bh->b_state);
  7228. +#else
  7229. + spin_unlock(&bh->b_journal_head_lock);
  7230. +#endif
  7231. }
  7232. #endif
  7233. diff -Nur linux-3.18.10.orig/include/linux/jump_label.h linux-3.18.10/include/linux/jump_label.h
  7234. --- linux-3.18.10.orig/include/linux/jump_label.h 2015-03-24 02:05:12.000000000 +0100
  7235. +++ linux-3.18.10/include/linux/jump_label.h 2015-03-26 12:42:18.667588326 +0100
  7236. @@ -55,7 +55,8 @@
  7237. "%s used before call to jump_label_init", \
  7238. __func__)
  7239. -#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
  7240. +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) && \
  7241. + !defined(CONFIG_PREEMPT_BASE)
  7242. struct static_key {
  7243. atomic_t enabled;
  7244. diff -Nur linux-3.18.10.orig/include/linux/kdb.h linux-3.18.10/include/linux/kdb.h
  7245. --- linux-3.18.10.orig/include/linux/kdb.h 2015-03-24 02:05:12.000000000 +0100
  7246. +++ linux-3.18.10/include/linux/kdb.h 2015-03-26 12:42:18.667588326 +0100
  7247. @@ -116,7 +116,7 @@
  7248. extern __printf(1, 0) int vkdb_printf(const char *fmt, va_list args);
  7249. extern __printf(1, 2) int kdb_printf(const char *, ...);
  7250. typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
  7251. -
  7252. +#define in_kdb_printk() (kdb_trap_printk)
  7253. extern void kdb_init(int level);
  7254. /* Access to kdb specific polling devices */
  7255. @@ -151,6 +151,7 @@
  7256. extern int kdb_unregister(char *);
  7257. #else /* ! CONFIG_KGDB_KDB */
  7258. static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; }
  7259. +#define in_kdb_printk() (0)
  7260. static inline void kdb_init(int level) {}
  7261. static inline int kdb_register(char *cmd, kdb_func_t func, char *usage,
  7262. char *help, short minlen) { return 0; }
  7263. diff -Nur linux-3.18.10.orig/include/linux/kernel.h linux-3.18.10/include/linux/kernel.h
  7264. --- linux-3.18.10.orig/include/linux/kernel.h 2015-03-24 02:05:12.000000000 +0100
  7265. +++ linux-3.18.10/include/linux/kernel.h 2015-03-26 12:42:18.667588326 +0100
  7266. @@ -451,6 +451,7 @@
  7267. SYSTEM_HALT,
  7268. SYSTEM_POWER_OFF,
  7269. SYSTEM_RESTART,
  7270. + SYSTEM_SUSPEND,
  7271. } system_state;
  7272. #define TAINT_PROPRIETARY_MODULE 0
  7273. diff -Nur linux-3.18.10.orig/include/linux/lglock.h linux-3.18.10/include/linux/lglock.h
  7274. --- linux-3.18.10.orig/include/linux/lglock.h 2015-03-24 02:05:12.000000000 +0100
  7275. +++ linux-3.18.10/include/linux/lglock.h 2015-03-26 12:42:18.667588326 +0100
  7276. @@ -34,22 +34,39 @@
  7277. #endif
  7278. struct lglock {
  7279. +#ifndef CONFIG_PREEMPT_RT_FULL
  7280. arch_spinlock_t __percpu *lock;
  7281. +#else
  7282. + struct rt_mutex __percpu *lock;
  7283. +#endif
  7284. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  7285. struct lock_class_key lock_key;
  7286. struct lockdep_map lock_dep_map;
  7287. #endif
  7288. };
  7289. -#define DEFINE_LGLOCK(name) \
  7290. +#ifndef CONFIG_PREEMPT_RT_FULL
  7291. +# define DEFINE_LGLOCK(name) \
  7292. static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
  7293. = __ARCH_SPIN_LOCK_UNLOCKED; \
  7294. struct lglock name = { .lock = &name ## _lock }
  7295. -#define DEFINE_STATIC_LGLOCK(name) \
  7296. +# define DEFINE_STATIC_LGLOCK(name) \
  7297. static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
  7298. = __ARCH_SPIN_LOCK_UNLOCKED; \
  7299. static struct lglock name = { .lock = &name ## _lock }
  7300. +#else
  7301. +
  7302. +# define DEFINE_LGLOCK(name) \
  7303. + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \
  7304. + = __RT_MUTEX_INITIALIZER( name ## _lock); \
  7305. + struct lglock name = { .lock = &name ## _lock }
  7306. +
  7307. +# define DEFINE_STATIC_LGLOCK(name) \
  7308. + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \
  7309. + = __RT_MUTEX_INITIALIZER( name ## _lock); \
  7310. + static struct lglock name = { .lock = &name ## _lock }
  7311. +#endif
  7312. void lg_lock_init(struct lglock *lg, char *name);
  7313. void lg_local_lock(struct lglock *lg);
  7314. @@ -59,6 +76,12 @@
  7315. void lg_global_lock(struct lglock *lg);
  7316. void lg_global_unlock(struct lglock *lg);
  7317. +#ifndef CONFIG_PREEMPT_RT_FULL
  7318. +#define lg_global_trylock_relax(name) lg_global_lock(name)
  7319. +#else
  7320. +void lg_global_trylock_relax(struct lglock *lg);
  7321. +#endif
  7322. +
  7323. #else
  7324. /* When !CONFIG_SMP, map lglock to spinlock */
  7325. #define lglock spinlock
  7326. diff -Nur linux-3.18.10.orig/include/linux/list_bl.h linux-3.18.10/include/linux/list_bl.h
  7327. --- linux-3.18.10.orig/include/linux/list_bl.h 2015-03-24 02:05:12.000000000 +0100
  7328. +++ linux-3.18.10/include/linux/list_bl.h 2015-03-26 12:42:18.667588326 +0100
  7329. @@ -2,6 +2,7 @@
  7330. #define _LINUX_LIST_BL_H
  7331. #include <linux/list.h>
  7332. +#include <linux/spinlock.h>
  7333. #include <linux/bit_spinlock.h>
  7334. /*
  7335. @@ -32,13 +33,22 @@
  7336. struct hlist_bl_head {
  7337. struct hlist_bl_node *first;
  7338. +#ifdef CONFIG_PREEMPT_RT_BASE
  7339. + raw_spinlock_t lock;
  7340. +#endif
  7341. };
  7342. struct hlist_bl_node {
  7343. struct hlist_bl_node *next, **pprev;
  7344. };
  7345. -#define INIT_HLIST_BL_HEAD(ptr) \
  7346. - ((ptr)->first = NULL)
  7347. +
  7348. +static inline void INIT_HLIST_BL_HEAD(struct hlist_bl_head *h)
  7349. +{
  7350. + h->first = NULL;
  7351. +#ifdef CONFIG_PREEMPT_RT_BASE
  7352. + raw_spin_lock_init(&h->lock);
  7353. +#endif
  7354. +}
  7355. static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
  7356. {
  7357. @@ -117,12 +127,26 @@
  7358. static inline void hlist_bl_lock(struct hlist_bl_head *b)
  7359. {
  7360. +#ifndef CONFIG_PREEMPT_RT_BASE
  7361. bit_spin_lock(0, (unsigned long *)b);
  7362. +#else
  7363. + raw_spin_lock(&b->lock);
  7364. +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  7365. + __set_bit(0, (unsigned long *)b);
  7366. +#endif
  7367. +#endif
  7368. }
  7369. static inline void hlist_bl_unlock(struct hlist_bl_head *b)
  7370. {
  7371. +#ifndef CONFIG_PREEMPT_RT_BASE
  7372. __bit_spin_unlock(0, (unsigned long *)b);
  7373. +#else
  7374. +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  7375. + __clear_bit(0, (unsigned long *)b);
  7376. +#endif
  7377. + raw_spin_unlock(&b->lock);
  7378. +#endif
  7379. }
  7380. static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
  7381. diff -Nur linux-3.18.10.orig/include/linux/locallock.h linux-3.18.10/include/linux/locallock.h
  7382. --- linux-3.18.10.orig/include/linux/locallock.h 1970-01-01 01:00:00.000000000 +0100
  7383. +++ linux-3.18.10/include/linux/locallock.h 2015-03-26 12:42:18.667588326 +0100
  7384. @@ -0,0 +1,270 @@
  7385. +#ifndef _LINUX_LOCALLOCK_H
  7386. +#define _LINUX_LOCALLOCK_H
  7387. +
  7388. +#include <linux/percpu.h>
  7389. +#include <linux/spinlock.h>
  7390. +
  7391. +#ifdef CONFIG_PREEMPT_RT_BASE
  7392. +
  7393. +#ifdef CONFIG_DEBUG_SPINLOCK
  7394. +# define LL_WARN(cond) WARN_ON(cond)
  7395. +#else
  7396. +# define LL_WARN(cond) do { } while (0)
  7397. +#endif
  7398. +
  7399. +/*
  7400. + * per cpu lock based substitute for local_irq_*()
  7401. + */
  7402. +struct local_irq_lock {
  7403. + spinlock_t lock;
  7404. + struct task_struct *owner;
  7405. + int nestcnt;
  7406. + unsigned long flags;
  7407. +};
  7408. +
  7409. +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \
  7410. + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \
  7411. + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
  7412. +
  7413. +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \
  7414. + DECLARE_PER_CPU(struct local_irq_lock, lvar)
  7415. +
  7416. +#define local_irq_lock_init(lvar) \
  7417. + do { \
  7418. + int __cpu; \
  7419. + for_each_possible_cpu(__cpu) \
  7420. + spin_lock_init(&per_cpu(lvar, __cpu).lock); \
  7421. + } while (0)
  7422. +
  7423. +/*
  7424. + * spin_lock|trylock|unlock_local flavour that does not migrate disable
  7425. + * used for __local_lock|trylock|unlock where get_local_var/put_local_var
  7426. + * already takes care of the migrate_disable/enable
  7427. + * for CONFIG_PREEMPT_BASE map to the normal spin_* calls.
  7428. + */
  7429. +#ifdef CONFIG_PREEMPT_RT_FULL
  7430. +# define spin_lock_local(lock) rt_spin_lock(lock)
  7431. +# define spin_trylock_local(lock) rt_spin_trylock(lock)
  7432. +# define spin_unlock_local(lock) rt_spin_unlock(lock)
  7433. +#else
  7434. +# define spin_lock_local(lock) spin_lock(lock)
  7435. +# define spin_trylock_local(lock) spin_trylock(lock)
  7436. +# define spin_unlock_local(lock) spin_unlock(lock)
  7437. +#endif
  7438. +
  7439. +static inline void __local_lock(struct local_irq_lock *lv)
  7440. +{
  7441. + if (lv->owner != current) {
  7442. + spin_lock_local(&lv->lock);
  7443. + LL_WARN(lv->owner);
  7444. + LL_WARN(lv->nestcnt);
  7445. + lv->owner = current;
  7446. + }
  7447. + lv->nestcnt++;
  7448. +}
  7449. +
  7450. +#define local_lock(lvar) \
  7451. + do { __local_lock(&get_local_var(lvar)); } while (0)
  7452. +
  7453. +static inline int __local_trylock(struct local_irq_lock *lv)
  7454. +{
  7455. + if (lv->owner != current && spin_trylock_local(&lv->lock)) {
  7456. + LL_WARN(lv->owner);
  7457. + LL_WARN(lv->nestcnt);
  7458. + lv->owner = current;
  7459. + lv->nestcnt = 1;
  7460. + return 1;
  7461. + }
  7462. + return 0;
  7463. +}
  7464. +
  7465. +#define local_trylock(lvar) \
  7466. + ({ \
  7467. + int __locked; \
  7468. + __locked = __local_trylock(&get_local_var(lvar)); \
  7469. + if (!__locked) \
  7470. + put_local_var(lvar); \
  7471. + __locked; \
  7472. + })
  7473. +
  7474. +static inline void __local_unlock(struct local_irq_lock *lv)
  7475. +{
  7476. + LL_WARN(lv->nestcnt == 0);
  7477. + LL_WARN(lv->owner != current);
  7478. + if (--lv->nestcnt)
  7479. + return;
  7480. +
  7481. + lv->owner = NULL;
  7482. + spin_unlock_local(&lv->lock);
  7483. +}
  7484. +
  7485. +#define local_unlock(lvar) \
  7486. + do { \
  7487. + __local_unlock(&__get_cpu_var(lvar)); \
  7488. + put_local_var(lvar); \
  7489. + } while (0)
  7490. +
  7491. +static inline void __local_lock_irq(struct local_irq_lock *lv)
  7492. +{
  7493. + spin_lock_irqsave(&lv->lock, lv->flags);
  7494. + LL_WARN(lv->owner);
  7495. + LL_WARN(lv->nestcnt);
  7496. + lv->owner = current;
  7497. + lv->nestcnt = 1;
  7498. +}
  7499. +
  7500. +#define local_lock_irq(lvar) \
  7501. + do { __local_lock_irq(&get_local_var(lvar)); } while (0)
  7502. +
  7503. +#define local_lock_irq_on(lvar, cpu) \
  7504. + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0)
  7505. +
  7506. +static inline void __local_unlock_irq(struct local_irq_lock *lv)
  7507. +{
  7508. + LL_WARN(!lv->nestcnt);
  7509. + LL_WARN(lv->owner != current);
  7510. + lv->owner = NULL;
  7511. + lv->nestcnt = 0;
  7512. + spin_unlock_irq(&lv->lock);
  7513. +}
  7514. +
  7515. +#define local_unlock_irq(lvar) \
  7516. + do { \
  7517. + __local_unlock_irq(&__get_cpu_var(lvar)); \
  7518. + put_local_var(lvar); \
  7519. + } while (0)
  7520. +
  7521. +#define local_unlock_irq_on(lvar, cpu) \
  7522. + do { \
  7523. + __local_unlock_irq(&per_cpu(lvar, cpu)); \
  7524. + } while (0)
  7525. +
  7526. +static inline int __local_lock_irqsave(struct local_irq_lock *lv)
  7527. +{
  7528. + if (lv->owner != current) {
  7529. + __local_lock_irq(lv);
  7530. + return 0;
  7531. + } else {
  7532. + lv->nestcnt++;
  7533. + return 1;
  7534. + }
  7535. +}
  7536. +
  7537. +#define local_lock_irqsave(lvar, _flags) \
  7538. + do { \
  7539. + if (__local_lock_irqsave(&get_local_var(lvar))) \
  7540. + put_local_var(lvar); \
  7541. + _flags = __get_cpu_var(lvar).flags; \
  7542. + } while (0)
  7543. +
  7544. +#define local_lock_irqsave_on(lvar, _flags, cpu) \
  7545. + do { \
  7546. + __local_lock_irqsave(&per_cpu(lvar, cpu)); \
  7547. + _flags = per_cpu(lvar, cpu).flags; \
  7548. + } while (0)
  7549. +
  7550. +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
  7551. + unsigned long flags)
  7552. +{
  7553. + LL_WARN(!lv->nestcnt);
  7554. + LL_WARN(lv->owner != current);
  7555. + if (--lv->nestcnt)
  7556. + return 0;
  7557. +
  7558. + lv->owner = NULL;
  7559. + spin_unlock_irqrestore(&lv->lock, lv->flags);
  7560. + return 1;
  7561. +}
  7562. +
  7563. +#define local_unlock_irqrestore(lvar, flags) \
  7564. + do { \
  7565. + if (__local_unlock_irqrestore(&__get_cpu_var(lvar), flags)) \
  7566. + put_local_var(lvar); \
  7567. + } while (0)
  7568. +
  7569. +#define local_unlock_irqrestore_on(lvar, flags, cpu) \
  7570. + do { \
  7571. + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \
  7572. + } while (0)
  7573. +
  7574. +#define local_spin_trylock_irq(lvar, lock) \
  7575. + ({ \
  7576. + int __locked; \
  7577. + local_lock_irq(lvar); \
  7578. + __locked = spin_trylock(lock); \
  7579. + if (!__locked) \
  7580. + local_unlock_irq(lvar); \
  7581. + __locked; \
  7582. + })
  7583. +
  7584. +#define local_spin_lock_irq(lvar, lock) \
  7585. + do { \
  7586. + local_lock_irq(lvar); \
  7587. + spin_lock(lock); \
  7588. + } while (0)
  7589. +
  7590. +#define local_spin_unlock_irq(lvar, lock) \
  7591. + do { \
  7592. + spin_unlock(lock); \
  7593. + local_unlock_irq(lvar); \
  7594. + } while (0)
  7595. +
  7596. +#define local_spin_lock_irqsave(lvar, lock, flags) \
  7597. + do { \
  7598. + local_lock_irqsave(lvar, flags); \
  7599. + spin_lock(lock); \
  7600. + } while (0)
  7601. +
  7602. +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
  7603. + do { \
  7604. + spin_unlock(lock); \
  7605. + local_unlock_irqrestore(lvar, flags); \
  7606. + } while (0)
  7607. +
  7608. +#define get_locked_var(lvar, var) \
  7609. + (*({ \
  7610. + local_lock(lvar); \
  7611. + &__get_cpu_var(var); \
  7612. + }))
  7613. +
  7614. +#define put_locked_var(lvar, var) local_unlock(lvar);
  7615. +
  7616. +#define local_lock_cpu(lvar) \
  7617. + ({ \
  7618. + local_lock(lvar); \
  7619. + smp_processor_id(); \
  7620. + })
  7621. +
  7622. +#define local_unlock_cpu(lvar) local_unlock(lvar)
  7623. +
  7624. +#else /* PREEMPT_RT_BASE */
  7625. +
  7626. +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar
  7627. +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar
  7628. +
  7629. +static inline void local_irq_lock_init(int lvar) { }
  7630. +
  7631. +#define local_lock(lvar) preempt_disable()
  7632. +#define local_unlock(lvar) preempt_enable()
  7633. +#define local_lock_irq(lvar) local_irq_disable()
  7634. +#define local_unlock_irq(lvar) local_irq_enable()
  7635. +#define local_lock_irqsave(lvar, flags) local_irq_save(flags)
  7636. +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags)
  7637. +
  7638. +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock)
  7639. +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock)
  7640. +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock)
  7641. +#define local_spin_lock_irqsave(lvar, lock, flags) \
  7642. + spin_lock_irqsave(lock, flags)
  7643. +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
  7644. + spin_unlock_irqrestore(lock, flags)
  7645. +
  7646. +#define get_locked_var(lvar, var) get_cpu_var(var)
  7647. +#define put_locked_var(lvar, var) put_cpu_var(var)
  7648. +
  7649. +#define local_lock_cpu(lvar) get_cpu()
  7650. +#define local_unlock_cpu(lvar) put_cpu()
  7651. +
  7652. +#endif
  7653. +
  7654. +#endif
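
A sketch (not part of the patch) of a typical local-lock conversion, with hypothetical names and assuming the per-CPU list heads are initialized elsewhere: the macros preserve the !RT behaviour (local_irq_save/restore) while RT substitutes a per-CPU sleeping lock plus migrate_disable(), keeping the section preemptible.

/*
 * Illustrative sketch, not part of the patch; all names are hypothetical
 * and the per-CPU list heads are assumed to be INIT_LIST_HEAD()ed at boot.
 */
static DEFINE_LOCAL_IRQ_LOCK(my_llock);
static DEFINE_PER_CPU(struct list_head, my_list);

static void my_queue_item(struct list_head *item)
{
	unsigned long flags;

	/* !RT: local_irq_save(); RT: per-CPU sleeping lock + migrate_disable() */
	local_lock_irqsave(my_llock, flags);
	list_add_tail(item, this_cpu_ptr(&my_list));
	local_unlock_irqrestore(my_llock, flags);
}
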
  7655. diff -Nur linux-3.18.10.orig/include/linux/mm_types.h linux-3.18.10/include/linux/mm_types.h
  7656. --- linux-3.18.10.orig/include/linux/mm_types.h 2015-03-24 02:05:12.000000000 +0100
  7657. +++ linux-3.18.10/include/linux/mm_types.h 2015-03-26 12:42:18.667588326 +0100
  7658. @@ -11,6 +11,7 @@
  7659. #include <linux/completion.h>
  7660. #include <linux/cpumask.h>
  7661. #include <linux/page-debug-flags.h>
  7662. +#include <linux/rcupdate.h>
  7663. #include <linux/uprobes.h>
  7664. #include <linux/page-flags-layout.h>
  7665. #include <asm/page.h>
  7666. @@ -454,6 +455,9 @@
  7667. bool tlb_flush_pending;
  7668. #endif
  7669. struct uprobes_state uprobes_state;
  7670. +#ifdef CONFIG_PREEMPT_RT_BASE
  7671. + struct rcu_head delayed_drop;
  7672. +#endif
  7673. };
  7674. static inline void mm_init_cpumask(struct mm_struct *mm)
  7675. diff -Nur linux-3.18.10.orig/include/linux/mutex.h linux-3.18.10/include/linux/mutex.h
  7676. --- linux-3.18.10.orig/include/linux/mutex.h 2015-03-24 02:05:12.000000000 +0100
  7677. +++ linux-3.18.10/include/linux/mutex.h 2015-03-26 12:42:18.667588326 +0100
  7678. @@ -19,6 +19,17 @@
  7679. #include <asm/processor.h>
  7680. #include <linux/osq_lock.h>
  7681. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  7682. +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
  7683. + , .dep_map = { .name = #lockname }
  7684. +#else
  7685. +# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
  7686. +#endif
  7687. +
  7688. +#ifdef CONFIG_PREEMPT_RT_FULL
  7689. +# include <linux/mutex_rt.h>
  7690. +#else
  7691. +
  7692. /*
  7693. * Simple, straightforward mutexes with strict semantics:
  7694. *
  7695. @@ -100,13 +111,6 @@
  7696. static inline void mutex_destroy(struct mutex *lock) {}
  7697. #endif
  7698. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  7699. -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
  7700. - , .dep_map = { .name = #lockname }
  7701. -#else
  7702. -# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
  7703. -#endif
  7704. -
  7705. #define __MUTEX_INITIALIZER(lockname) \
  7706. { .count = ATOMIC_INIT(1) \
  7707. , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
  7708. @@ -174,6 +178,8 @@
  7709. extern int mutex_trylock(struct mutex *lock);
  7710. extern void mutex_unlock(struct mutex *lock);
  7711. +#endif /* !PREEMPT_RT_FULL */
  7712. +
  7713. extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
  7714. #endif /* __LINUX_MUTEX_H */
  7715. diff -Nur linux-3.18.10.orig/include/linux/mutex_rt.h linux-3.18.10/include/linux/mutex_rt.h
  7716. --- linux-3.18.10.orig/include/linux/mutex_rt.h 1970-01-01 01:00:00.000000000 +0100
  7717. +++ linux-3.18.10/include/linux/mutex_rt.h 2015-03-26 12:42:18.667588326 +0100
  7718. @@ -0,0 +1,84 @@
  7719. +#ifndef __LINUX_MUTEX_RT_H
  7720. +#define __LINUX_MUTEX_RT_H
  7721. +
  7722. +#ifndef __LINUX_MUTEX_H
  7723. +#error "Please include mutex.h"
  7724. +#endif
  7725. +
  7726. +#include <linux/rtmutex.h>
  7727. +
  7728. +/* FIXME: Just for __lockfunc */
  7729. +#include <linux/spinlock.h>
  7730. +
  7731. +struct mutex {
  7732. + struct rt_mutex lock;
  7733. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  7734. + struct lockdep_map dep_map;
  7735. +#endif
  7736. +};
  7737. +
  7738. +#define __MUTEX_INITIALIZER(mutexname) \
  7739. + { \
  7740. + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \
  7741. + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \
  7742. + }
  7743. +
  7744. +#define DEFINE_MUTEX(mutexname) \
  7745. + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
  7746. +
  7747. +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key);
  7748. +extern void __lockfunc _mutex_lock(struct mutex *lock);
  7749. +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock);
  7750. +extern int __lockfunc _mutex_lock_killable(struct mutex *lock);
  7751. +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass);
  7752. +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
  7753. +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass);
  7754. +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass);
  7755. +extern int __lockfunc _mutex_trylock(struct mutex *lock);
  7756. +extern void __lockfunc _mutex_unlock(struct mutex *lock);
  7757. +
  7758. +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock)
  7759. +#define mutex_lock(l) _mutex_lock(l)
  7760. +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l)
  7761. +#define mutex_lock_killable(l) _mutex_lock_killable(l)
  7762. +#define mutex_trylock(l) _mutex_trylock(l)
  7763. +#define mutex_unlock(l) _mutex_unlock(l)
  7764. +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock)
  7765. +
  7766. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  7767. +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s)
  7768. +# define mutex_lock_interruptible_nested(l, s) \
  7769. + _mutex_lock_interruptible_nested(l, s)
  7770. +# define mutex_lock_killable_nested(l, s) \
  7771. + _mutex_lock_killable_nested(l, s)
  7772. +
  7773. +# define mutex_lock_nest_lock(lock, nest_lock) \
  7774. +do { \
  7775. + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
  7776. + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
  7777. +} while (0)
  7778. +
  7779. +#else
  7780. +# define mutex_lock_nested(l, s) _mutex_lock(l)
  7781. +# define mutex_lock_interruptible_nested(l, s) \
  7782. + _mutex_lock_interruptible(l)
  7783. +# define mutex_lock_killable_nested(l, s) \
  7784. + _mutex_lock_killable(l)
  7785. +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
  7786. +#endif
  7787. +
  7788. +# define mutex_init(mutex) \
  7789. +do { \
  7790. + static struct lock_class_key __key; \
  7791. + \
  7792. + rt_mutex_init(&(mutex)->lock); \
  7793. + __mutex_do_init((mutex), #mutex, &__key); \
  7794. +} while (0)
  7795. +
  7796. +# define __mutex_init(mutex, name, key) \
  7797. +do { \
  7798. + rt_mutex_init(&(mutex)->lock); \
  7799. + __mutex_do_init((mutex), name, key); \
  7800. +} while (0)
  7801. +
  7802. +#endif
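
A sketch (not part of the patch) showing that call sites need no changes under either flavour; the names are hypothetical.

/*
 * Illustrative sketch, not part of the patch; names are hypothetical.
 * Callers keep using the regular mutex API and transparently pick up
 * the rt_mutex based implementation when PREEMPT_RT_FULL is enabled.
 */
static DEFINE_MUTEX(my_cfg_lock);
static int my_cfg_value;

static void my_set_cfg(int val)
{
	mutex_lock(&my_cfg_lock);
	my_cfg_value = val;
	mutex_unlock(&my_cfg_lock);
}

Keeping the mutex API identical is what confines the RT substitution to this header and its out-of-line _mutex_* helpers.
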
  7803. diff -Nur linux-3.18.10.orig/include/linux/netdevice.h linux-3.18.10/include/linux/netdevice.h
  7804. --- linux-3.18.10.orig/include/linux/netdevice.h 2015-03-24 02:05:12.000000000 +0100
  7805. +++ linux-3.18.10/include/linux/netdevice.h 2015-03-26 12:42:18.667588326 +0100
  7806. @@ -2345,6 +2345,7 @@
  7807. unsigned int dropped;
  7808. struct sk_buff_head input_pkt_queue;
  7809. struct napi_struct backlog;
  7810. + struct sk_buff_head tofree_queue;
  7811. #ifdef CONFIG_NET_FLOW_LIMIT
  7812. struct sd_flow_limit __rcu *flow_limit;
  7813. diff -Nur linux-3.18.10.orig/include/linux/netfilter/x_tables.h linux-3.18.10/include/linux/netfilter/x_tables.h
  7814. --- linux-3.18.10.orig/include/linux/netfilter/x_tables.h 2015-03-24 02:05:12.000000000 +0100
  7815. +++ linux-3.18.10/include/linux/netfilter/x_tables.h 2015-03-26 12:42:18.667588326 +0100
  7816. @@ -3,6 +3,7 @@
  7817. #include <linux/netdevice.h>
  7818. +#include <linux/locallock.h>
  7819. #include <uapi/linux/netfilter/x_tables.h>
  7820. /**
  7821. @@ -282,6 +283,8 @@
  7822. */
  7823. DECLARE_PER_CPU(seqcount_t, xt_recseq);
  7824. +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock);
  7825. +
  7826. /**
  7827. * xt_write_recseq_begin - start of a write section
  7828. *
  7829. @@ -296,6 +299,9 @@
  7830. {
  7831. unsigned int addend;
  7832. + /* RT protection */
  7833. + local_lock(xt_write_lock);
  7834. +
  7835. /*
  7836. * Low order bit of sequence is set if we already
  7837. * called xt_write_recseq_begin().
  7838. @@ -326,6 +332,7 @@
  7839. /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */
  7840. smp_wmb();
  7841. __this_cpu_add(xt_recseq.sequence, addend);
  7842. + local_unlock(xt_write_lock);
  7843. }
  7844. /*
  7845. diff -Nur linux-3.18.10.orig/include/linux/notifier.h linux-3.18.10/include/linux/notifier.h
  7846. --- linux-3.18.10.orig/include/linux/notifier.h 2015-03-24 02:05:12.000000000 +0100
  7847. +++ linux-3.18.10/include/linux/notifier.h 2015-03-26 12:42:18.667588326 +0100
  7848. @@ -6,7 +6,7 @@
  7849. *
  7850. * Alan Cox <Alan.Cox@linux.org>
  7851. */
  7852. -
  7853. +
  7854. #ifndef _LINUX_NOTIFIER_H
  7855. #define _LINUX_NOTIFIER_H
  7856. #include <linux/errno.h>
  7857. @@ -42,9 +42,7 @@
  7858. * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
  7859. * As compensation, srcu_notifier_chain_unregister() is rather expensive.
  7860. * SRCU notifier chains should be used when the chain will be called very
  7861. - * often but notifier_blocks will seldom be removed. Also, SRCU notifier
  7862. - * chains are slightly more difficult to use because they require special
  7863. - * runtime initialization.
  7864. + * often but notifier_blocks will seldom be removed.
  7865. */
  7866. typedef int (*notifier_fn_t)(struct notifier_block *nb,
  7867. @@ -88,7 +86,7 @@
  7868. (name)->head = NULL; \
  7869. } while (0)
  7870. -/* srcu_notifier_heads must be initialized and cleaned up dynamically */
  7871. +/* srcu_notifier_heads must be cleaned up dynamically */
  7872. extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
  7873. #define srcu_cleanup_notifier_head(name) \
  7874. cleanup_srcu_struct(&(name)->srcu);
  7875. @@ -101,7 +99,13 @@
  7876. .head = NULL }
  7877. #define RAW_NOTIFIER_INIT(name) { \
  7878. .head = NULL }
  7879. -/* srcu_notifier_heads cannot be initialized statically */
  7880. +
  7881. +#define SRCU_NOTIFIER_INIT(name, pcpu) \
  7882. + { \
  7883. + .mutex = __MUTEX_INITIALIZER(name.mutex), \
  7884. + .head = NULL, \
  7885. + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \
  7886. + }
  7887. #define ATOMIC_NOTIFIER_HEAD(name) \
  7888. struct atomic_notifier_head name = \
  7889. @@ -113,6 +117,18 @@
  7890. struct raw_notifier_head name = \
  7891. RAW_NOTIFIER_INIT(name)
  7892. +#define _SRCU_NOTIFIER_HEAD(name, mod) \
  7893. + static DEFINE_PER_CPU(struct srcu_struct_array, \
  7894. + name##_head_srcu_array); \
  7895. + mod struct srcu_notifier_head name = \
  7896. + SRCU_NOTIFIER_INIT(name, name##_head_srcu_array)
  7897. +
  7898. +#define SRCU_NOTIFIER_HEAD(name) \
  7899. + _SRCU_NOTIFIER_HEAD(name, )
  7900. +
  7901. +#define SRCU_NOTIFIER_HEAD_STATIC(name) \
  7902. + _SRCU_NOTIFIER_HEAD(name, static)
  7903. +
  7904. #ifdef __KERNEL__
  7905. extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
  7906. @@ -182,12 +198,12 @@
  7907. /*
  7908. * Declared notifiers so far. I can imagine quite a few more chains
  7909. - * over time (eg laptop power reset chains, reboot chain (to clean
  7910. + * over time (eg laptop power reset chains, reboot chain (to clean
  7911. * device units up), device [un]mount chain, module load/unload chain,
  7912. - * low memory chain, screenblank chain (for plug in modular screenblankers)
  7913. + * low memory chain, screenblank chain (for plug in modular screenblankers)
  7914. * VC switch chains (for loadable kernel svgalib VC switch helpers) etc...
  7915. */
  7916. -
  7917. +
  7918. /* CPU notfiers are defined in include/linux/cpu.h. */
  7919. /* netdevice notifiers are defined in include/linux/netdevice.h */
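
A sketch (not part of the patch) of the new static declaration; chain, block and callback names are hypothetical. Only the declaration changes: registration and invocation still use the existing srcu_notifier_* calls.

/*
 * Illustrative sketch, not part of the patch. SRCU_NOTIFIER_HEAD_STATIC()
 * replaces the previous runtime srcu_init_notifier_head() call.
 */
SRCU_NOTIFIER_HEAD_STATIC(my_chain);

static int my_event_cb(struct notifier_block *nb, unsigned long action,
		       void *data)
{
	return NOTIFY_OK;
}

static struct notifier_block my_nb = { .notifier_call = my_event_cb };

static int __init my_setup(void)
{
	srcu_notifier_chain_register(&my_chain, &my_nb);
	srcu_notifier_call_chain(&my_chain, 0, NULL);
	return 0;
}
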
  7920. diff -Nur linux-3.18.10.orig/include/linux/percpu.h linux-3.18.10/include/linux/percpu.h
  7921. --- linux-3.18.10.orig/include/linux/percpu.h 2015-03-24 02:05:12.000000000 +0100
  7922. +++ linux-3.18.10/include/linux/percpu.h 2015-03-26 12:42:18.667588326 +0100
  7923. @@ -23,6 +23,35 @@
  7924. PERCPU_MODULE_RESERVE)
  7925. #endif
  7926. +#ifdef CONFIG_PREEMPT_RT_FULL
  7927. +
  7928. +#define get_local_var(var) (*({ \
  7929. + migrate_disable(); \
  7930. + &__get_cpu_var(var); }))
  7931. +
  7932. +#define put_local_var(var) do { \
  7933. + (void)&(var); \
  7934. + migrate_enable(); \
  7935. +} while (0)
  7936. +
  7937. +# define get_local_ptr(var) ({ \
  7938. + migrate_disable(); \
  7939. + this_cpu_ptr(var); })
  7940. +
  7941. +# define put_local_ptr(var) do { \
  7942. + (void)(var); \
  7943. + migrate_enable(); \
  7944. +} while (0)
  7945. +
  7946. +#else
  7947. +
  7948. +#define get_local_var(var) get_cpu_var(var)
  7949. +#define put_local_var(var) put_cpu_var(var)
  7950. +#define get_local_ptr(var) get_cpu_ptr(var)
  7951. +#define put_local_ptr(var) put_cpu_ptr(var)
  7952. +
  7953. +#endif
  7954. +
  7955. /* minimum unit size, also is the maximum supported allocation size */
  7956. #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
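
A sketch (not part of the patch) of the get_local_var()/put_local_var() pattern, with hypothetical names and assuming the per-CPU queues are skb_queue_head_init()ed at boot: the queue's internal lock serializes concurrent users, so only CPU migration has to be excluded, which is all RT's migrate_disable() provides.

/*
 * Illustrative sketch, not part of the patch; names are hypothetical.
 * On !RT this is get_cpu_var()/put_cpu_var(); on RT the section stays
 * preemptible and is merely pinned to the current CPU.
 */
static DEFINE_PER_CPU(struct sk_buff_head, my_backlog);

static void my_enqueue(struct sk_buff *skb)
{
	struct sk_buff_head *q = &get_local_var(my_backlog);

	skb_queue_tail(q, skb);		/* serialized by q->lock */
	put_local_var(my_backlog);
}
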
  7957. diff -Nur linux-3.18.10.orig/include/linux/pid.h linux-3.18.10/include/linux/pid.h
  7958. --- linux-3.18.10.orig/include/linux/pid.h 2015-03-24 02:05:12.000000000 +0100
  7959. +++ linux-3.18.10/include/linux/pid.h 2015-03-26 12:42:18.667588326 +0100
  7960. @@ -2,6 +2,7 @@
  7961. #define _LINUX_PID_H
  7962. #include <linux/rcupdate.h>
  7963. +#include <linux/atomic.h>
  7964. enum pid_type
  7965. {
  7966. diff -Nur linux-3.18.10.orig/include/linux/preempt.h linux-3.18.10/include/linux/preempt.h
  7967. --- linux-3.18.10.orig/include/linux/preempt.h 2015-03-24 02:05:12.000000000 +0100
  7968. +++ linux-3.18.10/include/linux/preempt.h 2015-03-26 12:42:18.667588326 +0100
  7969. @@ -33,6 +33,20 @@
  7970. #define preempt_count_inc() preempt_count_add(1)
  7971. #define preempt_count_dec() preempt_count_sub(1)
  7972. +#ifdef CONFIG_PREEMPT_LAZY
  7973. +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
  7974. +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
  7975. +#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
  7976. +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
  7977. +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
  7978. +#else
  7979. +#define add_preempt_lazy_count(val) do { } while (0)
  7980. +#define sub_preempt_lazy_count(val) do { } while (0)
  7981. +#define inc_preempt_lazy_count() do { } while (0)
  7982. +#define dec_preempt_lazy_count() do { } while (0)
  7983. +#define preempt_lazy_count() (0)
  7984. +#endif
  7985. +
  7986. #ifdef CONFIG_PREEMPT_COUNT
  7987. #define preempt_disable() \
  7988. @@ -41,13 +55,25 @@
  7989. barrier(); \
  7990. } while (0)
  7991. +#define preempt_lazy_disable() \
  7992. +do { \
  7993. + inc_preempt_lazy_count(); \
  7994. + barrier(); \
  7995. +} while (0)
  7996. +
  7997. #define sched_preempt_enable_no_resched() \
  7998. do { \
  7999. barrier(); \
  8000. preempt_count_dec(); \
  8001. } while (0)
  8002. -#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
  8003. +#ifdef CONFIG_PREEMPT_RT_BASE
  8004. +# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
  8005. +# define preempt_check_resched_rt() preempt_check_resched()
  8006. +#else
  8007. +# define preempt_enable_no_resched() preempt_enable()
  8008. +# define preempt_check_resched_rt() barrier();
  8009. +#endif
  8010. #ifdef CONFIG_PREEMPT
  8011. #define preempt_enable() \
  8012. @@ -63,6 +89,13 @@
  8013. __preempt_schedule(); \
  8014. } while (0)
  8015. +#define preempt_lazy_enable() \
  8016. +do { \
  8017. + dec_preempt_lazy_count(); \
  8018. + barrier(); \
  8019. + preempt_check_resched(); \
  8020. +} while (0)
  8021. +
  8022. #else
  8023. #define preempt_enable() \
  8024. do { \
  8025. @@ -121,6 +154,7 @@
  8026. #define preempt_disable_notrace() barrier()
  8027. #define preempt_enable_no_resched_notrace() barrier()
  8028. #define preempt_enable_notrace() barrier()
  8029. +#define preempt_check_resched_rt() barrier()
  8030. #endif /* CONFIG_PREEMPT_COUNT */
  8031. @@ -140,10 +174,31 @@
  8032. } while (0)
  8033. #define preempt_fold_need_resched() \
  8034. do { \
  8035. - if (tif_need_resched()) \
  8036. + if (tif_need_resched_now()) \
  8037. set_preempt_need_resched(); \
  8038. } while (0)
  8039. +#ifdef CONFIG_PREEMPT_RT_FULL
  8040. +# define preempt_disable_rt() preempt_disable()
  8041. +# define preempt_enable_rt() preempt_enable()
  8042. +# define preempt_disable_nort() barrier()
  8043. +# define preempt_enable_nort() barrier()
  8044. +# ifdef CONFIG_SMP
  8045. + extern void migrate_disable(void);
  8046. + extern void migrate_enable(void);
  8047. +# else /* CONFIG_SMP */
  8048. +# define migrate_disable() barrier()
  8049. +# define migrate_enable() barrier()
  8050. +# endif /* CONFIG_SMP */
  8051. +#else
  8052. +# define preempt_disable_rt() barrier()
  8053. +# define preempt_enable_rt() barrier()
  8054. +# define preempt_disable_nort() preempt_disable()
  8055. +# define preempt_enable_nort() preempt_enable()
  8056. +# define migrate_disable() preempt_disable()
  8057. +# define migrate_enable() preempt_enable()
  8058. +#endif
  8059. +
  8060. #ifdef CONFIG_PREEMPT_NOTIFIERS
  8061. struct preempt_notifier;
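
A sketch (not part of the patch) of migrate_disable()/migrate_enable() as the RT-friendly replacement for short preempt-disabled regions that may end up sleeping; the function name is hypothetical.

/*
 * Illustrative sketch, not part of the patch. On !RT migrate_disable()
 * maps to preempt_disable(); on RT it only pins the task to its CPU,
 * so the code inside may still take converted (sleeping) spinlocks.
 */
static void my_per_cpu_work(void)
{
	int cpu;

	migrate_disable();
	cpu = smp_processor_id();	/* stable until migrate_enable() */
	pr_debug("running on CPU %d\n", cpu);
	/* ... per-CPU work that may acquire sleeping locks on RT ... */
	migrate_enable();
}
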
  8062. diff -Nur linux-3.18.10.orig/include/linux/preempt_mask.h linux-3.18.10/include/linux/preempt_mask.h
  8063. --- linux-3.18.10.orig/include/linux/preempt_mask.h 2015-03-24 02:05:12.000000000 +0100
  8064. +++ linux-3.18.10/include/linux/preempt_mask.h 2015-03-26 12:42:18.667588326 +0100
  8065. @@ -44,16 +44,26 @@
  8066. #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
  8067. #define NMI_OFFSET (1UL << NMI_SHIFT)
  8068. -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
  8069. +#ifndef CONFIG_PREEMPT_RT_FULL
  8070. +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
  8071. +#else
  8072. +# define SOFTIRQ_DISABLE_OFFSET (0)
  8073. +#endif
  8074. #define PREEMPT_ACTIVE_BITS 1
  8075. #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
  8076. #define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
  8077. #define hardirq_count() (preempt_count() & HARDIRQ_MASK)
  8078. -#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
  8079. #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
  8080. | NMI_MASK))
  8081. +#ifndef CONFIG_PREEMPT_RT_FULL
  8082. +# define softirq_count() (preempt_count() & SOFTIRQ_MASK)
  8083. +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
  8084. +#else
  8085. +# define softirq_count() (0UL)
  8086. +extern int in_serving_softirq(void);
  8087. +#endif
  8088. /*
  8089. * Are we doing bottom half or hardware interrupt processing?
  8090. @@ -64,7 +74,6 @@
  8091. #define in_irq() (hardirq_count())
  8092. #define in_softirq() (softirq_count())
  8093. #define in_interrupt() (irq_count())
  8094. -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
  8095. /*
  8096. * Are we in NMI context?
  8097. diff -Nur linux-3.18.10.orig/include/linux/printk.h linux-3.18.10/include/linux/printk.h
  8098. --- linux-3.18.10.orig/include/linux/printk.h 2015-03-24 02:05:12.000000000 +0100
  8099. +++ linux-3.18.10/include/linux/printk.h 2015-03-26 12:42:18.667588326 +0100
  8100. @@ -119,9 +119,11 @@
  8101. extern asmlinkage __printf(1, 2)
  8102. void early_printk(const char *fmt, ...);
  8103. void early_vprintk(const char *fmt, va_list ap);
  8104. +extern void printk_kill(void);
  8105. #else
  8106. static inline __printf(1, 2) __cold
  8107. void early_printk(const char *s, ...) { }
  8108. +static inline void printk_kill(void) { }
  8109. #endif
  8110. #ifdef CONFIG_PRINTK
  8111. @@ -155,7 +157,6 @@
  8112. #define printk_ratelimit() __printk_ratelimit(__func__)
  8113. extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
  8114. unsigned int interval_msec);
  8115. -
  8116. extern int printk_delay_msec;
  8117. extern int dmesg_restrict;
  8118. extern int kptr_restrict;
  8119. diff -Nur linux-3.18.10.orig/include/linux/radix-tree.h linux-3.18.10/include/linux/radix-tree.h
  8120. --- linux-3.18.10.orig/include/linux/radix-tree.h 2015-03-24 02:05:12.000000000 +0100
  8121. +++ linux-3.18.10/include/linux/radix-tree.h 2015-03-26 12:42:18.667588326 +0100
  8122. @@ -277,8 +277,13 @@
  8123. unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
  8124. void ***results, unsigned long *indices,
  8125. unsigned long first_index, unsigned int max_items);
  8126. +#ifndef CONFIG_PREEMPT_RT_FULL
  8127. int radix_tree_preload(gfp_t gfp_mask);
  8128. int radix_tree_maybe_preload(gfp_t gfp_mask);
  8129. +#else
  8130. +static inline int radix_tree_preload(gfp_t gm) { return 0; }
  8131. +static inline int radix_tree_maybe_preload(gfp_t gfp_mask) { return 0; }
  8132. +#endif
  8133. void radix_tree_init(void);
  8134. void *radix_tree_tag_set(struct radix_tree_root *root,
  8135. unsigned long index, unsigned int tag);
  8136. @@ -303,7 +308,7 @@
  8137. static inline void radix_tree_preload_end(void)
  8138. {
  8139. - preempt_enable();
  8140. + preempt_enable_nort();
  8141. }
  8142. /**
  8143. diff -Nur linux-3.18.10.orig/include/linux/random.h linux-3.18.10/include/linux/random.h
  8144. --- linux-3.18.10.orig/include/linux/random.h 2015-03-24 02:05:12.000000000 +0100
  8145. +++ linux-3.18.10/include/linux/random.h 2015-03-26 12:42:18.667588326 +0100
  8146. @@ -11,7 +11,7 @@
  8147. extern void add_device_randomness(const void *, unsigned int);
  8148. extern void add_input_randomness(unsigned int type, unsigned int code,
  8149. unsigned int value);
  8150. -extern void add_interrupt_randomness(int irq, int irq_flags);
  8151. +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip);
  8152. extern void get_random_bytes(void *buf, int nbytes);
  8153. extern void get_random_bytes_arch(void *buf, int nbytes);
  8154. diff -Nur linux-3.18.10.orig/include/linux/rcupdate.h linux-3.18.10/include/linux/rcupdate.h
  8155. --- linux-3.18.10.orig/include/linux/rcupdate.h 2015-03-24 02:05:12.000000000 +0100
  8156. +++ linux-3.18.10/include/linux/rcupdate.h 2015-03-26 12:42:18.667588326 +0100
  8157. @@ -147,6 +147,9 @@
  8158. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  8159. +#ifdef CONFIG_PREEMPT_RT_FULL
  8160. +#define call_rcu_bh call_rcu
  8161. +#else
  8162. /**
  8163. * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
  8164. * @head: structure to be used for queueing the RCU updates.
  8165. @@ -170,6 +173,7 @@
  8166. */
  8167. void call_rcu_bh(struct rcu_head *head,
  8168. void (*func)(struct rcu_head *head));
  8169. +#endif
  8170. /**
  8171. * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
  8172. @@ -231,6 +235,11 @@
  8173. * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
  8174. */
  8175. #define rcu_preempt_depth() (current->rcu_read_lock_nesting)
  8176. +#ifndef CONFIG_PREEMPT_RT_FULL
  8177. +#define sched_rcu_preempt_depth() rcu_preempt_depth()
  8178. +#else
  8179. +static inline int sched_rcu_preempt_depth(void) { return 0; }
  8180. +#endif
  8181. #else /* #ifdef CONFIG_PREEMPT_RCU */
  8182. @@ -254,6 +263,8 @@
  8183. return 0;
  8184. }
  8185. +#define sched_rcu_preempt_depth() rcu_preempt_depth()
  8186. +
  8187. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  8188. /* Internal to kernel */
  8189. @@ -430,7 +441,14 @@
  8190. int debug_lockdep_rcu_enabled(void);
  8191. int rcu_read_lock_held(void);
  8192. +#ifdef CONFIG_PREEMPT_RT_FULL
  8193. +static inline int rcu_read_lock_bh_held(void)
  8194. +{
  8195. + return rcu_read_lock_held();
  8196. +}
  8197. +#else
  8198. int rcu_read_lock_bh_held(void);
  8199. +#endif
  8200. /**
  8201. * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
  8202. @@ -955,10 +973,14 @@
  8203. static inline void rcu_read_lock_bh(void)
  8204. {
  8205. local_bh_disable();
  8206. +#ifdef CONFIG_PREEMPT_RT_FULL
  8207. + rcu_read_lock();
  8208. +#else
  8209. __acquire(RCU_BH);
  8210. rcu_lock_acquire(&rcu_bh_lock_map);
  8211. rcu_lockdep_assert(rcu_is_watching(),
  8212. "rcu_read_lock_bh() used illegally while idle");
  8213. +#endif
  8214. }
  8215. /*
  8216. @@ -968,10 +990,14 @@
  8217. */
  8218. static inline void rcu_read_unlock_bh(void)
  8219. {
  8220. +#ifdef CONFIG_PREEMPT_RT_FULL
  8221. + rcu_read_unlock();
  8222. +#else
  8223. rcu_lockdep_assert(rcu_is_watching(),
  8224. "rcu_read_unlock_bh() used illegally while idle");
  8225. rcu_lock_release(&rcu_bh_lock_map);
  8226. __release(RCU_BH);
  8227. +#endif
  8228. local_bh_enable();
  8229. }
  8230. diff -Nur linux-3.18.10.orig/include/linux/rcutree.h linux-3.18.10/include/linux/rcutree.h
  8231. --- linux-3.18.10.orig/include/linux/rcutree.h 2015-03-24 02:05:12.000000000 +0100
  8232. +++ linux-3.18.10/include/linux/rcutree.h 2015-03-26 12:42:18.667588326 +0100
  8233. @@ -46,7 +46,11 @@
  8234. rcu_note_context_switch(cpu);
  8235. }
  8236. +#ifdef CONFIG_PREEMPT_RT_FULL
  8237. +# define synchronize_rcu_bh synchronize_rcu
  8238. +#else
  8239. void synchronize_rcu_bh(void);
  8240. +#endif
  8241. void synchronize_sched_expedited(void);
  8242. void synchronize_rcu_expedited(void);
  8243. @@ -74,7 +78,11 @@
  8244. }
  8245. void rcu_barrier(void);
  8246. +#ifdef CONFIG_PREEMPT_RT_FULL
  8247. +# define rcu_barrier_bh rcu_barrier
  8248. +#else
  8249. void rcu_barrier_bh(void);
  8250. +#endif
  8251. void rcu_barrier_sched(void);
  8252. unsigned long get_state_synchronize_rcu(void);
  8253. void cond_synchronize_rcu(unsigned long oldstate);
  8254. @@ -82,12 +90,10 @@
  8255. extern unsigned long rcutorture_testseq;
  8256. extern unsigned long rcutorture_vernum;
  8257. long rcu_batches_completed(void);
  8258. -long rcu_batches_completed_bh(void);
  8259. long rcu_batches_completed_sched(void);
  8260. void show_rcu_gp_kthreads(void);
  8261. void rcu_force_quiescent_state(void);
  8262. -void rcu_bh_force_quiescent_state(void);
  8263. void rcu_sched_force_quiescent_state(void);
  8264. void exit_rcu(void);
  8265. @@ -97,4 +103,12 @@
  8266. bool rcu_is_watching(void);
  8267. +#ifndef CONFIG_PREEMPT_RT_FULL
  8268. +void rcu_bh_force_quiescent_state(void);
  8269. +long rcu_batches_completed_bh(void);
  8270. +#else
  8271. +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state
  8272. +# define rcu_batches_completed_bh rcu_batches_completed
  8273. +#endif
  8274. +
  8275. #endif /* __LINUX_RCUTREE_H */
  8276. diff -Nur linux-3.18.10.orig/include/linux/rtmutex.h linux-3.18.10/include/linux/rtmutex.h
  8277. --- linux-3.18.10.orig/include/linux/rtmutex.h 2015-03-24 02:05:12.000000000 +0100
  8278. +++ linux-3.18.10/include/linux/rtmutex.h 2015-03-26 12:42:18.667588326 +0100
  8279. @@ -14,10 +14,14 @@
  8280. #include <linux/linkage.h>
  8281. #include <linux/rbtree.h>
  8282. -#include <linux/spinlock_types.h>
  8283. +#include <linux/spinlock_types_raw.h>
  8284. extern int max_lock_depth; /* for sysctl */
  8285. +#ifdef CONFIG_DEBUG_MUTEXES
  8286. +#include <linux/debug_locks.h>
  8287. +#endif
  8288. +
  8289. /**
  8290. * The rt_mutex structure
  8291. *
  8292. @@ -31,8 +35,8 @@
  8293. struct rb_root waiters;
  8294. struct rb_node *waiters_leftmost;
  8295. struct task_struct *owner;
  8296. -#ifdef CONFIG_DEBUG_RT_MUTEXES
  8297. int save_state;
  8298. +#ifdef CONFIG_DEBUG_RT_MUTEXES
  8299. const char *name, *file;
  8300. int line;
  8301. void *magic;
  8302. @@ -55,22 +59,33 @@
  8303. # define rt_mutex_debug_check_no_locks_held(task) do { } while (0)
  8304. #endif
  8305. +# define rt_mutex_init(mutex) \
  8306. + do { \
  8307. + raw_spin_lock_init(&(mutex)->wait_lock); \
  8308. + __rt_mutex_init(mutex, #mutex); \
  8309. + } while (0)
  8310. +
  8311. #ifdef CONFIG_DEBUG_RT_MUTEXES
  8312. # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
  8313. , .name = #mutexname, .file = __FILE__, .line = __LINE__
  8314. -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__)
  8315. extern void rt_mutex_debug_task_free(struct task_struct *tsk);
  8316. #else
  8317. # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
  8318. -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL)
  8319. # define rt_mutex_debug_task_free(t) do { } while (0)
  8320. #endif
  8321. -#define __RT_MUTEX_INITIALIZER(mutexname) \
  8322. - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
  8323. +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
  8324. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
  8325. , .waiters = RB_ROOT \
  8326. , .owner = NULL \
  8327. - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
  8328. + __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
  8329. +
  8330. +#define __RT_MUTEX_INITIALIZER(mutexname) \
  8331. + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) }
  8332. +
  8333. +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
  8334. + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
  8335. + , .save_state = 1 }
  8336. #define DEFINE_RT_MUTEX(mutexname) \
  8337. struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
  8338. @@ -91,6 +106,7 @@
  8339. extern void rt_mutex_lock(struct rt_mutex *lock);
  8340. extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
  8341. +extern int rt_mutex_lock_killable(struct rt_mutex *lock);
  8342. extern int rt_mutex_timed_lock(struct rt_mutex *lock,
  8343. struct hrtimer_sleeper *timeout);
  8344. diff -Nur linux-3.18.10.orig/include/linux/rwlock_rt.h linux-3.18.10/include/linux/rwlock_rt.h
  8345. --- linux-3.18.10.orig/include/linux/rwlock_rt.h 1970-01-01 01:00:00.000000000 +0100
  8346. +++ linux-3.18.10/include/linux/rwlock_rt.h 2015-03-26 12:42:18.667588326 +0100
  8347. @@ -0,0 +1,99 @@
  8348. +#ifndef __LINUX_RWLOCK_RT_H
  8349. +#define __LINUX_RWLOCK_RT_H
  8350. +
  8351. +#ifndef __LINUX_SPINLOCK_H
  8352. +#error Do not include directly. Use spinlock.h
  8353. +#endif
  8354. +
  8355. +#define rwlock_init(rwl) \
  8356. +do { \
  8357. + static struct lock_class_key __key; \
  8358. + \
  8359. + rt_mutex_init(&(rwl)->lock); \
  8360. + __rt_rwlock_init(rwl, #rwl, &__key); \
  8361. +} while (0)
  8362. +
  8363. +extern void __lockfunc rt_write_lock(rwlock_t *rwlock);
  8364. +extern void __lockfunc rt_read_lock(rwlock_t *rwlock);
  8365. +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock);
  8366. +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags);
  8367. +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock);
  8368. +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock);
  8369. +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock);
  8370. +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock);
  8371. +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock);
  8372. +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key);
  8373. +
  8374. +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock))
  8375. +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock))
  8376. +
  8377. +#define write_trylock_irqsave(lock, flags) \
  8378. + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags))
  8379. +
  8380. +#define read_lock_irqsave(lock, flags) \
  8381. + do { \
  8382. + typecheck(unsigned long, flags); \
  8383. + flags = rt_read_lock_irqsave(lock); \
  8384. + } while (0)
  8385. +
  8386. +#define write_lock_irqsave(lock, flags) \
  8387. + do { \
  8388. + typecheck(unsigned long, flags); \
  8389. + flags = rt_write_lock_irqsave(lock); \
  8390. + } while (0)
  8391. +
  8392. +#define read_lock(lock) rt_read_lock(lock)
  8393. +
  8394. +#define read_lock_bh(lock) \
  8395. + do { \
  8396. + local_bh_disable(); \
  8397. + rt_read_lock(lock); \
  8398. + } while (0)
  8399. +
  8400. +#define read_lock_irq(lock) read_lock(lock)
  8401. +
  8402. +#define write_lock(lock) rt_write_lock(lock)
  8403. +
  8404. +#define write_lock_bh(lock) \
  8405. + do { \
  8406. + local_bh_disable(); \
  8407. + rt_write_lock(lock); \
  8408. + } while (0)
  8409. +
  8410. +#define write_lock_irq(lock) write_lock(lock)
  8411. +
  8412. +#define read_unlock(lock) rt_read_unlock(lock)
  8413. +
  8414. +#define read_unlock_bh(lock) \
  8415. + do { \
  8416. + rt_read_unlock(lock); \
  8417. + local_bh_enable(); \
  8418. + } while (0)
  8419. +
  8420. +#define read_unlock_irq(lock) read_unlock(lock)
  8421. +
  8422. +#define write_unlock(lock) rt_write_unlock(lock)
  8423. +
  8424. +#define write_unlock_bh(lock) \
  8425. + do { \
  8426. + rt_write_unlock(lock); \
  8427. + local_bh_enable(); \
  8428. + } while (0)
  8429. +
  8430. +#define write_unlock_irq(lock) write_unlock(lock)
  8431. +
  8432. +#define read_unlock_irqrestore(lock, flags) \
  8433. + do { \
  8434. + typecheck(unsigned long, flags); \
  8435. + (void) flags; \
  8436. + rt_read_unlock(lock); \
  8437. + } while (0)
  8438. +
  8439. +#define write_unlock_irqrestore(lock, flags) \
  8440. + do { \
  8441. + typecheck(unsigned long, flags); \
  8442. + (void) flags; \
  8443. + rt_write_unlock(lock); \
  8444. + } while (0)
  8445. +
  8446. +#endif
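
Usage sketch (annotation, not part of the patch; my_rwlock, my_count and the two helpers are illustrative names): under PREEMPT_RT_FULL every variant above ends up in rt_read_lock()/rt_write_lock(), i.e. a sleeping rtmutex, and the *_irqsave() forms keep their prototype but, as the macros show, never actually mask interrupts.

    static DEFINE_RWLOCK(my_rwlock);
    static int my_count;

    static void my_update(void)
    {
            unsigned long flags;

            write_lock_irqsave(&my_rwlock, flags);          /* may sleep; IRQs stay on */
            my_count++;
            write_unlock_irqrestore(&my_rwlock, flags);     /* flags is ignored */
    }

    static int my_read(void)
    {
            int val;

            read_lock(&my_rwlock);          /* rt_read_lock() */
            val = my_count;
            read_unlock(&my_rwlock);
            return val;
    }
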
  8447. diff -Nur linux-3.18.10.orig/include/linux/rwlock_types.h linux-3.18.10/include/linux/rwlock_types.h
  8448. --- linux-3.18.10.orig/include/linux/rwlock_types.h 2015-03-24 02:05:12.000000000 +0100
  8449. +++ linux-3.18.10/include/linux/rwlock_types.h 2015-03-26 12:42:18.667588326 +0100
  8450. @@ -1,6 +1,10 @@
  8451. #ifndef __LINUX_RWLOCK_TYPES_H
  8452. #define __LINUX_RWLOCK_TYPES_H
  8453. +#if !defined(__LINUX_SPINLOCK_TYPES_H)
  8454. +# error "Do not include directly, include spinlock_types.h"
  8455. +#endif
  8456. +
  8457. /*
  8458. * include/linux/rwlock_types.h - generic rwlock type definitions
  8459. * and initializers
  8460. @@ -43,6 +47,7 @@
  8461. RW_DEP_MAP_INIT(lockname) }
  8462. #endif
  8463. -#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
  8464. +#define DEFINE_RWLOCK(name) \
  8465. + rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
  8466. #endif /* __LINUX_RWLOCK_TYPES_H */
  8467. diff -Nur linux-3.18.10.orig/include/linux/rwlock_types_rt.h linux-3.18.10/include/linux/rwlock_types_rt.h
  8468. --- linux-3.18.10.orig/include/linux/rwlock_types_rt.h 1970-01-01 01:00:00.000000000 +0100
  8469. +++ linux-3.18.10/include/linux/rwlock_types_rt.h 2015-03-26 12:42:18.667588326 +0100
  8470. @@ -0,0 +1,33 @@
  8471. +#ifndef __LINUX_RWLOCK_TYPES_RT_H
  8472. +#define __LINUX_RWLOCK_TYPES_RT_H
  8473. +
  8474. +#ifndef __LINUX_SPINLOCK_TYPES_H
  8475. +#error "Do not include directly. Include spinlock_types.h instead"
  8476. +#endif
  8477. +
  8478. +/*
  8479. + * rwlocks - rtmutex which allows single reader recursion
  8480. + */
  8481. +typedef struct {
  8482. + struct rt_mutex lock;
  8483. + int read_depth;
  8484. + unsigned int break_lock;
  8485. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  8486. + struct lockdep_map dep_map;
  8487. +#endif
  8488. +} rwlock_t;
  8489. +
  8490. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  8491. +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  8492. +#else
  8493. +# define RW_DEP_MAP_INIT(lockname)
  8494. +#endif
  8495. +
  8496. +#define __RW_LOCK_UNLOCKED(name) \
  8497. + { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \
  8498. + RW_DEP_MAP_INIT(name) }
  8499. +
  8500. +#define DEFINE_RWLOCK(name) \
  8501. + rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
  8502. +
  8503. +#endif
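
Illustrative sketch (not part of the patch; my_tree_lock and the two lookup helpers are made-up names): the read side is the rtmutex itself, owned by a single task, and ->read_depth is what makes the single-reader recursion mentioned in the comment above work.

    static DEFINE_RWLOCK(my_tree_lock);

    static void my_inner_lookup(void)
    {
            read_lock(&my_tree_lock);       /* same owner: read_depth 1 -> 2 */
            /* ... */
            read_unlock(&my_tree_lock);     /* read_depth back to 1 */
    }

    static void my_outer_lookup(void)
    {
            read_lock(&my_tree_lock);       /* acquires the underlying rt_mutex */
            my_inner_lookup();              /* recursion is fine for this task */
            read_unlock(&my_tree_lock);     /* a second reader task blocks until here */
    }
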
  8504. diff -Nur linux-3.18.10.orig/include/linux/rwsem.h linux-3.18.10/include/linux/rwsem.h
  8505. --- linux-3.18.10.orig/include/linux/rwsem.h 2015-03-24 02:05:12.000000000 +0100
  8506. +++ linux-3.18.10/include/linux/rwsem.h 2015-03-26 12:42:18.667588326 +0100
  8507. @@ -18,6 +18,10 @@
  8508. #include <linux/osq_lock.h>
  8509. #endif
  8510. +#ifdef CONFIG_PREEMPT_RT_FULL
  8511. +#include <linux/rwsem_rt.h>
  8512. +#else /* PREEMPT_RT_FULL */
  8513. +
  8514. struct rw_semaphore;
  8515. #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
  8516. @@ -177,4 +181,6 @@
  8517. # define up_read_non_owner(sem) up_read(sem)
  8518. #endif
  8519. +#endif /* !PREEMPT_RT_FULL */
  8520. +
  8521. #endif /* _LINUX_RWSEM_H */
  8522. diff -Nur linux-3.18.10.orig/include/linux/rwsem_rt.h linux-3.18.10/include/linux/rwsem_rt.h
  8523. --- linux-3.18.10.orig/include/linux/rwsem_rt.h 1970-01-01 01:00:00.000000000 +0100
  8524. +++ linux-3.18.10/include/linux/rwsem_rt.h 2015-03-26 12:42:18.667588326 +0100
  8525. @@ -0,0 +1,134 @@
  8526. +#ifndef _LINUX_RWSEM_RT_H
  8527. +#define _LINUX_RWSEM_RT_H
  8528. +
  8529. +#ifndef _LINUX_RWSEM_H
  8530. +#error "Include rwsem.h"
  8531. +#endif
  8532. +
  8533. +/*
  8534. + * RW-semaphores are a spinlock plus a reader-depth count.
  8535. + *
  8536. + * Note that the semantics are different from the usual
  8537. + * Linux rw-sems: in PREEMPT_RT mode we do not allow
  8538. + * multiple readers to hold the lock at once; we only allow
  8539. + * a read-lock owner to read-lock recursively. This is
  8540. + * better for latency, makes the implementation inherently
  8541. + * fair and makes it simpler as well.
  8542. + */
  8543. +
  8544. +#include <linux/rtmutex.h>
  8545. +
  8546. +struct rw_semaphore {
  8547. + struct rt_mutex lock;
  8548. + int read_depth;
  8549. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  8550. + struct lockdep_map dep_map;
  8551. +#endif
  8552. +};
  8553. +
  8554. +#define __RWSEM_INITIALIZER(name) \
  8555. + { .lock = __RT_MUTEX_INITIALIZER(name.lock), \
  8556. + RW_DEP_MAP_INIT(name) }
  8557. +
  8558. +#define DECLARE_RWSEM(lockname) \
  8559. + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
  8560. +
  8561. +extern void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
  8562. + struct lock_class_key *key);
  8563. +
  8564. +#define __rt_init_rwsem(sem, name, key) \
  8565. + do { \
  8566. + rt_mutex_init(&(sem)->lock); \
  8567. + __rt_rwsem_init((sem), (name), (key));\
  8568. + } while (0)
  8569. +
  8570. +#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key)
  8571. +
  8572. +# define rt_init_rwsem(sem) \
  8573. +do { \
  8574. + static struct lock_class_key __key; \
  8575. + \
  8576. + __rt_init_rwsem((sem), #sem, &__key); \
  8577. +} while (0)
  8578. +
  8579. +extern void rt_down_write(struct rw_semaphore *rwsem);
  8580. +extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass);
  8581. +extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass);
  8582. +extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
  8583. + struct lockdep_map *nest);
  8584. +extern void rt_down_read(struct rw_semaphore *rwsem);
  8585. +extern int rt_down_write_trylock(struct rw_semaphore *rwsem);
  8586. +extern int rt_down_read_trylock(struct rw_semaphore *rwsem);
  8587. +extern void rt_up_read(struct rw_semaphore *rwsem);
  8588. +extern void rt_up_write(struct rw_semaphore *rwsem);
  8589. +extern void rt_downgrade_write(struct rw_semaphore *rwsem);
  8590. +
  8591. +#define init_rwsem(sem) rt_init_rwsem(sem)
  8592. +#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock)
  8593. +
  8594. +static inline int rwsem_is_contended(struct rw_semaphore *sem)
  8595. +{
  8596. + /* rt_mutex_has_waiters() */
  8597. + return !RB_EMPTY_ROOT(&sem->lock.waiters);
  8598. +}
  8599. +
  8600. +static inline void down_read(struct rw_semaphore *sem)
  8601. +{
  8602. + rt_down_read(sem);
  8603. +}
  8604. +
  8605. +static inline int down_read_trylock(struct rw_semaphore *sem)
  8606. +{
  8607. + return rt_down_read_trylock(sem);
  8608. +}
  8609. +
  8610. +static inline void down_write(struct rw_semaphore *sem)
  8611. +{
  8612. + rt_down_write(sem);
  8613. +}
  8614. +
  8615. +static inline int down_write_trylock(struct rw_semaphore *sem)
  8616. +{
  8617. + return rt_down_write_trylock(sem);
  8618. +}
  8619. +
  8620. +static inline void up_read(struct rw_semaphore *sem)
  8621. +{
  8622. + rt_up_read(sem);
  8623. +}
  8624. +
  8625. +static inline void up_write(struct rw_semaphore *sem)
  8626. +{
  8627. + rt_up_write(sem);
  8628. +}
  8629. +
  8630. +static inline void downgrade_write(struct rw_semaphore *sem)
  8631. +{
  8632. + rt_downgrade_write(sem);
  8633. +}
  8634. +
  8635. +static inline void down_read_nested(struct rw_semaphore *sem, int subclass)
  8636. +{
  8637. + return rt_down_read_nested(sem, subclass);
  8638. +}
  8639. +
  8640. +static inline void down_write_nested(struct rw_semaphore *sem, int subclass)
  8641. +{
  8642. + rt_down_write_nested(sem, subclass);
  8643. +}
  8644. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  8645. +static inline void down_write_nest_lock(struct rw_semaphore *sem,
  8646. + struct rw_semaphore *nest_lock)
  8647. +{
  8648. + rt_down_write_nested_lock(sem, &nest_lock->dep_map);
  8649. +}
  8650. +
  8651. +#else
  8652. +
  8653. +static inline void down_write_nest_lock(struct rw_semaphore *sem,
  8654. + struct rw_semaphore *nest_lock)
  8655. +{
  8656. + rt_down_write_nested_lock(sem, NULL);
  8657. +}
  8658. +#endif
  8659. +#endif
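
Usage sketch (annotation, not part of the patch; my_sem, my_scan and my_modify are illustrative names): as the header comment says, only one reader can hold the semaphore at a time, but that reader may nest down_read() calls; rwsem_is_contended() just checks the rtmutex waiter tree.

    static DECLARE_RWSEM(my_sem);

    static void my_scan(void)
    {
            down_read(&my_sem);             /* rt_down_read(): sleeps behind a writer */
            if (rwsem_is_contended(&my_sem)) {
                    /* waiters queued on the underlying rtmutex: finish quickly */
            }
            up_read(&my_sem);
    }

    static void my_modify(void)
    {
            down_write(&my_sem);            /* exclusive, as in mainline */
            /* ... */
            up_write(&my_sem);
    }
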
  8660. diff -Nur linux-3.18.10.orig/include/linux/sched.h linux-3.18.10/include/linux/sched.h
  8661. --- linux-3.18.10.orig/include/linux/sched.h 2015-03-24 02:05:12.000000000 +0100
  8662. +++ linux-3.18.10/include/linux/sched.h 2015-03-26 12:42:18.667588326 +0100
  8663. @@ -26,6 +26,7 @@
  8664. #include <linux/nodemask.h>
  8665. #include <linux/mm_types.h>
  8666. #include <linux/preempt_mask.h>
  8667. +#include <asm/kmap_types.h>
  8668. #include <asm/page.h>
  8669. #include <asm/ptrace.h>
  8670. @@ -56,6 +57,7 @@
  8671. #include <linux/cred.h>
  8672. #include <linux/llist.h>
  8673. #include <linux/uidgid.h>
  8674. +#include <linux/hardirq.h>
  8675. #include <linux/gfp.h>
  8676. #include <linux/magic.h>
  8677. @@ -235,10 +237,7 @@
  8678. TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
  8679. __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
  8680. -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
  8681. #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
  8682. -#define task_is_stopped_or_traced(task) \
  8683. - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
  8684. #define task_contributes_to_load(task) \
  8685. ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
  8686. (task->flags & PF_FROZEN) == 0)
  8687. @@ -1234,6 +1233,7 @@
  8688. struct task_struct {
  8689. volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
  8690. + volatile long saved_state; /* saved state for "spinlock sleepers" */
  8691. void *stack;
  8692. atomic_t usage;
  8693. unsigned int flags; /* per process flags, defined below */
  8694. @@ -1270,6 +1270,12 @@
  8695. #endif
  8696. unsigned int policy;
  8697. +#ifdef CONFIG_PREEMPT_RT_FULL
  8698. + int migrate_disable;
  8699. +# ifdef CONFIG_SCHED_DEBUG
  8700. + int migrate_disable_atomic;
  8701. +# endif
  8702. +#endif
  8703. int nr_cpus_allowed;
  8704. cpumask_t cpus_allowed;
  8705. @@ -1371,7 +1377,8 @@
  8706. struct cputime prev_cputime;
  8707. #endif
  8708. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  8709. - seqlock_t vtime_seqlock;
  8710. + raw_spinlock_t vtime_lock;
  8711. + seqcount_t vtime_seq;
  8712. unsigned long long vtime_snap;
  8713. enum {
  8714. VTIME_SLEEPING = 0,
  8715. @@ -1387,6 +1394,9 @@
  8716. struct task_cputime cputime_expires;
  8717. struct list_head cpu_timers[3];
  8718. +#ifdef CONFIG_PREEMPT_RT_BASE
  8719. + struct task_struct *posix_timer_list;
  8720. +#endif
  8721. /* process credentials */
  8722. const struct cred __rcu *real_cred; /* objective and real subjective task
  8723. @@ -1419,10 +1429,15 @@
  8724. /* signal handlers */
  8725. struct signal_struct *signal;
  8726. struct sighand_struct *sighand;
  8727. + struct sigqueue *sigqueue_cache;
  8728. sigset_t blocked, real_blocked;
  8729. sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
  8730. struct sigpending pending;
  8731. +#ifdef CONFIG_PREEMPT_RT_FULL
  8732. + /* TODO: move me into ->restart_block ? */
  8733. + struct siginfo forced_info;
  8734. +#endif
  8735. unsigned long sas_ss_sp;
  8736. size_t sas_ss_size;
  8737. @@ -1460,6 +1475,9 @@
  8738. /* mutex deadlock detection */
  8739. struct mutex_waiter *blocked_on;
  8740. #endif
  8741. +#ifdef CONFIG_PREEMPT_RT_FULL
  8742. + int pagefault_disabled;
  8743. +#endif
  8744. #ifdef CONFIG_TRACE_IRQFLAGS
  8745. unsigned int irq_events;
  8746. unsigned long hardirq_enable_ip;
  8747. @@ -1644,6 +1662,12 @@
  8748. unsigned long trace;
  8749. /* bitmask and counter of trace recursion */
  8750. unsigned long trace_recursion;
  8751. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  8752. + u64 preempt_timestamp_hist;
  8753. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  8754. + long timer_offset;
  8755. +#endif
  8756. +#endif
  8757. #endif /* CONFIG_TRACING */
  8758. #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
  8759. unsigned int memcg_kmem_skip_account;
  8760. @@ -1661,11 +1685,19 @@
  8761. unsigned int sequential_io;
  8762. unsigned int sequential_io_avg;
  8763. #endif
  8764. +#ifdef CONFIG_PREEMPT_RT_BASE
  8765. + struct rcu_head put_rcu;
  8766. + int softirq_nestcnt;
  8767. + unsigned int softirqs_raised;
  8768. +#endif
  8769. +#ifdef CONFIG_PREEMPT_RT_FULL
  8770. +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32
  8771. + int kmap_idx;
  8772. + pte_t kmap_pte[KM_TYPE_NR];
  8773. +# endif
  8774. +#endif
  8775. };
  8776. -/* Future-safe accessor for struct task_struct's cpus_allowed. */
  8777. -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
  8778. -
  8779. #define TNF_MIGRATED 0x01
  8780. #define TNF_NO_GROUP 0x02
  8781. #define TNF_SHARED 0x04
  8782. @@ -1700,6 +1732,17 @@
  8783. }
  8784. #endif
  8785. +#ifdef CONFIG_PREEMPT_RT_FULL
  8786. +static inline bool cur_pf_disabled(void) { return current->pagefault_disabled; }
  8787. +#else
  8788. +static inline bool cur_pf_disabled(void) { return false; }
  8789. +#endif
  8790. +
  8791. +static inline bool pagefault_disabled(void)
  8792. +{
  8793. + return in_atomic() || cur_pf_disabled();
  8794. +}
  8795. +
  8796. static inline struct pid *task_pid(struct task_struct *task)
  8797. {
  8798. return task->pids[PIDTYPE_PID].pid;
  8799. @@ -1853,6 +1896,15 @@
  8800. extern void free_task(struct task_struct *tsk);
  8801. #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
  8802. +#ifdef CONFIG_PREEMPT_RT_BASE
  8803. +extern void __put_task_struct_cb(struct rcu_head *rhp);
  8804. +
  8805. +static inline void put_task_struct(struct task_struct *t)
  8806. +{
  8807. + if (atomic_dec_and_test(&t->usage))
  8808. + call_rcu(&t->put_rcu, __put_task_struct_cb);
  8809. +}
  8810. +#else
  8811. extern void __put_task_struct(struct task_struct *t);
  8812. static inline void put_task_struct(struct task_struct *t)
  8813. @@ -1860,6 +1912,7 @@
  8814. if (atomic_dec_and_test(&t->usage))
  8815. __put_task_struct(t);
  8816. }
  8817. +#endif
  8818. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  8819. extern void task_cputime(struct task_struct *t,
  8820. @@ -1898,6 +1951,7 @@
  8821. /*
  8822. * Per process flags
  8823. */
  8824. +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */
  8825. #define PF_EXITING 0x00000004 /* getting shut down */
  8826. #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
  8827. #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
  8828. @@ -2058,6 +2112,10 @@
  8829. extern int set_cpus_allowed_ptr(struct task_struct *p,
  8830. const struct cpumask *new_mask);
  8831. +int migrate_me(void);
  8832. +void tell_sched_cpu_down_begin(int cpu);
  8833. +void tell_sched_cpu_down_done(int cpu);
  8834. +
  8835. #else
  8836. static inline void do_set_cpus_allowed(struct task_struct *p,
  8837. const struct cpumask *new_mask)
  8838. @@ -2070,6 +2128,9 @@
  8839. return -EINVAL;
  8840. return 0;
  8841. }
  8842. +static inline int migrate_me(void) { return 0; }
  8843. +static inline void tell_sched_cpu_down_begin(int cpu) { }
  8844. +static inline void tell_sched_cpu_down_done(int cpu) { }
  8845. #endif
  8846. #ifdef CONFIG_NO_HZ_COMMON
  8847. @@ -2290,6 +2351,7 @@
  8848. extern int wake_up_state(struct task_struct *tsk, unsigned int state);
  8849. extern int wake_up_process(struct task_struct *tsk);
  8850. +extern int wake_up_lock_sleeper(struct task_struct * tsk);
  8851. extern void wake_up_new_task(struct task_struct *tsk);
  8852. #ifdef CONFIG_SMP
  8853. extern void kick_process(struct task_struct *tsk);
  8854. @@ -2406,12 +2468,24 @@
  8855. /* mmdrop drops the mm and the page tables */
  8856. extern void __mmdrop(struct mm_struct *);
  8857. +
  8858. static inline void mmdrop(struct mm_struct * mm)
  8859. {
  8860. if (unlikely(atomic_dec_and_test(&mm->mm_count)))
  8861. __mmdrop(mm);
  8862. }
  8863. +#ifdef CONFIG_PREEMPT_RT_BASE
  8864. +extern void __mmdrop_delayed(struct rcu_head *rhp);
  8865. +static inline void mmdrop_delayed(struct mm_struct *mm)
  8866. +{
  8867. + if (atomic_dec_and_test(&mm->mm_count))
  8868. + call_rcu(&mm->delayed_drop, __mmdrop_delayed);
  8869. +}
  8870. +#else
  8871. +# define mmdrop_delayed(mm) mmdrop(mm)
  8872. +#endif
  8873. +
  8874. /* mmput gets rid of the mappings and all user-space */
  8875. extern void mmput(struct mm_struct *);
  8876. /* Grab a reference to a task's mm, if it is not already going away */
  8877. @@ -2719,6 +2793,43 @@
  8878. return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
  8879. }
  8880. +#ifdef CONFIG_PREEMPT_LAZY
  8881. +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
  8882. +{
  8883. + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
  8884. +}
  8885. +
  8886. +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
  8887. +{
  8888. + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
  8889. +}
  8890. +
  8891. +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
  8892. +{
  8893. + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
  8894. +}
  8895. +
  8896. +static inline int need_resched_lazy(void)
  8897. +{
  8898. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  8899. +}
  8900. +
  8901. +static inline int need_resched_now(void)
  8902. +{
  8903. + return test_thread_flag(TIF_NEED_RESCHED);
  8904. +}
  8905. +
  8906. +#else
  8907. +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
  8908. +static inline int need_resched_lazy(void) { return 0; }
  8909. +
  8910. +static inline int need_resched_now(void)
  8911. +{
  8912. + return test_thread_flag(TIF_NEED_RESCHED);
  8913. +}
  8914. +
  8915. +#endif
  8916. +
  8917. static inline int restart_syscall(void)
  8918. {
  8919. set_tsk_thread_flag(current, TIF_SIGPENDING);
  8920. @@ -2750,6 +2861,51 @@
  8921. return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
  8922. }
  8923. +static inline bool __task_is_stopped_or_traced(struct task_struct *task)
  8924. +{
  8925. + if (task->state & (__TASK_STOPPED | __TASK_TRACED))
  8926. + return true;
  8927. +#ifdef CONFIG_PREEMPT_RT_FULL
  8928. + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
  8929. + return true;
  8930. +#endif
  8931. + return false;
  8932. +}
  8933. +
  8934. +static inline bool task_is_stopped_or_traced(struct task_struct *task)
  8935. +{
  8936. + bool traced_stopped;
  8937. +
  8938. +#ifdef CONFIG_PREEMPT_RT_FULL
  8939. + unsigned long flags;
  8940. +
  8941. + raw_spin_lock_irqsave(&task->pi_lock, flags);
  8942. + traced_stopped = __task_is_stopped_or_traced(task);
  8943. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  8944. +#else
  8945. + traced_stopped = __task_is_stopped_or_traced(task);
  8946. +#endif
  8947. + return traced_stopped;
  8948. +}
  8949. +
  8950. +static inline bool task_is_traced(struct task_struct *task)
  8951. +{
  8952. + bool traced = false;
  8953. +
  8954. + if (task->state & __TASK_TRACED)
  8955. + return true;
  8956. +#ifdef CONFIG_PREEMPT_RT_FULL
  8957. + /* in case the task is sleeping on tasklist_lock */
  8958. + raw_spin_lock_irq(&task->pi_lock);
  8959. + if (task->state & __TASK_TRACED)
  8960. + traced = true;
  8961. + else if (task->saved_state & __TASK_TRACED)
  8962. + traced = true;
  8963. + raw_spin_unlock_irq(&task->pi_lock);
  8964. +#endif
  8965. + return traced;
  8966. +}
  8967. +
  8968. /*
  8969. * cond_resched() and cond_resched_lock(): latency reduction via
  8970. * explicit rescheduling in places that are safe. The return
  8971. @@ -2766,7 +2922,7 @@
  8972. extern int __cond_resched_lock(spinlock_t *lock);
  8973. -#ifdef CONFIG_PREEMPT_COUNT
  8974. +#if defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT_FULL)
  8975. #define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET
  8976. #else
  8977. #define PREEMPT_LOCK_OFFSET 0
  8978. @@ -2777,12 +2933,16 @@
  8979. __cond_resched_lock(lock); \
  8980. })
  8981. +#ifndef CONFIG_PREEMPT_RT_FULL
  8982. extern int __cond_resched_softirq(void);
  8983. #define cond_resched_softirq() ({ \
  8984. __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
  8985. __cond_resched_softirq(); \
  8986. })
  8987. +#else
  8988. +# define cond_resched_softirq() cond_resched()
  8989. +#endif
  8990. static inline void cond_resched_rcu(void)
  8991. {
  8992. @@ -2949,6 +3109,26 @@
  8993. #endif /* CONFIG_SMP */
  8994. +static inline int __migrate_disabled(struct task_struct *p)
  8995. +{
  8996. +#ifdef CONFIG_PREEMPT_RT_FULL
  8997. + return p->migrate_disable;
  8998. +#else
  8999. + return 0;
  9000. +#endif
  9001. +}
  9002. +
  9003. +/* Future-safe accessor for struct task_struct's cpus_allowed. */
  9004. +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
  9005. +{
  9006. +#ifdef CONFIG_PREEMPT_RT_FULL
  9007. + if (p->migrate_disable)
  9008. + return cpumask_of(task_cpu(p));
  9009. +#endif
  9010. +
  9011. + return &p->cpus_allowed;
  9012. +}
  9013. +
  9014. extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
  9015. extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
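
Illustrative sketch (not part of the patch; check_affinity is a made-up name): the new tsk_cpus_allowed() accessor narrows the visible affinity to the current CPU while a task has migrate_disable() in effect (tracked by the ->migrate_disable counter added above), and falls back to ->cpus_allowed otherwise.

    static void check_affinity(void)
    {
            const struct cpumask *mask;

            migrate_disable();                      /* bumps current->migrate_disable */
            mask = tsk_cpus_allowed(current);       /* == cpumask_of(task_cpu(current)) */
            WARN_ON(cpumask_weight(mask) != 1);     /* pinned to this CPU for now */
            migrate_enable();

            mask = tsk_cpus_allowed(current);       /* back to &current->cpus_allowed */
    }
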
  9016. diff -Nur linux-3.18.10.orig/include/linux/seqlock.h linux-3.18.10/include/linux/seqlock.h
  9017. --- linux-3.18.10.orig/include/linux/seqlock.h 2015-03-24 02:05:12.000000000 +0100
  9018. +++ linux-3.18.10/include/linux/seqlock.h 2015-03-26 12:42:18.667588326 +0100
  9019. @@ -219,20 +219,30 @@
  9020. return __read_seqcount_retry(s, start);
  9021. }
  9022. -
  9023. -
  9024. -static inline void raw_write_seqcount_begin(seqcount_t *s)
  9025. +static inline void __raw_write_seqcount_begin(seqcount_t *s)
  9026. {
  9027. s->sequence++;
  9028. smp_wmb();
  9029. }
  9030. -static inline void raw_write_seqcount_end(seqcount_t *s)
  9031. +static inline void raw_write_seqcount_begin(seqcount_t *s)
  9032. +{
  9033. + preempt_disable_rt();
  9034. + __raw_write_seqcount_begin(s);
  9035. +}
  9036. +
  9037. +static inline void __raw_write_seqcount_end(seqcount_t *s)
  9038. {
  9039. smp_wmb();
  9040. s->sequence++;
  9041. }
  9042. +static inline void raw_write_seqcount_end(seqcount_t *s)
  9043. +{
  9044. + __raw_write_seqcount_end(s);
  9045. + preempt_enable_rt();
  9046. +}
  9047. +
  9048. /*
  9049. * raw_write_seqcount_latch - redirect readers to even/odd copy
  9050. * @s: pointer to seqcount_t
  9051. @@ -305,10 +315,32 @@
  9052. /*
  9053. * Read side functions for starting and finalizing a read side section.
  9054. */
  9055. +#ifndef CONFIG_PREEMPT_RT_FULL
  9056. static inline unsigned read_seqbegin(const seqlock_t *sl)
  9057. {
  9058. return read_seqcount_begin(&sl->seqcount);
  9059. }
  9060. +#else
  9061. +/*
  9062. + * Starvation safe read side for RT
  9063. + */
  9064. +static inline unsigned read_seqbegin(seqlock_t *sl)
  9065. +{
  9066. + unsigned ret;
  9067. +
  9068. +repeat:
  9069. + ret = ACCESS_ONCE(sl->seqcount.sequence);
  9070. + if (unlikely(ret & 1)) {
  9071. + /*
  9072. + * Take the lock and let the writer proceed (i.e. evtl
  9073. + * boost it), otherwise we could loop here forever.
  9074. + */
  9075. + spin_unlock_wait(&sl->lock);
  9076. + goto repeat;
  9077. + }
  9078. + return ret;
  9079. +}
  9080. +#endif
  9081. static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
  9082. {
  9083. @@ -323,36 +355,36 @@
  9084. static inline void write_seqlock(seqlock_t *sl)
  9085. {
  9086. spin_lock(&sl->lock);
  9087. - write_seqcount_begin(&sl->seqcount);
  9088. + __raw_write_seqcount_begin(&sl->seqcount);
  9089. }
  9090. static inline void write_sequnlock(seqlock_t *sl)
  9091. {
  9092. - write_seqcount_end(&sl->seqcount);
  9093. + __raw_write_seqcount_end(&sl->seqcount);
  9094. spin_unlock(&sl->lock);
  9095. }
  9096. static inline void write_seqlock_bh(seqlock_t *sl)
  9097. {
  9098. spin_lock_bh(&sl->lock);
  9099. - write_seqcount_begin(&sl->seqcount);
  9100. + __raw_write_seqcount_begin(&sl->seqcount);
  9101. }
  9102. static inline void write_sequnlock_bh(seqlock_t *sl)
  9103. {
  9104. - write_seqcount_end(&sl->seqcount);
  9105. + __raw_write_seqcount_end(&sl->seqcount);
  9106. spin_unlock_bh(&sl->lock);
  9107. }
  9108. static inline void write_seqlock_irq(seqlock_t *sl)
  9109. {
  9110. spin_lock_irq(&sl->lock);
  9111. - write_seqcount_begin(&sl->seqcount);
  9112. + __raw_write_seqcount_begin(&sl->seqcount);
  9113. }
  9114. static inline void write_sequnlock_irq(seqlock_t *sl)
  9115. {
  9116. - write_seqcount_end(&sl->seqcount);
  9117. + __raw_write_seqcount_end(&sl->seqcount);
  9118. spin_unlock_irq(&sl->lock);
  9119. }
  9120. @@ -361,7 +393,7 @@
  9121. unsigned long flags;
  9122. spin_lock_irqsave(&sl->lock, flags);
  9123. - write_seqcount_begin(&sl->seqcount);
  9124. + __raw_write_seqcount_begin(&sl->seqcount);
  9125. return flags;
  9126. }
  9127. @@ -371,7 +403,7 @@
  9128. static inline void
  9129. write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
  9130. {
  9131. - write_seqcount_end(&sl->seqcount);
  9132. + __raw_write_seqcount_end(&sl->seqcount);
  9133. spin_unlock_irqrestore(&sl->lock, flags);
  9134. }
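
Usage sketch (annotation, not part of the patch; my_seqlock, my_a, my_b and the two helpers are illustrative names): the reader loop is the usual seqlock pattern; the RT-specific part is inside read_seqbegin(), which waits on sl->lock (so the writer can be priority-boosted) instead of spinning on an odd sequence count.

    static DEFINE_SEQLOCK(my_seqlock);
    static u64 my_a, my_b;

    static u64 my_read_pair(void)
    {
            unsigned int seq;
            u64 a, b;

            do {
                    seq = read_seqbegin(&my_seqlock);
                    a = my_a;
                    b = my_b;
            } while (read_seqretry(&my_seqlock, seq));

            return a + b;
    }

    static void my_write_pair(u64 a, u64 b)
    {
            write_seqlock(&my_seqlock);     /* spin_lock() + __raw_write_seqcount_begin() */
            my_a = a;
            my_b = b;
            write_sequnlock(&my_seqlock);
    }
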
  9135. diff -Nur linux-3.18.10.orig/include/linux/signal.h linux-3.18.10/include/linux/signal.h
  9136. --- linux-3.18.10.orig/include/linux/signal.h 2015-03-24 02:05:12.000000000 +0100
  9137. +++ linux-3.18.10/include/linux/signal.h 2015-03-26 12:42:18.667588326 +0100
  9138. @@ -218,6 +218,7 @@
  9139. }
  9140. extern void flush_sigqueue(struct sigpending *queue);
  9141. +extern void flush_task_sigqueue(struct task_struct *tsk);
  9142. /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
  9143. static inline int valid_signal(unsigned long sig)
  9144. diff -Nur linux-3.18.10.orig/include/linux/skbuff.h linux-3.18.10/include/linux/skbuff.h
  9145. --- linux-3.18.10.orig/include/linux/skbuff.h 2015-03-24 02:05:12.000000000 +0100
  9146. +++ linux-3.18.10/include/linux/skbuff.h 2015-03-26 12:42:18.667588326 +0100
  9147. @@ -172,6 +172,7 @@
  9148. __u32 qlen;
  9149. spinlock_t lock;
  9150. + raw_spinlock_t raw_lock;
  9151. };
  9152. struct sk_buff;
  9153. @@ -1327,6 +1328,12 @@
  9154. __skb_queue_head_init(list);
  9155. }
  9156. +static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
  9157. +{
  9158. + raw_spin_lock_init(&list->raw_lock);
  9159. + __skb_queue_head_init(list);
  9160. +}
  9161. +
  9162. static inline void skb_queue_head_init_class(struct sk_buff_head *list,
  9163. struct lock_class_key *class)
  9164. {
  9165. diff -Nur linux-3.18.10.orig/include/linux/smp.h linux-3.18.10/include/linux/smp.h
  9166. --- linux-3.18.10.orig/include/linux/smp.h 2015-03-24 02:05:12.000000000 +0100
  9167. +++ linux-3.18.10/include/linux/smp.h 2015-03-26 12:42:18.667588326 +0100
  9168. @@ -178,6 +178,9 @@
  9169. #define get_cpu() ({ preempt_disable(); smp_processor_id(); })
  9170. #define put_cpu() preempt_enable()
  9171. +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
  9172. +#define put_cpu_light() migrate_enable()
  9173. +
  9174. /*
  9175. * Callback to arch code if there's nosmp or maxcpus=0 on the
  9176. * boot command line:
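
Usage sketch (annotation, not part of the patch; my_log_cpu is an illustrative name): get_cpu_light() only disables migration, so the section stays preemptible. It suits code that merely needs a stable CPU number; per-CPU data touched inside still needs its own serialization (e.g. a local lock).

    static void my_log_cpu(void)
    {
            int cpu = get_cpu_light();      /* migrate_disable() + smp_processor_id() */

            pr_debug("running on CPU %d\n", cpu);   /* cpu stays valid, but we may be preempted */
            put_cpu_light();                /* migrate_enable() */
    }
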
  9177. diff -Nur linux-3.18.10.orig/include/linux/spinlock_api_smp.h linux-3.18.10/include/linux/spinlock_api_smp.h
  9178. --- linux-3.18.10.orig/include/linux/spinlock_api_smp.h 2015-03-24 02:05:12.000000000 +0100
  9179. +++ linux-3.18.10/include/linux/spinlock_api_smp.h 2015-03-26 12:42:18.671588332 +0100
  9180. @@ -187,6 +187,8 @@
  9181. return 0;
  9182. }
  9183. -#include <linux/rwlock_api_smp.h>
  9184. +#ifndef CONFIG_PREEMPT_RT_FULL
  9185. +# include <linux/rwlock_api_smp.h>
  9186. +#endif
  9187. #endif /* __LINUX_SPINLOCK_API_SMP_H */
  9188. diff -Nur linux-3.18.10.orig/include/linux/spinlock.h linux-3.18.10/include/linux/spinlock.h
  9189. --- linux-3.18.10.orig/include/linux/spinlock.h 2015-03-24 02:05:12.000000000 +0100
  9190. +++ linux-3.18.10/include/linux/spinlock.h 2015-03-26 12:42:18.667588326 +0100
  9191. @@ -278,7 +278,11 @@
  9192. #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock))
  9193. /* Include rwlock functions */
  9194. -#include <linux/rwlock.h>
  9195. +#ifdef CONFIG_PREEMPT_RT_FULL
  9196. +# include <linux/rwlock_rt.h>
  9197. +#else
  9198. +# include <linux/rwlock.h>
  9199. +#endif
  9200. /*
  9201. * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
  9202. @@ -289,6 +293,10 @@
  9203. # include <linux/spinlock_api_up.h>
  9204. #endif
  9205. +#ifdef CONFIG_PREEMPT_RT_FULL
  9206. +# include <linux/spinlock_rt.h>
  9207. +#else /* PREEMPT_RT_FULL */
  9208. +
  9209. /*
  9210. * Map the spin_lock functions to the raw variants for PREEMPT_RT=n
  9211. */
  9212. @@ -418,4 +426,6 @@
  9213. #define atomic_dec_and_lock(atomic, lock) \
  9214. __cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
  9215. +#endif /* !PREEMPT_RT_FULL */
  9216. +
  9217. #endif /* __LINUX_SPINLOCK_H */
  9218. diff -Nur linux-3.18.10.orig/include/linux/spinlock_rt.h linux-3.18.10/include/linux/spinlock_rt.h
  9219. --- linux-3.18.10.orig/include/linux/spinlock_rt.h 1970-01-01 01:00:00.000000000 +0100
  9220. +++ linux-3.18.10/include/linux/spinlock_rt.h 2015-03-26 12:42:18.671588332 +0100
  9221. @@ -0,0 +1,167 @@
  9222. +#ifndef __LINUX_SPINLOCK_RT_H
  9223. +#define __LINUX_SPINLOCK_RT_H
  9224. +
  9225. +#ifndef __LINUX_SPINLOCK_H
  9226. +#error Do not include directly. Use spinlock.h
  9227. +#endif
  9228. +
  9229. +#include <linux/bug.h>
  9230. +
  9231. +extern void
  9232. +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key);
  9233. +
  9234. +#define spin_lock_init(slock) \
  9235. +do { \
  9236. + static struct lock_class_key __key; \
  9237. + \
  9238. + rt_mutex_init(&(slock)->lock); \
  9239. + __rt_spin_lock_init(slock, #slock, &__key); \
  9240. +} while (0)
  9241. +
  9242. +extern void __lockfunc rt_spin_lock(spinlock_t *lock);
  9243. +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
  9244. +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
  9245. +extern void __lockfunc rt_spin_unlock(spinlock_t *lock);
  9246. +extern void __lockfunc rt_spin_unlock_after_trylock_in_irq(spinlock_t *lock);
  9247. +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock);
  9248. +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags);
  9249. +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock);
  9250. +extern int __lockfunc rt_spin_trylock(spinlock_t *lock);
  9251. +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock);
  9252. +
  9253. +/*
  9254. + * lockdep-less calls, for derived types like rwlock:
  9255. + * (for trylock they can use rt_mutex_trylock() directly).
  9256. + */
  9257. +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
  9258. +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
  9259. +extern int __lockfunc __rt_spin_trylock(struct rt_mutex *lock);
  9260. +
  9261. +#define spin_lock(lock) \
  9262. + do { \
  9263. + migrate_disable(); \
  9264. + rt_spin_lock(lock); \
  9265. + } while (0)
  9266. +
  9267. +#define spin_lock_bh(lock) \
  9268. + do { \
  9269. + local_bh_disable(); \
  9270. + migrate_disable(); \
  9271. + rt_spin_lock(lock); \
  9272. + } while (0)
  9273. +
  9274. +#define spin_lock_irq(lock) spin_lock(lock)
  9275. +
  9276. +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock))
  9277. +
  9278. +#define spin_trylock(lock) \
  9279. +({ \
  9280. + int __locked; \
  9281. + migrate_disable(); \
  9282. + __locked = spin_do_trylock(lock); \
  9283. + if (!__locked) \
  9284. + migrate_enable(); \
  9285. + __locked; \
  9286. +})
  9287. +
  9288. +#ifdef CONFIG_LOCKDEP
  9289. +# define spin_lock_nested(lock, subclass) \
  9290. + do { \
  9291. + migrate_disable(); \
  9292. + rt_spin_lock_nested(lock, subclass); \
  9293. + } while (0)
  9294. +
  9295. +# define spin_lock_irqsave_nested(lock, flags, subclass) \
  9296. + do { \
  9297. + typecheck(unsigned long, flags); \
  9298. + flags = 0; \
  9299. + migrate_disable(); \
  9300. + rt_spin_lock_nested(lock, subclass); \
  9301. + } while (0)
  9302. +#else
  9303. +# define spin_lock_nested(lock, subclass) spin_lock(lock)
  9304. +
  9305. +# define spin_lock_irqsave_nested(lock, flags, subclass) \
  9306. + do { \
  9307. + typecheck(unsigned long, flags); \
  9308. + flags = 0; \
  9309. + spin_lock(lock); \
  9310. + } while (0)
  9311. +#endif
  9312. +
  9313. +#define spin_lock_irqsave(lock, flags) \
  9314. + do { \
  9315. + typecheck(unsigned long, flags); \
  9316. + flags = 0; \
  9317. + spin_lock(lock); \
  9318. + } while (0)
  9319. +
  9320. +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock)
  9321. +{
  9322. + unsigned long flags = 0;
  9323. +#ifdef CONFIG_TRACE_IRQFLAGS
  9324. + flags = rt_spin_lock_trace_flags(lock);
  9325. +#else
  9326. + spin_lock(lock); /* lock_local */
  9327. +#endif
  9328. + return flags;
  9329. +}
  9330. +
  9331. +/* FIXME: we need rt_spin_lock_nest_lock */
  9332. +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0)
  9333. +
  9334. +#define spin_unlock(lock) \
  9335. + do { \
  9336. + rt_spin_unlock(lock); \
  9337. + migrate_enable(); \
  9338. + } while (0)
  9339. +
  9340. +#define spin_unlock_bh(lock) \
  9341. + do { \
  9342. + rt_spin_unlock(lock); \
  9343. + migrate_enable(); \
  9344. + local_bh_enable(); \
  9345. + } while (0)
  9346. +
  9347. +#define spin_unlock_irq(lock) spin_unlock(lock)
  9348. +
  9349. +#define spin_unlock_irqrestore(lock, flags) \
  9350. + do { \
  9351. + typecheck(unsigned long, flags); \
  9352. + (void) flags; \
  9353. + spin_unlock(lock); \
  9354. + } while (0)
  9355. +
  9356. +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock))
  9357. +#define spin_trylock_irq(lock) spin_trylock(lock)
  9358. +
  9359. +#define spin_trylock_irqsave(lock, flags) \
  9360. + rt_spin_trylock_irqsave(lock, &(flags))
  9361. +
  9362. +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock)
  9363. +
  9364. +#ifdef CONFIG_GENERIC_LOCKBREAK
  9365. +# define spin_is_contended(lock) ((lock)->break_lock)
  9366. +#else
  9367. +# define spin_is_contended(lock) (((void)(lock), 0))
  9368. +#endif
  9369. +
  9370. +static inline int spin_can_lock(spinlock_t *lock)
  9371. +{
  9372. + return !rt_mutex_is_locked(&lock->lock);
  9373. +}
  9374. +
  9375. +static inline int spin_is_locked(spinlock_t *lock)
  9376. +{
  9377. + return rt_mutex_is_locked(&lock->lock);
  9378. +}
  9379. +
  9380. +static inline void assert_spin_locked(spinlock_t *lock)
  9381. +{
  9382. + BUG_ON(!spin_is_locked(lock));
  9383. +}
  9384. +
  9385. +#define atomic_dec_and_lock(atomic, lock) \
  9386. + atomic_dec_and_spin_lock(atomic, lock)
  9387. +
  9388. +#endif
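
Usage sketch (annotation, not part of the patch; my_lock, my_state and the two helpers are illustrative names): every spin_lock() variant above becomes migrate_disable() plus rt_spin_lock(), i.e. a sleeping rtmutex; spin_lock_irqsave() keeps its prototype but only zeroes flags and leaves interrupts enabled, and a failed spin_trylock() already undoes the migrate_disable().

    static DEFINE_SPINLOCK(my_lock);
    static int my_state;

    static void my_set_state(int v)
    {
            unsigned long flags;

            spin_lock_irqsave(&my_lock, flags);     /* may sleep; IRQs stay on */
            my_state = v;
            spin_unlock_irqrestore(&my_lock, flags);        /* flags is ignored */
    }

    static bool my_try_set_state(int v)
    {
            if (!spin_trylock(&my_lock))    /* migrate_enable() already done on failure */
                    return false;
            my_state = v;
            spin_unlock(&my_lock);
            return true;
    }
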
  9389. diff -Nur linux-3.18.10.orig/include/linux/spinlock_types.h linux-3.18.10/include/linux/spinlock_types.h
  9390. --- linux-3.18.10.orig/include/linux/spinlock_types.h 2015-03-24 02:05:12.000000000 +0100
  9391. +++ linux-3.18.10/include/linux/spinlock_types.h 2015-03-26 12:42:18.671588332 +0100
  9392. @@ -9,80 +9,15 @@
  9393. * Released under the General Public License (GPL).
  9394. */
  9395. -#if defined(CONFIG_SMP)
  9396. -# include <asm/spinlock_types.h>
  9397. -#else
  9398. -# include <linux/spinlock_types_up.h>
  9399. -#endif
  9400. -
  9401. -#include <linux/lockdep.h>
  9402. -
  9403. -typedef struct raw_spinlock {
  9404. - arch_spinlock_t raw_lock;
  9405. -#ifdef CONFIG_GENERIC_LOCKBREAK
  9406. - unsigned int break_lock;
  9407. -#endif
  9408. -#ifdef CONFIG_DEBUG_SPINLOCK
  9409. - unsigned int magic, owner_cpu;
  9410. - void *owner;
  9411. -#endif
  9412. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9413. - struct lockdep_map dep_map;
  9414. -#endif
  9415. -} raw_spinlock_t;
  9416. -
  9417. -#define SPINLOCK_MAGIC 0xdead4ead
  9418. -
  9419. -#define SPINLOCK_OWNER_INIT ((void *)-1L)
  9420. -
  9421. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9422. -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  9423. -#else
  9424. -# define SPIN_DEP_MAP_INIT(lockname)
  9425. -#endif
  9426. +#include <linux/spinlock_types_raw.h>
  9427. -#ifdef CONFIG_DEBUG_SPINLOCK
  9428. -# define SPIN_DEBUG_INIT(lockname) \
  9429. - .magic = SPINLOCK_MAGIC, \
  9430. - .owner_cpu = -1, \
  9431. - .owner = SPINLOCK_OWNER_INIT,
  9432. +#ifndef CONFIG_PREEMPT_RT_FULL
  9433. +# include <linux/spinlock_types_nort.h>
  9434. +# include <linux/rwlock_types.h>
  9435. #else
  9436. -# define SPIN_DEBUG_INIT(lockname)
  9437. +# include <linux/rtmutex.h>
  9438. +# include <linux/spinlock_types_rt.h>
  9439. +# include <linux/rwlock_types_rt.h>
  9440. #endif
  9441. -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
  9442. - { \
  9443. - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
  9444. - SPIN_DEBUG_INIT(lockname) \
  9445. - SPIN_DEP_MAP_INIT(lockname) }
  9446. -
  9447. -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
  9448. - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
  9449. -
  9450. -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
  9451. -
  9452. -typedef struct spinlock {
  9453. - union {
  9454. - struct raw_spinlock rlock;
  9455. -
  9456. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9457. -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
  9458. - struct {
  9459. - u8 __padding[LOCK_PADSIZE];
  9460. - struct lockdep_map dep_map;
  9461. - };
  9462. -#endif
  9463. - };
  9464. -} spinlock_t;
  9465. -
  9466. -#define __SPIN_LOCK_INITIALIZER(lockname) \
  9467. - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
  9468. -
  9469. -#define __SPIN_LOCK_UNLOCKED(lockname) \
  9470. - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
  9471. -
  9472. -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
  9473. -
  9474. -#include <linux/rwlock_types.h>
  9475. -
  9476. #endif /* __LINUX_SPINLOCK_TYPES_H */
  9477. diff -Nur linux-3.18.10.orig/include/linux/spinlock_types_nort.h linux-3.18.10/include/linux/spinlock_types_nort.h
  9478. --- linux-3.18.10.orig/include/linux/spinlock_types_nort.h 1970-01-01 01:00:00.000000000 +0100
  9479. +++ linux-3.18.10/include/linux/spinlock_types_nort.h 2015-03-26 12:42:18.671588332 +0100
  9480. @@ -0,0 +1,33 @@
  9481. +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H
  9482. +#define __LINUX_SPINLOCK_TYPES_NORT_H
  9483. +
  9484. +#ifndef __LINUX_SPINLOCK_TYPES_H
  9485. +#error "Do not include directly. Include spinlock_types.h instead"
  9486. +#endif
  9487. +
  9488. +/*
  9489. + * The non RT version maps spinlocks to raw_spinlocks
  9490. + */
  9491. +typedef struct spinlock {
  9492. + union {
  9493. + struct raw_spinlock rlock;
  9494. +
  9495. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9496. +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
  9497. + struct {
  9498. + u8 __padding[LOCK_PADSIZE];
  9499. + struct lockdep_map dep_map;
  9500. + };
  9501. +#endif
  9502. + };
  9503. +} spinlock_t;
  9504. +
  9505. +#define __SPIN_LOCK_INITIALIZER(lockname) \
  9506. + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
  9507. +
  9508. +#define __SPIN_LOCK_UNLOCKED(lockname) \
  9509. + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
  9510. +
  9511. +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
  9512. +
  9513. +#endif
  9514. diff -Nur linux-3.18.10.orig/include/linux/spinlock_types_raw.h linux-3.18.10/include/linux/spinlock_types_raw.h
  9515. --- linux-3.18.10.orig/include/linux/spinlock_types_raw.h 1970-01-01 01:00:00.000000000 +0100
  9516. +++ linux-3.18.10/include/linux/spinlock_types_raw.h 2015-03-26 12:42:18.671588332 +0100
  9517. @@ -0,0 +1,56 @@
  9518. +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  9519. +#define __LINUX_SPINLOCK_TYPES_RAW_H
  9520. +
  9521. +#if defined(CONFIG_SMP)
  9522. +# include <asm/spinlock_types.h>
  9523. +#else
  9524. +# include <linux/spinlock_types_up.h>
  9525. +#endif
  9526. +
  9527. +#include <linux/lockdep.h>
  9528. +
  9529. +typedef struct raw_spinlock {
  9530. + arch_spinlock_t raw_lock;
  9531. +#ifdef CONFIG_GENERIC_LOCKBREAK
  9532. + unsigned int break_lock;
  9533. +#endif
  9534. +#ifdef CONFIG_DEBUG_SPINLOCK
  9535. + unsigned int magic, owner_cpu;
  9536. + void *owner;
  9537. +#endif
  9538. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9539. + struct lockdep_map dep_map;
  9540. +#endif
  9541. +} raw_spinlock_t;
  9542. +
  9543. +#define SPINLOCK_MAGIC 0xdead4ead
  9544. +
  9545. +#define SPINLOCK_OWNER_INIT ((void *)-1L)
  9546. +
  9547. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9548. +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  9549. +#else
  9550. +# define SPIN_DEP_MAP_INIT(lockname)
  9551. +#endif
  9552. +
  9553. +#ifdef CONFIG_DEBUG_SPINLOCK
  9554. +# define SPIN_DEBUG_INIT(lockname) \
  9555. + .magic = SPINLOCK_MAGIC, \
  9556. + .owner_cpu = -1, \
  9557. + .owner = SPINLOCK_OWNER_INIT,
  9558. +#else
  9559. +# define SPIN_DEBUG_INIT(lockname)
  9560. +#endif
  9561. +
  9562. +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
  9563. + { \
  9564. + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
  9565. + SPIN_DEBUG_INIT(lockname) \
  9566. + SPIN_DEP_MAP_INIT(lockname) }
  9567. +
  9568. +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
  9569. + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
  9570. +
  9571. +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
  9572. +
  9573. +#endif
  9574. diff -Nur linux-3.18.10.orig/include/linux/spinlock_types_rt.h linux-3.18.10/include/linux/spinlock_types_rt.h
  9575. --- linux-3.18.10.orig/include/linux/spinlock_types_rt.h 1970-01-01 01:00:00.000000000 +0100
  9576. +++ linux-3.18.10/include/linux/spinlock_types_rt.h 2015-03-26 12:42:18.671588332 +0100
  9577. @@ -0,0 +1,51 @@
  9578. +#ifndef __LINUX_SPINLOCK_TYPES_RT_H
  9579. +#define __LINUX_SPINLOCK_TYPES_RT_H
  9580. +
  9581. +#ifndef __LINUX_SPINLOCK_TYPES_H
  9582. +#error "Do not include directly. Include spinlock_types.h instead"
  9583. +#endif
  9584. +
  9585. +#include <linux/cache.h>
  9586. +
  9587. +/*
  9588. + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field:
  9589. + */
  9590. +typedef struct spinlock {
  9591. + struct rt_mutex lock;
  9592. + unsigned int break_lock;
  9593. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9594. + struct lockdep_map dep_map;
  9595. +#endif
  9596. +} spinlock_t;
  9597. +
  9598. +#ifdef CONFIG_DEBUG_RT_MUTEXES
  9599. +# define __RT_SPIN_INITIALIZER(name) \
  9600. + { \
  9601. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
  9602. + .save_state = 1, \
  9603. + .file = __FILE__, \
  9604. + .line = __LINE__ , \
  9605. + }
  9606. +#else
  9607. +# define __RT_SPIN_INITIALIZER(name) \
  9608. + { \
  9609. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
  9610. + .save_state = 1, \
  9611. + }
  9612. +#endif
  9613. +
  9614. +/*
  9615. +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock)
  9616. +*/
  9617. +
  9618. +#define __SPIN_LOCK_UNLOCKED(name) \
  9619. + { .lock = __RT_SPIN_INITIALIZER(name.lock), \
  9620. + SPIN_DEP_MAP_INIT(name) }
  9621. +
  9622. +#define __DEFINE_SPINLOCK(name) \
  9623. + spinlock_t name = __SPIN_LOCK_UNLOCKED(name)
  9624. +
  9625. +#define DEFINE_SPINLOCK(name) \
  9626. + spinlock_t name __cacheline_aligned_in_smp = __SPIN_LOCK_UNLOCKED(name)
  9627. +
  9628. +#endif
  9629. diff -Nur linux-3.18.10.orig/include/linux/srcu.h linux-3.18.10/include/linux/srcu.h
  9630. --- linux-3.18.10.orig/include/linux/srcu.h 2015-03-24 02:05:12.000000000 +0100
  9631. +++ linux-3.18.10/include/linux/srcu.h 2015-03-26 12:42:18.671588332 +0100
  9632. @@ -84,10 +84,10 @@
  9633. void process_srcu(struct work_struct *work);
  9634. -#define __SRCU_STRUCT_INIT(name) \
  9635. +#define __SRCU_STRUCT_INIT(name, pcpu_name) \
  9636. { \
  9637. .completed = -300, \
  9638. - .per_cpu_ref = &name##_srcu_array, \
  9639. + .per_cpu_ref = &pcpu_name, \
  9640. .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \
  9641. .running = false, \
  9642. .batch_queue = RCU_BATCH_INIT(name.batch_queue), \
  9643. @@ -104,11 +104,12 @@
  9644. */
  9645. #define DEFINE_SRCU(name) \
  9646. static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
  9647. - struct srcu_struct name = __SRCU_STRUCT_INIT(name);
  9648. + struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array);
  9649. #define DEFINE_STATIC_SRCU(name) \
  9650. static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
  9651. - static struct srcu_struct name = __SRCU_STRUCT_INIT(name);
  9652. + static struct srcu_struct name = __SRCU_STRUCT_INIT(\
  9653. + name, name##_srcu_array);
  9654. /**
  9655. * call_srcu() - Queue a callback for invocation after an SRCU grace period
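
Usage sketch (annotation, not part of the patch; my_srcu and the two helpers are illustrative names): the change only threads the per-CPU array name through __SRCU_STRUCT_INIT() so that DEFINE_STATIC_SRCU() can emit a genuinely static object; callers use SRCU exactly as before.

    DEFINE_STATIC_SRCU(my_srcu);

    static void my_reader(void)
    {
            int idx;

            idx = srcu_read_lock(&my_srcu);
            /* ... dereference data protected by my_srcu ... */
            srcu_read_unlock(&my_srcu, idx);
    }

    static void my_updater(void)
    {
            /* ... unlink the old data ... */
            synchronize_srcu(&my_srcu);     /* wait for all current readers */
            /* ... now it can be freed ... */
    }
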
  9656. diff -Nur linux-3.18.10.orig/include/linux/swap.h linux-3.18.10/include/linux/swap.h
  9657. --- linux-3.18.10.orig/include/linux/swap.h 2015-03-24 02:05:12.000000000 +0100
  9658. +++ linux-3.18.10/include/linux/swap.h 2015-03-26 12:42:18.671588332 +0100
  9659. @@ -11,6 +11,7 @@
  9660. #include <linux/fs.h>
  9661. #include <linux/atomic.h>
  9662. #include <linux/page-flags.h>
  9663. +#include <linux/locallock.h>
  9664. #include <asm/page.h>
  9665. struct notifier_block;
  9666. @@ -260,7 +261,8 @@
  9667. void *workingset_eviction(struct address_space *mapping, struct page *page);
  9668. bool workingset_refault(void *shadow);
  9669. void workingset_activation(struct page *page);
  9670. -extern struct list_lru workingset_shadow_nodes;
  9671. +extern struct list_lru __workingset_shadow_nodes;
  9672. +DECLARE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
  9673. static inline unsigned int workingset_node_pages(struct radix_tree_node *node)
  9674. {
  9675. diff -Nur linux-3.18.10.orig/include/linux/sysctl.h linux-3.18.10/include/linux/sysctl.h
  9676. --- linux-3.18.10.orig/include/linux/sysctl.h 2015-03-24 02:05:12.000000000 +0100
  9677. +++ linux-3.18.10/include/linux/sysctl.h 2015-03-26 12:42:18.671588332 +0100
  9678. @@ -25,6 +25,7 @@
  9679. #include <linux/rcupdate.h>
  9680. #include <linux/wait.h>
  9681. #include <linux/rbtree.h>
  9682. +#include <linux/atomic.h>
  9683. #include <uapi/linux/sysctl.h>
  9684. /* For the /proc/sys support */
  9685. diff -Nur linux-3.18.10.orig/include/linux/thread_info.h linux-3.18.10/include/linux/thread_info.h
  9686. --- linux-3.18.10.orig/include/linux/thread_info.h 2015-03-24 02:05:12.000000000 +0100
  9687. +++ linux-3.18.10/include/linux/thread_info.h 2015-03-26 12:42:18.671588332 +0100
  9688. @@ -102,7 +102,17 @@
  9689. #define test_thread_flag(flag) \
  9690. test_ti_thread_flag(current_thread_info(), flag)
  9691. -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
  9692. +#ifdef CONFIG_PREEMPT_LAZY
  9693. +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \
  9694. + test_thread_flag(TIF_NEED_RESCHED_LAZY))
  9695. +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED))
  9696. +#define tif_need_resched_lazy() (test_thread_flag(TIF_NEED_RESCHED_LAZY))
  9697. +
  9698. +#else
  9699. +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
  9700. +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED)
  9701. +#define tif_need_resched_lazy() 0
  9702. +#endif
  9703. #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
  9704. /*
  9705. diff -Nur linux-3.18.10.orig/include/linux/timer.h linux-3.18.10/include/linux/timer.h
  9706. --- linux-3.18.10.orig/include/linux/timer.h 2015-03-24 02:05:12.000000000 +0100
  9707. +++ linux-3.18.10/include/linux/timer.h 2015-03-26 12:42:18.671588332 +0100
  9708. @@ -241,7 +241,7 @@
  9709. extern int try_to_del_timer_sync(struct timer_list *timer);
  9710. -#ifdef CONFIG_SMP
  9711. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  9712. extern int del_timer_sync(struct timer_list *timer);
  9713. #else
  9714. # define del_timer_sync(t) del_timer(t)
  9715. diff -Nur linux-3.18.10.orig/include/linux/uaccess.h linux-3.18.10/include/linux/uaccess.h
  9716. --- linux-3.18.10.orig/include/linux/uaccess.h 2015-03-24 02:05:12.000000000 +0100
  9717. +++ linux-3.18.10/include/linux/uaccess.h 2015-03-26 12:42:18.671588332 +0100
  9718. @@ -6,14 +6,9 @@
  9719. /*
  9720. * These routines enable/disable the pagefault handler in that
  9721. - * it will not take any locks and go straight to the fixup table.
  9722. - *
  9723. - * They have great resemblance to the preempt_disable/enable calls
  9724. - * and in fact they are identical; this is because currently there is
  9725. - * no other way to make the pagefault handlers do this. So we do
  9726. - * disable preemption but we don't necessarily care about that.
  9727. + * it will not take any MM locks and go straight to the fixup table.
  9728. */
  9729. -static inline void pagefault_disable(void)
  9730. +static inline void raw_pagefault_disable(void)
  9731. {
  9732. preempt_count_inc();
  9733. /*
  9734. @@ -23,7 +18,7 @@
  9735. barrier();
  9736. }
  9737. -static inline void pagefault_enable(void)
  9738. +static inline void raw_pagefault_enable(void)
  9739. {
  9740. #ifndef CONFIG_PREEMPT
  9741. /*
  9742. @@ -37,6 +32,21 @@
  9743. #endif
  9744. }
  9745. +#ifndef CONFIG_PREEMPT_RT_FULL
  9746. +static inline void pagefault_disable(void)
  9747. +{
  9748. + raw_pagefault_disable();
  9749. +}
  9750. +
  9751. +static inline void pagefault_enable(void)
  9752. +{
  9753. + raw_pagefault_enable();
  9754. +}
  9755. +#else
  9756. +extern void pagefault_disable(void);
  9757. +extern void pagefault_enable(void);
  9758. +#endif
  9759. +
  9760. #ifndef ARCH_HAS_NOCACHE_UACCESS
  9761. static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
  9762. @@ -76,9 +86,9 @@
  9763. mm_segment_t old_fs = get_fs(); \
  9764. \
  9765. set_fs(KERNEL_DS); \
  9766. - pagefault_disable(); \
  9767. + raw_pagefault_disable(); \
  9768. ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \
  9769. - pagefault_enable(); \
  9770. + raw_pagefault_enable(); \
  9771. set_fs(old_fs); \
  9772. ret; \
  9773. })
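
Usage sketch (annotation, not part of the patch; my_peek_user is an illustrative name): pagefault_disable()/pagefault_enable() keep their mainline meaning here, while on PREEMPT_RT_FULL they are out of line and are expected to track current->pagefault_disabled (the field added in the sched.h hunk) rather than the preempt count; raw_pagefault_disable() preserves the old preempt-count behaviour for the probe-style macro above.

    static int my_peek_user(const int __user *uaddr, int *val)
    {
            unsigned long left;

            pagefault_disable();    /* faults report -EFAULT instead of being handled */
            left = __copy_from_user_inatomic(val, uaddr, sizeof(*val));
            pagefault_enable();

            return left ? -EFAULT : 0;
    }
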
  9774. diff -Nur linux-3.18.10.orig/include/linux/uprobes.h linux-3.18.10/include/linux/uprobes.h
  9775. --- linux-3.18.10.orig/include/linux/uprobes.h 2015-03-24 02:05:12.000000000 +0100
  9776. +++ linux-3.18.10/include/linux/uprobes.h 2015-03-26 12:42:18.671588332 +0100
  9777. @@ -27,6 +27,7 @@
  9778. #include <linux/errno.h>
  9779. #include <linux/rbtree.h>
  9780. #include <linux/types.h>
  9781. +#include <linux/wait.h>
  9782. struct vm_area_struct;
  9783. struct mm_struct;
  9784. diff -Nur linux-3.18.10.orig/include/linux/vmstat.h linux-3.18.10/include/linux/vmstat.h
  9785. --- linux-3.18.10.orig/include/linux/vmstat.h 2015-03-24 02:05:12.000000000 +0100
  9786. +++ linux-3.18.10/include/linux/vmstat.h 2015-03-26 12:42:18.671588332 +0100
  9787. @@ -33,7 +33,9 @@
  9788. */
  9789. static inline void __count_vm_event(enum vm_event_item item)
  9790. {
  9791. + preempt_disable_rt();
  9792. raw_cpu_inc(vm_event_states.event[item]);
  9793. + preempt_enable_rt();
  9794. }
  9795. static inline void count_vm_event(enum vm_event_item item)
  9796. @@ -43,7 +45,9 @@
  9797. static inline void __count_vm_events(enum vm_event_item item, long delta)
  9798. {
  9799. + preempt_disable_rt();
  9800. raw_cpu_add(vm_event_states.event[item], delta);
  9801. + preempt_enable_rt();
  9802. }
  9803. static inline void count_vm_events(enum vm_event_item item, long delta)
  9804. diff -Nur linux-3.18.10.orig/include/linux/wait.h linux-3.18.10/include/linux/wait.h
  9805. --- linux-3.18.10.orig/include/linux/wait.h 2015-03-24 02:05:12.000000000 +0100
  9806. +++ linux-3.18.10/include/linux/wait.h 2015-03-26 12:42:18.671588332 +0100
  9807. @@ -8,6 +8,7 @@
  9808. #include <linux/spinlock.h>
  9809. #include <asm/current.h>
  9810. #include <uapi/linux/wait.h>
  9811. +#include <linux/atomic.h>
  9812. typedef struct __wait_queue wait_queue_t;
  9813. typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
  9814. diff -Nur linux-3.18.10.orig/include/linux/wait-simple.h linux-3.18.10/include/linux/wait-simple.h
  9815. --- linux-3.18.10.orig/include/linux/wait-simple.h 1970-01-01 01:00:00.000000000 +0100
  9816. +++ linux-3.18.10/include/linux/wait-simple.h 2015-03-26 12:42:18.671588332 +0100
  9817. @@ -0,0 +1,207 @@
  9818. +#ifndef _LINUX_WAIT_SIMPLE_H
  9819. +#define _LINUX_WAIT_SIMPLE_H
  9820. +
  9821. +#include <linux/spinlock.h>
  9822. +#include <linux/list.h>
  9823. +
  9824. +#include <asm/current.h>
  9825. +
  9826. +struct swaiter {
  9827. + struct task_struct *task;
  9828. + struct list_head node;
  9829. +};
  9830. +
  9831. +#define DEFINE_SWAITER(name) \
  9832. + struct swaiter name = { \
  9833. + .task = current, \
  9834. + .node = LIST_HEAD_INIT((name).node), \
  9835. + }
  9836. +
  9837. +struct swait_head {
  9838. + raw_spinlock_t lock;
  9839. + struct list_head list;
  9840. +};
  9841. +
  9842. +#define SWAIT_HEAD_INITIALIZER(name) { \
  9843. + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
  9844. + .list = LIST_HEAD_INIT((name).list), \
  9845. + }
  9846. +
  9847. +#define DEFINE_SWAIT_HEAD(name) \
  9848. + struct swait_head name = SWAIT_HEAD_INITIALIZER(name)
  9849. +
  9850. +extern void __init_swait_head(struct swait_head *h, struct lock_class_key *key);
  9851. +
  9852. +#define init_swait_head(swh) \
  9853. + do { \
  9854. + static struct lock_class_key __key; \
  9855. + \
  9856. + __init_swait_head((swh), &__key); \
  9857. + } while (0)
  9858. +
  9859. +/*
  9860. + * Waiter functions
  9861. + */
  9862. +extern void swait_prepare_locked(struct swait_head *head, struct swaiter *w);
  9863. +extern void swait_prepare(struct swait_head *head, struct swaiter *w, int state);
  9864. +extern void swait_finish_locked(struct swait_head *head, struct swaiter *w);
  9865. +extern void swait_finish(struct swait_head *head, struct swaiter *w);
  9866. +
  9867. +/* Check whether a head has waiters enqueued */
  9868. +static inline bool swaitqueue_active(struct swait_head *h)
  9869. +{
  9870. + /* Make sure the condition is visible before checking list_empty() */
  9871. + smp_mb();
  9872. + return !list_empty(&h->list);
  9873. +}
  9874. +
  9875. +/*
  9876. + * Wakeup functions
  9877. + */
  9878. +extern unsigned int __swait_wake(struct swait_head *head, unsigned int state, unsigned int num);
  9879. +extern unsigned int __swait_wake_locked(struct swait_head *head, unsigned int state, unsigned int num);
  9880. +
  9881. +#define swait_wake(head) __swait_wake(head, TASK_NORMAL, 1)
  9882. +#define swait_wake_interruptible(head) __swait_wake(head, TASK_INTERRUPTIBLE, 1)
  9883. +#define swait_wake_all(head) __swait_wake(head, TASK_NORMAL, 0)
  9884. +#define swait_wake_all_interruptible(head) __swait_wake(head, TASK_INTERRUPTIBLE, 0)
  9885. +
  9886. +/*
  9887. + * Event API
  9888. + */
  9889. +#define __swait_event(wq, condition) \
  9890. +do { \
  9891. + DEFINE_SWAITER(__wait); \
  9892. + \
  9893. + for (;;) { \
  9894. + swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
  9895. + if (condition) \
  9896. + break; \
  9897. + schedule(); \
  9898. + } \
  9899. + swait_finish(&wq, &__wait); \
  9900. +} while (0)
  9901. +
  9902. +/**
  9903. + * swait_event - sleep until a condition gets true
  9904. + * @wq: the waitqueue to wait on
  9905. + * @condition: a C expression for the event to wait for
  9906. + *
  9907. + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  9908. + * @condition evaluates to true. The @condition is checked each time
  9909. + * the waitqueue @wq is woken up.
  9910. + *
  9911. + * swait_wake() has to be called after changing any variable that could
  9912. + * change the result of the wait condition.
  9913. + */
  9914. +#define swait_event(wq, condition) \
  9915. +do { \
  9916. + if (condition) \
  9917. + break; \
  9918. + __swait_event(wq, condition); \
  9919. +} while (0)
  9920. +
  9921. +#define __swait_event_interruptible(wq, condition, ret) \
  9922. +do { \
  9923. + DEFINE_SWAITER(__wait); \
  9924. + \
  9925. + for (;;) { \
  9926. + swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
  9927. + if (condition) \
  9928. + break; \
  9929. + if (signal_pending(current)) { \
  9930. + ret = -ERESTARTSYS; \
  9931. + break; \
  9932. + } \
  9933. + schedule(); \
  9934. + } \
  9935. + swait_finish(&wq, &__wait); \
  9936. +} while (0)
  9937. +
  9938. +#define __swait_event_interruptible_timeout(wq, condition, ret) \
  9939. +do { \
  9940. + DEFINE_SWAITER(__wait); \
  9941. + \
  9942. + for (;;) { \
  9943. + swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
  9944. + if (condition) \
  9945. + break; \
  9946. + if (signal_pending(current)) { \
  9947. + ret = -ERESTARTSYS; \
  9948. + break; \
  9949. + } \
  9950. + ret = schedule_timeout(ret); \
  9951. + if (!ret) \
  9952. + break; \
  9953. + } \
  9954. + swait_finish(&wq, &__wait); \
  9955. +} while (0)
  9956. +
  9957. +/**
  9958. + * swait_event_interruptible - sleep until a condition gets true
  9959. + * @wq: the waitqueue to wait on
  9960. + * @condition: a C expression for the event to wait for
  9961. + *
  9962. + * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  9963. + * @condition evaluates to true. The @condition is checked each time
  9964. + * the waitqueue @wq is woken up.
  9965. + *
  9966. + * swait_wake() has to be called after changing any variable that could
  9967. + * change the result of the wait condition.
  9968. + */
  9969. +#define swait_event_interruptible(wq, condition) \
  9970. +({ \
  9971. + int __ret = 0; \
  9972. + if (!(condition)) \
  9973. + __swait_event_interruptible(wq, condition, __ret); \
  9974. + __ret; \
  9975. +})
  9976. +
  9977. +#define swait_event_interruptible_timeout(wq, condition, timeout) \
  9978. +({ \
  9979. + int __ret = timeout; \
  9980. + if (!(condition)) \
  9981. + __swait_event_interruptible_timeout(wq, condition, __ret); \
  9982. + __ret; \
  9983. +})
  9984. +
  9985. +#define __swait_event_timeout(wq, condition, ret) \
  9986. +do { \
  9987. + DEFINE_SWAITER(__wait); \
  9988. + \
  9989. + for (;;) { \
  9990. + swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
  9991. + if (condition) \
  9992. + break; \
  9993. + ret = schedule_timeout(ret); \
  9994. + if (!ret) \
  9995. + break; \
  9996. + } \
  9997. + swait_finish(&wq, &__wait); \
  9998. +} while (0)
  9999. +
  10000. +/**
  10001. + * swait_event_timeout - sleep until a condition gets true or a timeout elapses
  10002. + * @wq: the waitqueue to wait on
  10003. + * @condition: a C expression for the event to wait for
  10004. + * @timeout: timeout, in jiffies
  10005. + *
  10006. + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  10007. + * @condition evaluates to true. The @condition is checked each time
  10008. + * the waitqueue @wq is woken up.
  10009. + *
  10010. + * swait_wake() has to be called after changing any variable that could
  10011. + * change the result of the wait condition.
  10012. + *
  10013. + * The function returns 0 if the @timeout elapsed, and the remaining
  10014. + * jiffies if the condition evaluated to true before the timeout elapsed.
  10015. + */
  10016. +#define swait_event_timeout(wq, condition, timeout) \
  10017. +({ \
  10018. + long __ret = timeout; \
  10019. + if (!(condition)) \
  10020. + __swait_event_timeout(wq, condition, __ret); \
  10021. + __ret; \
  10022. +})
  10023. +
  10024. +#endif
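
For orientation, a minimal sketch of how a user of this new header pairs a sleeper with a waker; the my_* names and the my_done flag are illustrative, only the swait API comes from wait-simple.h:

	#include <linux/wait-simple.h>

	static DEFINE_SWAIT_HEAD(my_head);
	static int my_done;

	/* sleeper: blocks (interruptibly) until my_done becomes true */
	static int my_wait(void)
	{
		return swait_event_interruptible(my_head, my_done);
	}

	/* waker: publish the condition first, then wake one waiter */
	static void my_complete(void)
	{
		my_done = 1;
		swait_wake(&my_head);
	}
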
  10025. diff -Nur linux-3.18.10.orig/include/linux/work-simple.h linux-3.18.10/include/linux/work-simple.h
  10026. --- linux-3.18.10.orig/include/linux/work-simple.h 1970-01-01 01:00:00.000000000 +0100
  10027. +++ linux-3.18.10/include/linux/work-simple.h 2015-03-26 12:42:18.671588332 +0100
  10028. @@ -0,0 +1,24 @@
  10029. +#ifndef _LINUX_SWORK_H
  10030. +#define _LINUX_SWORK_H
  10031. +
  10032. +#include <linux/list.h>
  10033. +
  10034. +struct swork_event {
  10035. + struct list_head item;
  10036. + unsigned long flags;
  10037. + void (*func)(struct swork_event *);
  10038. +};
  10039. +
  10040. +static inline void INIT_SWORK(struct swork_event *event,
  10041. + void (*func)(struct swork_event *))
  10042. +{
  10043. + event->flags = 0;
  10044. + event->func = func;
  10045. +}
  10046. +
  10047. +bool swork_queue(struct swork_event *sev);
  10048. +
  10049. +int swork_get(void);
  10050. +void swork_put(void);
  10051. +
  10052. +#endif /* _LINUX_SWORK_H */
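
The kernel/cgroup.c hunk later in this patch uses exactly this API: swork_get() once at init, then INIT_SWORK() plus swork_queue() to defer work out of atomic context. A condensed sketch with illustrative my_* names:

	#include <linux/work-simple.h>

	static struct swork_event my_event;

	static void my_event_fn(struct swork_event *sev)
	{
		/* deferred work; runs in the swork kernel thread */
	}

	static int my_init(void)
	{
		int err = swork_get();		/* take a reference on the worker */

		if (err)
			return err;
		INIT_SWORK(&my_event, my_event_fn);
		return 0;
	}

	static void my_trigger(void)
	{
		swork_queue(&my_event);		/* hand the event to the worker */
	}

	static void my_exit(void)
	{
		swork_put();			/* drop the worker reference */
	}
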
  10053. diff -Nur linux-3.18.10.orig/include/net/dst.h linux-3.18.10/include/net/dst.h
  10054. --- linux-3.18.10.orig/include/net/dst.h 2015-03-24 02:05:12.000000000 +0100
  10055. +++ linux-3.18.10/include/net/dst.h 2015-03-26 12:42:18.671588332 +0100
  10056. @@ -403,7 +403,7 @@
  10057. static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
  10058. struct sk_buff *skb)
  10059. {
  10060. - const struct hh_cache *hh;
  10061. + struct hh_cache *hh;
  10062. if (dst->pending_confirm) {
  10063. unsigned long now = jiffies;
  10064. diff -Nur linux-3.18.10.orig/include/net/neighbour.h linux-3.18.10/include/net/neighbour.h
  10065. --- linux-3.18.10.orig/include/net/neighbour.h 2015-03-24 02:05:12.000000000 +0100
  10066. +++ linux-3.18.10/include/net/neighbour.h 2015-03-26 12:42:18.671588332 +0100
  10067. @@ -387,7 +387,7 @@
  10068. }
  10069. #endif
  10070. -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
  10071. +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
  10072. {
  10073. unsigned int seq;
  10074. int hh_len;
  10075. @@ -442,7 +442,7 @@
  10076. #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
  10077. -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
  10078. +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n,
  10079. const struct net_device *dev)
  10080. {
  10081. unsigned int seq;
  10082. diff -Nur linux-3.18.10.orig/include/net/netns/ipv4.h linux-3.18.10/include/net/netns/ipv4.h
  10083. --- linux-3.18.10.orig/include/net/netns/ipv4.h 2015-03-24 02:05:12.000000000 +0100
  10084. +++ linux-3.18.10/include/net/netns/ipv4.h 2015-03-26 12:42:18.671588332 +0100
  10085. @@ -67,6 +67,7 @@
  10086. int sysctl_icmp_echo_ignore_all;
  10087. int sysctl_icmp_echo_ignore_broadcasts;
  10088. + int sysctl_icmp_echo_sysrq;
  10089. int sysctl_icmp_ignore_bogus_error_responses;
  10090. int sysctl_icmp_ratelimit;
  10091. int sysctl_icmp_ratemask;
  10092. diff -Nur linux-3.18.10.orig/include/trace/events/hist.h linux-3.18.10/include/trace/events/hist.h
  10093. --- linux-3.18.10.orig/include/trace/events/hist.h 1970-01-01 01:00:00.000000000 +0100
  10094. +++ linux-3.18.10/include/trace/events/hist.h 2015-03-26 12:42:18.671588332 +0100
  10095. @@ -0,0 +1,72 @@
  10096. +#undef TRACE_SYSTEM
  10097. +#define TRACE_SYSTEM hist
  10098. +
  10099. +#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ)
  10100. +#define _TRACE_HIST_H
  10101. +
  10102. +#include "latency_hist.h"
  10103. +#include <linux/tracepoint.h>
  10104. +
  10105. +#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST)
  10106. +#define trace_preemptirqsoff_hist(a, b)
  10107. +#else
  10108. +TRACE_EVENT(preemptirqsoff_hist,
  10109. +
  10110. + TP_PROTO(int reason, int starthist),
  10111. +
  10112. + TP_ARGS(reason, starthist),
  10113. +
  10114. + TP_STRUCT__entry(
  10115. + __field(int, reason)
  10116. + __field(int, starthist)
  10117. + ),
  10118. +
  10119. + TP_fast_assign(
  10120. + __entry->reason = reason;
  10121. + __entry->starthist = starthist;
  10122. + ),
  10123. +
  10124. + TP_printk("reason=%s starthist=%s", getaction(__entry->reason),
  10125. + __entry->starthist ? "start" : "stop")
  10126. +);
  10127. +#endif
  10128. +
  10129. +#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST
  10130. +#define trace_hrtimer_interrupt(a, b, c, d)
  10131. +#else
  10132. +TRACE_EVENT(hrtimer_interrupt,
  10133. +
  10134. + TP_PROTO(int cpu, long long offset, struct task_struct *curr,
  10135. + struct task_struct *task),
  10136. +
  10137. + TP_ARGS(cpu, offset, curr, task),
  10138. +
  10139. + TP_STRUCT__entry(
  10140. + __field(int, cpu)
  10141. + __field(long long, offset)
  10142. + __array(char, ccomm, TASK_COMM_LEN)
  10143. + __field(int, cprio)
  10144. + __array(char, tcomm, TASK_COMM_LEN)
  10145. + __field(int, tprio)
  10146. + ),
  10147. +
  10148. + TP_fast_assign(
  10149. + __entry->cpu = cpu;
  10150. + __entry->offset = offset;
  10151. + memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN);
  10152. + __entry->cprio = curr->prio;
  10153. + memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>",
  10154. + task != NULL ? TASK_COMM_LEN : 7);
  10155. + __entry->tprio = task != NULL ? task->prio : -1;
  10156. + ),
  10157. +
  10158. + TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]",
  10159. + __entry->cpu, __entry->offset, __entry->ccomm,
  10160. + __entry->cprio, __entry->tcomm, __entry->tprio)
  10161. +);
  10162. +#endif
  10163. +
  10164. +#endif /* _TRACE_HIST_H */
  10165. +
  10166. +/* This part must be outside protection */
  10167. +#include <trace/define_trace.h>
  10168. diff -Nur linux-3.18.10.orig/include/trace/events/latency_hist.h linux-3.18.10/include/trace/events/latency_hist.h
  10169. --- linux-3.18.10.orig/include/trace/events/latency_hist.h 1970-01-01 01:00:00.000000000 +0100
  10170. +++ linux-3.18.10/include/trace/events/latency_hist.h 2015-03-26 12:42:18.671588332 +0100
  10171. @@ -0,0 +1,29 @@
  10172. +#ifndef _LATENCY_HIST_H
  10173. +#define _LATENCY_HIST_H
  10174. +
  10175. +enum hist_action {
  10176. + IRQS_ON,
  10177. + PREEMPT_ON,
  10178. + TRACE_STOP,
  10179. + IRQS_OFF,
  10180. + PREEMPT_OFF,
  10181. + TRACE_START,
  10182. +};
  10183. +
  10184. +static char *actions[] = {
  10185. + "IRQS_ON",
  10186. + "PREEMPT_ON",
  10187. + "TRACE_STOP",
  10188. + "IRQS_OFF",
  10189. + "PREEMPT_OFF",
  10190. + "TRACE_START",
  10191. +};
  10192. +
  10193. +static inline char *getaction(int action)
  10194. +{
  10195. + if (action >= 0 && action < sizeof(actions)/sizeof(actions[0]))
  10196. + return actions[action];
  10197. + return "unknown";
  10198. +}
  10199. +
  10200. +#endif /* _LATENCY_HIST_H */
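
A call site for the histogram trace point defined above would look roughly as follows; only trace_preemptirqsoff_hist() and the hist_action values come from these two headers, the wrapper functions are illustrative:

	#include <trace/events/hist.h>

	static inline void my_mark_irqs_off(void)
	{
		/* arguments are (reason, starthist): start measuring */
		trace_preemptirqsoff_hist(IRQS_OFF, 1);
	}

	static inline void my_mark_irqs_on(void)
	{
		/* stop the measurement again */
		trace_preemptirqsoff_hist(IRQS_ON, 0);
	}
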
  10201. diff -Nur linux-3.18.10.orig/init/Kconfig linux-3.18.10/init/Kconfig
  10202. --- linux-3.18.10.orig/init/Kconfig 2015-03-24 02:05:12.000000000 +0100
  10203. +++ linux-3.18.10/init/Kconfig 2015-03-26 12:42:18.671588332 +0100
  10204. @@ -635,7 +635,7 @@
  10205. config RCU_FAST_NO_HZ
  10206. bool "Accelerate last non-dyntick-idle CPU's grace periods"
  10207. - depends on NO_HZ_COMMON && SMP
  10208. + depends on NO_HZ_COMMON && SMP && !PREEMPT_RT_FULL
  10209. default n
  10210. help
  10211. This option permits CPUs to enter dynticks-idle state even if
  10212. @@ -662,7 +662,7 @@
  10213. config RCU_BOOST
  10214. bool "Enable RCU priority boosting"
  10215. depends on RT_MUTEXES && PREEMPT_RCU
  10216. - default n
  10217. + default y if PREEMPT_RT_FULL
  10218. help
  10219. This option boosts the priority of preempted RCU readers that
  10220. block the current preemptible RCU grace period for too long.
  10221. @@ -1106,6 +1106,7 @@
  10222. config RT_GROUP_SCHED
  10223. bool "Group scheduling for SCHED_RR/FIFO"
  10224. depends on CGROUP_SCHED
  10225. + depends on !PREEMPT_RT_FULL
  10226. default n
  10227. help
  10228. This feature lets you explicitly allocate real CPU bandwidth
  10229. @@ -1677,6 +1678,7 @@
  10230. config SLAB
  10231. bool "SLAB"
  10232. + depends on !PREEMPT_RT_FULL
  10233. help
  10234. The regular slab allocator that is established and known to work
  10235. well in all environments. It organizes cache hot objects in
  10236. @@ -1695,6 +1697,7 @@
  10237. config SLOB
  10238. depends on EXPERT
  10239. bool "SLOB (Simple Allocator)"
  10240. + depends on !PREEMPT_RT_FULL
  10241. help
  10242. SLOB replaces the stock allocator with a drastically simpler
  10243. allocator. SLOB is generally more space efficient but
  10244. diff -Nur linux-3.18.10.orig/init/main.c linux-3.18.10/init/main.c
  10245. --- linux-3.18.10.orig/init/main.c 2015-03-24 02:05:12.000000000 +0100
  10246. +++ linux-3.18.10/init/main.c 2015-03-26 12:42:18.671588332 +0100
  10247. @@ -533,6 +533,7 @@
  10248. setup_command_line(command_line);
  10249. setup_nr_cpu_ids();
  10250. setup_per_cpu_areas();
  10251. + softirq_early_init();
  10252. smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
  10253. build_all_zonelists(NULL, NULL);
  10254. diff -Nur linux-3.18.10.orig/init/Makefile linux-3.18.10/init/Makefile
  10255. --- linux-3.18.10.orig/init/Makefile 2015-03-24 02:05:12.000000000 +0100
  10256. +++ linux-3.18.10/init/Makefile 2015-03-26 12:42:18.671588332 +0100
  10257. @@ -33,4 +33,4 @@
  10258. include/generated/compile.h: FORCE
  10259. @$($(quiet)chk_compile.h)
  10260. $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
  10261. - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
  10262. + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)"
  10263. diff -Nur linux-3.18.10.orig/ipc/mqueue.c linux-3.18.10/ipc/mqueue.c
  10264. --- linux-3.18.10.orig/ipc/mqueue.c 2015-03-24 02:05:12.000000000 +0100
  10265. +++ linux-3.18.10/ipc/mqueue.c 2015-03-26 12:42:18.671588332 +0100
  10266. @@ -923,12 +923,17 @@
  10267. struct msg_msg *message,
  10268. struct ext_wait_queue *receiver)
  10269. {
  10270. + /*
  10271. + * Keep them in one critical section for PREEMPT_RT:
  10272. + */
  10273. + preempt_disable_rt();
  10274. receiver->msg = message;
  10275. list_del(&receiver->list);
  10276. receiver->state = STATE_PENDING;
  10277. wake_up_process(receiver->task);
  10278. smp_wmb();
  10279. receiver->state = STATE_READY;
  10280. + preempt_enable_rt();
  10281. }
  10282. /* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
  10283. @@ -942,13 +947,18 @@
  10284. wake_up_interruptible(&info->wait_q);
  10285. return;
  10286. }
  10287. - if (msg_insert(sender->msg, info))
  10288. - return;
  10289. - list_del(&sender->list);
  10290. - sender->state = STATE_PENDING;
  10291. - wake_up_process(sender->task);
  10292. - smp_wmb();
  10293. - sender->state = STATE_READY;
  10294. + /*
  10295. + * Keep them in one critical section for PREEMPT_RT:
  10296. + */
  10297. + preempt_disable_rt();
  10298. + if (!msg_insert(sender->msg, info)) {
  10299. + list_del(&sender->list);
  10300. + sender->state = STATE_PENDING;
  10301. + wake_up_process(sender->task);
  10302. + smp_wmb();
  10303. + sender->state = STATE_READY;
  10304. + }
  10305. + preempt_enable_rt();
  10306. }
  10307. SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
  10308. diff -Nur linux-3.18.10.orig/ipc/msg.c linux-3.18.10/ipc/msg.c
  10309. --- linux-3.18.10.orig/ipc/msg.c 2015-03-24 02:05:12.000000000 +0100
  10310. +++ linux-3.18.10/ipc/msg.c 2015-03-26 12:42:18.671588332 +0100
  10311. @@ -188,6 +188,12 @@
  10312. struct msg_receiver *msr, *t;
  10313. list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
  10314. + /*
  10315. + * Make sure that the wakeup doesn't preempt
  10316. + * this CPU prematurely. (on PREEMPT_RT)
  10317. + */
  10318. + preempt_disable_rt();
  10319. +
  10320. msr->r_msg = NULL; /* initialize expunge ordering */
  10321. wake_up_process(msr->r_tsk);
  10322. /*
  10323. @@ -198,6 +204,8 @@
  10324. */
  10325. smp_mb();
  10326. msr->r_msg = ERR_PTR(res);
  10327. +
  10328. + preempt_enable_rt();
  10329. }
  10330. }
  10331. @@ -574,6 +582,11 @@
  10332. if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
  10333. !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
  10334. msr->r_msgtype, msr->r_mode)) {
  10335. + /*
  10336. + * Make sure that the wakeup doesn't preempt
  10337. + * this CPU prematurely. (on PREEMPT_RT)
  10338. + */
  10339. + preempt_disable_rt();
  10340. list_del(&msr->r_list);
  10341. if (msr->r_maxsize < msg->m_ts) {
  10342. @@ -595,12 +608,13 @@
  10343. */
  10344. smp_mb();
  10345. msr->r_msg = msg;
  10346. + preempt_enable_rt();
  10347. return 1;
  10348. }
  10349. + preempt_enable_rt();
  10350. }
  10351. }
  10352. -
  10353. return 0;
  10354. }
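
The mqueue and msg hunks above apply one pattern: the waiter's slot is filled, the task is woken and the slot is marked ready inside a single preempt-disabled region, so the freshly woken (possibly higher-priority) task cannot run on this CPU before the final store. Distilled into a sketch, with my_slot and the MY_STATE_* values as illustrative stand-ins for the ipc-specific waiters:

	enum { MY_STATE_NONE, MY_STATE_PENDING, MY_STATE_READY };

	struct my_slot {
		struct task_struct *task;
		void *payload;
		int state;
	};

	static void my_publish_and_wake(struct my_slot *slot, void *payload)
	{
		preempt_disable_rt();		/* no-op unless PREEMPT_RT */
		slot->payload = payload;
		slot->state = MY_STATE_PENDING;
		wake_up_process(slot->task);
		smp_wmb();			/* payload visible before READY */
		slot->state = MY_STATE_READY;
		preempt_enable_rt();
	}
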
  10355. diff -Nur linux-3.18.10.orig/ipc/sem.c linux-3.18.10/ipc/sem.c
  10356. --- linux-3.18.10.orig/ipc/sem.c 2015-03-24 02:05:12.000000000 +0100
  10357. +++ linux-3.18.10/ipc/sem.c 2015-03-26 12:42:18.671588332 +0100
  10358. @@ -673,6 +673,13 @@
  10359. static void wake_up_sem_queue_prepare(struct list_head *pt,
  10360. struct sem_queue *q, int error)
  10361. {
  10362. +#ifdef CONFIG_PREEMPT_RT_BASE
  10363. + struct task_struct *p = q->sleeper;
  10364. + get_task_struct(p);
  10365. + q->status = error;
  10366. + wake_up_process(p);
  10367. + put_task_struct(p);
  10368. +#else
  10369. if (list_empty(pt)) {
  10370. /*
  10371. * Hold preempt off so that we don't get preempted and have the
  10372. @@ -684,6 +691,7 @@
  10373. q->pid = error;
  10374. list_add_tail(&q->list, pt);
  10375. +#endif
  10376. }
  10377. /**
  10378. @@ -697,6 +705,7 @@
  10379. */
  10380. static void wake_up_sem_queue_do(struct list_head *pt)
  10381. {
  10382. +#ifndef CONFIG_PREEMPT_RT_BASE
  10383. struct sem_queue *q, *t;
  10384. int did_something;
  10385. @@ -709,6 +718,7 @@
  10386. }
  10387. if (did_something)
  10388. preempt_enable();
  10389. +#endif
  10390. }
  10391. static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
  10392. diff -Nur linux-3.18.10.orig/kernel/cgroup.c linux-3.18.10/kernel/cgroup.c
  10393. --- linux-3.18.10.orig/kernel/cgroup.c 2015-03-24 02:05:12.000000000 +0100
  10394. +++ linux-3.18.10/kernel/cgroup.c 2015-03-26 12:42:18.671588332 +0100
  10395. @@ -4355,10 +4355,10 @@
  10396. queue_work(cgroup_destroy_wq, &css->destroy_work);
  10397. }
  10398. -static void css_release_work_fn(struct work_struct *work)
  10399. +static void css_release_work_fn(struct swork_event *sev)
  10400. {
  10401. struct cgroup_subsys_state *css =
  10402. - container_of(work, struct cgroup_subsys_state, destroy_work);
  10403. + container_of(sev, struct cgroup_subsys_state, destroy_swork);
  10404. struct cgroup_subsys *ss = css->ss;
  10405. struct cgroup *cgrp = css->cgroup;
  10406. @@ -4395,8 +4395,8 @@
  10407. struct cgroup_subsys_state *css =
  10408. container_of(ref, struct cgroup_subsys_state, refcnt);
  10409. - INIT_WORK(&css->destroy_work, css_release_work_fn);
  10410. - queue_work(cgroup_destroy_wq, &css->destroy_work);
  10411. + INIT_SWORK(&css->destroy_swork, css_release_work_fn);
  10412. + swork_queue(&css->destroy_swork);
  10413. }
  10414. static void init_and_link_css(struct cgroup_subsys_state *css,
  10415. @@ -4997,6 +4997,7 @@
  10416. */
  10417. cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
  10418. BUG_ON(!cgroup_destroy_wq);
  10419. + BUG_ON(swork_get());
  10420. /*
  10421. * Used to destroy pidlists and separate to serve as flush domain.
  10422. diff -Nur linux-3.18.10.orig/kernel/cpu.c linux-3.18.10/kernel/cpu.c
  10423. --- linux-3.18.10.orig/kernel/cpu.c 2015-03-24 02:05:12.000000000 +0100
  10424. +++ linux-3.18.10/kernel/cpu.c 2015-03-26 12:42:18.671588332 +0100
  10425. @@ -86,6 +86,290 @@
  10426. #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
  10427. #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
  10428. +/**
  10429. + * hotplug_pcp - per cpu hotplug descriptor
  10430. + * @unplug: set when pin_current_cpu() needs to sync tasks
  10431. + * @sync_tsk: the task that waits for tasks to finish pinned sections
  10432. + * @refcount: counter of tasks in pinned sections
  10433. + * @grab_lock: set when the tasks entering pinned sections should wait
  10434. + * @synced: notifier for @sync_tsk to tell cpu_down it's finished
  10435. + * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
  10436. + * @mutex_init: zero if the mutex hasn't been initialized yet.
  10437. + *
  10438. + * Although @unplug and @sync_tsk may point to the same task, the @unplug
  10439. + * is used as a flag and still exists after @sync_tsk has exited and
  10440. + * @sync_tsk set to NULL.
  10441. + */
  10442. +struct hotplug_pcp {
  10443. + struct task_struct *unplug;
  10444. + struct task_struct *sync_tsk;
  10445. + int refcount;
  10446. + int grab_lock;
  10447. + struct completion synced;
  10448. + struct completion unplug_wait;
  10449. +#ifdef CONFIG_PREEMPT_RT_FULL
  10450. + /*
  10451. + * Note, on PREEMPT_RT, the hotplug lock must save the state of
  10452. + * the task, otherwise the mutex will cause the task to fail
  10453. + * to sleep when required. (Because it's called from migrate_disable())
  10454. + *
  10455. + * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
  10456. + * state.
  10457. + */
  10458. + spinlock_t lock;
  10459. +#else
  10460. + struct mutex mutex;
  10461. +#endif
  10462. + int mutex_init;
  10463. +};
  10464. +
  10465. +#ifdef CONFIG_PREEMPT_RT_FULL
  10466. +# define hotplug_lock(hp) rt_spin_lock(&(hp)->lock)
  10467. +# define hotplug_unlock(hp) rt_spin_unlock(&(hp)->lock)
  10468. +#else
  10469. +# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
  10470. +# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
  10471. +#endif
  10472. +
  10473. +static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
  10474. +
  10475. +/**
  10476. + * pin_current_cpu - Prevent the current cpu from being unplugged
  10477. + *
  10478. + * Lightweight version of get_online_cpus() to prevent cpu from being
  10479. + * unplugged when code runs in a migration disabled region.
  10480. + *
  10481. + * Must be called with preemption disabled (preempt_count = 1)!
  10482. + */
  10483. +void pin_current_cpu(void)
  10484. +{
  10485. + struct hotplug_pcp *hp;
  10486. + int force = 0;
  10487. +
  10488. +retry:
  10489. + hp = &__get_cpu_var(hotplug_pcp);
  10490. +
  10491. + if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
  10492. + hp->unplug == current) {
  10493. + hp->refcount++;
  10494. + return;
  10495. + }
  10496. + if (hp->grab_lock) {
  10497. + preempt_enable();
  10498. + hotplug_lock(hp);
  10499. + hotplug_unlock(hp);
  10500. + } else {
  10501. + preempt_enable();
  10502. + /*
  10503. + * Try to push this task off of this CPU.
  10504. + */
  10505. + if (!migrate_me()) {
  10506. + preempt_disable();
  10507. + hp = &__get_cpu_var(hotplug_pcp);
  10508. + if (!hp->grab_lock) {
  10509. + /*
  10510. + * Just let it continue, it's already pinned
  10511. + * or about to sleep.
  10512. + */
  10513. + force = 1;
  10514. + goto retry;
  10515. + }
  10516. + preempt_enable();
  10517. + }
  10518. + }
  10519. + preempt_disable();
  10520. + goto retry;
  10521. +}
  10522. +
  10523. +/**
  10524. + * unpin_current_cpu - Allow unplug of current cpu
  10525. + *
  10526. + * Must be called with preemption or interrupts disabled!
  10527. + */
  10528. +void unpin_current_cpu(void)
  10529. +{
  10530. + struct hotplug_pcp *hp = &__get_cpu_var(hotplug_pcp);
  10531. +
  10532. + WARN_ON(hp->refcount <= 0);
  10533. +
  10534. + /* This is safe. sync_unplug_thread is pinned to this cpu */
  10535. + if (!--hp->refcount && hp->unplug && hp->unplug != current)
  10536. + wake_up_process(hp->unplug);
  10537. +}
  10538. +
  10539. +static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
  10540. +{
  10541. + set_current_state(TASK_UNINTERRUPTIBLE);
  10542. + while (hp->refcount) {
  10543. + schedule_preempt_disabled();
  10544. + set_current_state(TASK_UNINTERRUPTIBLE);
  10545. + }
  10546. +}
  10547. +
  10548. +static int sync_unplug_thread(void *data)
  10549. +{
  10550. + struct hotplug_pcp *hp = data;
  10551. +
  10552. + wait_for_completion(&hp->unplug_wait);
  10553. + preempt_disable();
  10554. + hp->unplug = current;
  10555. + wait_for_pinned_cpus(hp);
  10556. +
  10557. + /*
  10558. + * This thread will synchronize the cpu_down() with threads
  10559. + * that have pinned the CPU. When the pinned CPU count reaches
  10560. + * zero, we inform the cpu_down code to continue to the next step.
  10561. + */
  10562. + set_current_state(TASK_UNINTERRUPTIBLE);
  10563. + preempt_enable();
  10564. + complete(&hp->synced);
  10565. +
  10566. + /*
  10567. + * If all succeeds, the next step will need tasks to wait till
  10568. + * the CPU is offline before continuing. To do this, the grab_lock
  10569. + * is set and tasks going into pin_current_cpu() will block on the
  10570. + * mutex. But we still need to wait for those that are already in
  10571. + * pinned CPU sections. If cpu_down() fails, kthread_should_stop()
  10572. + * will kick this thread out.
  10573. + */
  10574. + while (!hp->grab_lock && !kthread_should_stop()) {
  10575. + schedule();
  10576. + set_current_state(TASK_UNINTERRUPTIBLE);
  10577. + }
  10578. +
  10579. + /* Make sure grab_lock is seen before we see a stale completion */
  10580. + smp_mb();
  10581. +
  10582. + /*
  10583. + * Now just before cpu_down() enters stop machine, we need to make
  10584. + * sure all tasks that are in pinned CPU sections are out, and new
  10585. + * tasks will now grab the lock, keeping them from entering pinned
  10586. + * CPU sections.
  10587. + */
  10588. + if (!kthread_should_stop()) {
  10589. + preempt_disable();
  10590. + wait_for_pinned_cpus(hp);
  10591. + preempt_enable();
  10592. + complete(&hp->synced);
  10593. + }
  10594. +
  10595. + set_current_state(TASK_UNINTERRUPTIBLE);
  10596. + while (!kthread_should_stop()) {
  10597. + schedule();
  10598. + set_current_state(TASK_UNINTERRUPTIBLE);
  10599. + }
  10600. + set_current_state(TASK_RUNNING);
  10601. +
  10602. + /*
  10603. + * Force this thread off this CPU as it's going down and
  10604. + * we don't want any more work on this CPU.
  10605. + */
  10606. + current->flags &= ~PF_NO_SETAFFINITY;
  10607. + do_set_cpus_allowed(current, cpu_present_mask);
  10608. + migrate_me();
  10609. + return 0;
  10610. +}
  10611. +
  10612. +static void __cpu_unplug_sync(struct hotplug_pcp *hp)
  10613. +{
  10614. + wake_up_process(hp->sync_tsk);
  10615. + wait_for_completion(&hp->synced);
  10616. +}
  10617. +
  10618. +static void __cpu_unplug_wait(unsigned int cpu)
  10619. +{
  10620. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  10621. +
  10622. + complete(&hp->unplug_wait);
  10623. + wait_for_completion(&hp->synced);
  10624. +}
  10625. +
  10626. +/*
  10627. + * Start the sync_unplug_thread on the target cpu and wait for it to
  10628. + * complete.
  10629. + */
  10630. +static int cpu_unplug_begin(unsigned int cpu)
  10631. +{
  10632. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  10633. + int err;
  10634. +
  10635. + /* Protected by cpu_hotplug.lock */
  10636. + if (!hp->mutex_init) {
  10637. +#ifdef CONFIG_PREEMPT_RT_FULL
  10638. + spin_lock_init(&hp->lock);
  10639. +#else
  10640. + mutex_init(&hp->mutex);
  10641. +#endif
  10642. + hp->mutex_init = 1;
  10643. + }
  10644. +
  10645. + /* Inform the scheduler to migrate tasks off this CPU */
  10646. + tell_sched_cpu_down_begin(cpu);
  10647. +
  10648. + init_completion(&hp->synced);
  10649. + init_completion(&hp->unplug_wait);
  10650. +
  10651. + hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
  10652. + if (IS_ERR(hp->sync_tsk)) {
  10653. + err = PTR_ERR(hp->sync_tsk);
  10654. + hp->sync_tsk = NULL;
  10655. + return err;
  10656. + }
  10657. + kthread_bind(hp->sync_tsk, cpu);
  10658. +
  10659. + /*
  10660. + * Wait for tasks to get out of the pinned sections,
  10661. + * it's still OK if new tasks enter. Some CPU notifiers will
  10662. + * wait for tasks that are going to enter these sections and
  10663. + * we must not have them block.
  10664. + */
  10665. + wake_up_process(hp->sync_tsk);
  10666. + return 0;
  10667. +}
  10668. +
  10669. +static void cpu_unplug_sync(unsigned int cpu)
  10670. +{
  10671. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  10672. +
  10673. + init_completion(&hp->synced);
  10674. + /* The completion needs to be initialized before setting grab_lock */
  10675. + smp_wmb();
  10676. +
  10677. + /* Grab the mutex before setting grab_lock */
  10678. + hotplug_lock(hp);
  10679. + hp->grab_lock = 1;
  10680. +
  10681. + /*
  10682. + * The CPU notifiers have been completed.
  10683. + * Wait for tasks to get out of pinned CPU sections and have new
  10684. + * tasks block until the CPU is completely down.
  10685. + */
  10686. + __cpu_unplug_sync(hp);
  10687. +
  10688. + /* All done with the sync thread */
  10689. + kthread_stop(hp->sync_tsk);
  10690. + hp->sync_tsk = NULL;
  10691. +}
  10692. +
  10693. +static void cpu_unplug_done(unsigned int cpu)
  10694. +{
  10695. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  10696. +
  10697. + hp->unplug = NULL;
  10698. + /* Let all tasks know cpu unplug is finished before cleaning up */
  10699. + smp_wmb();
  10700. +
  10701. + if (hp->sync_tsk)
  10702. + kthread_stop(hp->sync_tsk);
  10703. +
  10704. + if (hp->grab_lock) {
  10705. + hotplug_unlock(hp);
  10706. + /* protected by cpu_hotplug.lock */
  10707. + hp->grab_lock = 0;
  10708. + }
  10709. + tell_sched_cpu_down_done(cpu);
  10710. +}
  10711. +
  10712. void get_online_cpus(void)
  10713. {
  10714. might_sleep();
  10715. @@ -102,6 +386,7 @@
  10716. {
  10717. if (cpu_hotplug.active_writer == current)
  10718. return true;
  10719. +
  10720. if (!mutex_trylock(&cpu_hotplug.lock))
  10721. return false;
  10722. cpuhp_lock_acquire_tryread();
  10723. @@ -349,13 +634,15 @@
  10724. /* Requires cpu_add_remove_lock to be held */
  10725. static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
  10726. {
  10727. - int err, nr_calls = 0;
  10728. + int mycpu, err, nr_calls = 0;
  10729. void *hcpu = (void *)(long)cpu;
  10730. unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
  10731. struct take_cpu_down_param tcd_param = {
  10732. .mod = mod,
  10733. .hcpu = hcpu,
  10734. };
  10735. + cpumask_var_t cpumask;
  10736. + cpumask_var_t cpumask_org;
  10737. if (num_online_cpus() == 1)
  10738. return -EBUSY;
  10739. @@ -363,7 +650,34 @@
  10740. if (!cpu_online(cpu))
  10741. return -EINVAL;
  10742. + /* Move the downtaker off the unplug cpu */
  10743. + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
  10744. + return -ENOMEM;
  10745. + if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) {
  10746. + free_cpumask_var(cpumask);
  10747. + return -ENOMEM;
  10748. + }
  10749. +
  10750. + cpumask_copy(cpumask_org, tsk_cpus_allowed(current));
  10751. + cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
  10752. + set_cpus_allowed_ptr(current, cpumask);
  10753. + free_cpumask_var(cpumask);
  10754. + migrate_disable();
  10755. + mycpu = smp_processor_id();
  10756. + if (mycpu == cpu) {
  10757. + printk(KERN_ERR "Yuck! Still on unplug CPU!\n");
  10758. + migrate_enable();
  10759. + err = -EBUSY;
  10760. + goto restore_cpus;
  10761. + }
  10762. + migrate_enable();
  10763. +
  10764. cpu_hotplug_begin();
  10765. + err = cpu_unplug_begin(cpu);
  10766. + if (err) {
  10767. + printk("cpu_unplug_begin(%d) failed\n", cpu);
  10768. + goto out_cancel;
  10769. + }
  10770. err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
  10771. if (err) {
  10772. @@ -389,8 +703,12 @@
  10773. #endif
  10774. synchronize_rcu();
  10775. + __cpu_unplug_wait(cpu);
  10776. smpboot_park_threads(cpu);
  10777. + /* Notifiers are done. Don't let any more tasks pin this CPU. */
  10778. + cpu_unplug_sync(cpu);
  10779. +
  10780. /*
  10781. * So now all preempt/rcu users must observe !cpu_active().
  10782. */
  10783. @@ -423,9 +741,14 @@
  10784. check_for_tasks(cpu);
  10785. out_release:
  10786. + cpu_unplug_done(cpu);
  10787. +out_cancel:
  10788. cpu_hotplug_done();
  10789. if (!err)
  10790. cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
  10791. +restore_cpus:
  10792. + set_cpus_allowed_ptr(current, cpumask_org);
  10793. + free_cpumask_var(cpumask_org);
  10794. return err;
  10795. }
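
The only intended users of pin_current_cpu()/unpin_current_cpu() are migrate_disable()/migrate_enable() elsewhere in this patch set; the sketch below shows their shape, with the per-task migrate-disable counter (what actually keeps the task on this CPU) reduced to a comment:

	static inline void my_migrate_disable_sketch(void)
	{
		preempt_disable();	/* pin_current_cpu() expects preempt_count == 1 */
		pin_current_cpu();	/* hold off cpu_down() of this CPU */
		/* ...bump the per-task migrate-disable counter here... */
		preempt_enable();
	}

	static inline void my_migrate_enable_sketch(void)
	{
		preempt_disable();
		/* ...drop the per-task migrate-disable counter here... */
		unpin_current_cpu();	/* let a pending unplug continue */
		preempt_enable();
	}
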
  10796. diff -Nur linux-3.18.10.orig/kernel/debug/kdb/kdb_io.c linux-3.18.10/kernel/debug/kdb/kdb_io.c
  10797. --- linux-3.18.10.orig/kernel/debug/kdb/kdb_io.c 2015-03-24 02:05:12.000000000 +0100
  10798. +++ linux-3.18.10/kernel/debug/kdb/kdb_io.c 2015-03-26 12:42:18.671588332 +0100
  10799. @@ -554,7 +554,6 @@
  10800. int linecount;
  10801. int colcount;
  10802. int logging, saved_loglevel = 0;
  10803. - int saved_trap_printk;
  10804. int got_printf_lock = 0;
  10805. int retlen = 0;
  10806. int fnd, len;
  10807. @@ -565,8 +564,6 @@
  10808. unsigned long uninitialized_var(flags);
  10809. preempt_disable();
  10810. - saved_trap_printk = kdb_trap_printk;
  10811. - kdb_trap_printk = 0;
  10812. /* Serialize kdb_printf if multiple cpus try to write at once.
  10813. * But if any cpu goes recursive in kdb, just print the output,
  10814. @@ -833,7 +830,6 @@
  10815. } else {
  10816. __release(kdb_printf_lock);
  10817. }
  10818. - kdb_trap_printk = saved_trap_printk;
  10819. preempt_enable();
  10820. return retlen;
  10821. }
  10822. @@ -843,9 +839,11 @@
  10823. va_list ap;
  10824. int r;
  10825. + kdb_trap_printk++;
  10826. va_start(ap, fmt);
  10827. r = vkdb_printf(fmt, ap);
  10828. va_end(ap);
  10829. + kdb_trap_printk--;
  10830. return r;
  10831. }
  10832. diff -Nur linux-3.18.10.orig/kernel/events/core.c linux-3.18.10/kernel/events/core.c
  10833. --- linux-3.18.10.orig/kernel/events/core.c 2015-03-24 02:05:12.000000000 +0100
  10834. +++ linux-3.18.10/kernel/events/core.c 2015-03-26 12:42:18.675588336 +0100
  10835. @@ -6336,6 +6336,7 @@
  10836. hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  10837. hwc->hrtimer.function = perf_swevent_hrtimer;
  10838. + hwc->hrtimer.irqsafe = 1;
  10839. /*
  10840. * Since hrtimers have a fixed rate, we can do a static freq->period
  10841. diff -Nur linux-3.18.10.orig/kernel/exit.c linux-3.18.10/kernel/exit.c
  10842. --- linux-3.18.10.orig/kernel/exit.c 2015-03-24 02:05:12.000000000 +0100
  10843. +++ linux-3.18.10/kernel/exit.c 2015-03-26 12:42:18.675588336 +0100
  10844. @@ -147,7 +147,7 @@
  10845. * Do this under ->siglock, we can race with another thread
  10846. * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
  10847. */
  10848. - flush_sigqueue(&tsk->pending);
  10849. + flush_task_sigqueue(tsk);
  10850. tsk->sighand = NULL;
  10851. spin_unlock(&sighand->siglock);
  10852. diff -Nur linux-3.18.10.orig/kernel/fork.c linux-3.18.10/kernel/fork.c
  10853. --- linux-3.18.10.orig/kernel/fork.c 2015-03-24 02:05:12.000000000 +0100
  10854. +++ linux-3.18.10/kernel/fork.c 2015-03-26 12:42:18.675588336 +0100
  10855. @@ -97,7 +97,7 @@
  10856. DEFINE_PER_CPU(unsigned long, process_counts) = 0;
  10857. -__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
  10858. +DEFINE_RWLOCK(tasklist_lock); /* outer */
  10859. #ifdef CONFIG_PROVE_RCU
  10860. int lockdep_tasklist_lock_is_held(void)
  10861. @@ -233,7 +233,9 @@
  10862. if (atomic_dec_and_test(&sig->sigcnt))
  10863. free_signal_struct(sig);
  10864. }
  10865. -
  10866. +#ifdef CONFIG_PREEMPT_RT_BASE
  10867. +static
  10868. +#endif
  10869. void __put_task_struct(struct task_struct *tsk)
  10870. {
  10871. WARN_ON(!tsk->exit_state);
  10872. @@ -249,7 +251,18 @@
  10873. if (!profile_handoff_task(tsk))
  10874. free_task(tsk);
  10875. }
  10876. +#ifndef CONFIG_PREEMPT_RT_BASE
  10877. EXPORT_SYMBOL_GPL(__put_task_struct);
  10878. +#else
  10879. +void __put_task_struct_cb(struct rcu_head *rhp)
  10880. +{
  10881. + struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu);
  10882. +
  10883. + __put_task_struct(tsk);
  10884. +
  10885. +}
  10886. +EXPORT_SYMBOL_GPL(__put_task_struct_cb);
  10887. +#endif
  10888. void __init __weak arch_task_cache_init(void) { }
  10889. @@ -643,6 +656,19 @@
  10890. }
  10891. EXPORT_SYMBOL_GPL(__mmdrop);
  10892. +#ifdef CONFIG_PREEMPT_RT_BASE
  10893. +/*
  10894. + * RCU callback for delayed mm drop. Not strictly rcu, but we don't
  10895. + * want another facility to make this work.
  10896. + */
  10897. +void __mmdrop_delayed(struct rcu_head *rhp)
  10898. +{
  10899. + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
  10900. +
  10901. + __mmdrop(mm);
  10902. +}
  10903. +#endif
  10904. +
  10905. /*
  10906. * Decrement the use count and release all resources for an mm.
  10907. */
  10908. @@ -1157,6 +1183,9 @@
  10909. */
  10910. static void posix_cpu_timers_init(struct task_struct *tsk)
  10911. {
  10912. +#ifdef CONFIG_PREEMPT_RT_BASE
  10913. + tsk->posix_timer_list = NULL;
  10914. +#endif
  10915. tsk->cputime_expires.prof_exp = 0;
  10916. tsk->cputime_expires.virt_exp = 0;
  10917. tsk->cputime_expires.sched_exp = 0;
  10918. @@ -1284,6 +1313,7 @@
  10919. spin_lock_init(&p->alloc_lock);
  10920. init_sigpending(&p->pending);
  10921. + p->sigqueue_cache = NULL;
  10922. p->utime = p->stime = p->gtime = 0;
  10923. p->utimescaled = p->stimescaled = 0;
  10924. @@ -1291,7 +1321,8 @@
  10925. p->prev_cputime.utime = p->prev_cputime.stime = 0;
  10926. #endif
  10927. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  10928. - seqlock_init(&p->vtime_seqlock);
  10929. + raw_spin_lock_init(&p->vtime_lock);
  10930. + seqcount_init(&p->vtime_seq);
  10931. p->vtime_snap = 0;
  10932. p->vtime_snap_whence = VTIME_SLEEPING;
  10933. #endif
  10934. @@ -1342,6 +1373,9 @@
  10935. p->hardirq_context = 0;
  10936. p->softirq_context = 0;
  10937. #endif
  10938. +#ifdef CONFIG_PREEMPT_RT_FULL
  10939. + p->pagefault_disabled = 0;
  10940. +#endif
  10941. #ifdef CONFIG_LOCKDEP
  10942. p->lockdep_depth = 0; /* no locks held yet */
  10943. p->curr_chain_key = 0;
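
__mmdrop_delayed() above is only the RCU callback half; its caller (mmdrop_delayed(), added to sched.h elsewhere in this patch set) is expected to look roughly like the sketch below, deferring the final free so it does not happen from a context that must not sleep on RT:

	static inline void my_mmdrop_delayed(struct mm_struct *mm)
	{
		if (atomic_dec_and_test(&mm->mm_count))
			call_rcu(&mm->delayed_drop, __mmdrop_delayed);
	}
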
  10944. diff -Nur linux-3.18.10.orig/kernel/futex.c linux-3.18.10/kernel/futex.c
  10945. --- linux-3.18.10.orig/kernel/futex.c 2015-03-24 02:05:12.000000000 +0100
  10946. +++ linux-3.18.10/kernel/futex.c 2015-03-26 12:42:18.675588336 +0100
  10947. @@ -738,7 +738,9 @@
  10948. * task still owns the PI-state:
  10949. */
  10950. if (head->next != next) {
  10951. + raw_spin_unlock_irq(&curr->pi_lock);
  10952. spin_unlock(&hb->lock);
  10953. + raw_spin_lock_irq(&curr->pi_lock);
  10954. continue;
  10955. }
  10956. @@ -1705,6 +1707,16 @@
  10957. requeue_pi_wake_futex(this, &key2, hb2);
  10958. drop_count++;
  10959. continue;
  10960. + } else if (ret == -EAGAIN) {
  10961. + /*
  10962. + * Waiter was woken by timeout or
  10963. + * signal and has set pi_blocked_on to
  10964. + * PI_WAKEUP_INPROGRESS before we
  10965. + * tried to enqueue it on the rtmutex.
  10966. + */
  10967. + this->pi_state = NULL;
  10968. + free_pi_state(pi_state);
  10969. + continue;
  10970. } else if (ret) {
  10971. /* -EDEADLK */
  10972. this->pi_state = NULL;
  10973. @@ -2549,7 +2561,7 @@
  10974. struct hrtimer_sleeper timeout, *to = NULL;
  10975. struct rt_mutex_waiter rt_waiter;
  10976. struct rt_mutex *pi_mutex = NULL;
  10977. - struct futex_hash_bucket *hb;
  10978. + struct futex_hash_bucket *hb, *hb2;
  10979. union futex_key key2 = FUTEX_KEY_INIT;
  10980. struct futex_q q = futex_q_init;
  10981. int res, ret;
  10982. @@ -2574,10 +2586,7 @@
  10983. * The waiter is allocated on our stack, manipulated by the requeue
  10984. * code while we sleep on uaddr.
  10985. */
  10986. - debug_rt_mutex_init_waiter(&rt_waiter);
  10987. - RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
  10988. - RB_CLEAR_NODE(&rt_waiter.tree_entry);
  10989. - rt_waiter.task = NULL;
  10990. + rt_mutex_init_waiter(&rt_waiter, false);
  10991. ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
  10992. if (unlikely(ret != 0))
  10993. @@ -2608,20 +2617,55 @@
  10994. /* Queue the futex_q, drop the hb lock, wait for wakeup. */
  10995. futex_wait_queue_me(hb, &q, to);
  10996. - spin_lock(&hb->lock);
  10997. - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
  10998. - spin_unlock(&hb->lock);
  10999. - if (ret)
  11000. - goto out_put_keys;
  11001. + /*
  11002. + * On RT we must avoid races with requeue and trying to block
  11003. + * on two mutexes (hb->lock and uaddr2's rtmutex) by
  11004. + * serializing access to pi_blocked_on with pi_lock.
  11005. + */
  11006. + raw_spin_lock_irq(&current->pi_lock);
  11007. + if (current->pi_blocked_on) {
  11008. + /*
  11009. + * We have been requeued or are in the process of
  11010. + * being requeued.
  11011. + */
  11012. + raw_spin_unlock_irq(&current->pi_lock);
  11013. + } else {
  11014. + /*
  11015. + * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS
  11016. + * prevents a concurrent requeue from moving us to the
  11017. + * uaddr2 rtmutex. After that we can safely acquire
  11018. + * (and possibly block on) hb->lock.
  11019. + */
  11020. + current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
  11021. + raw_spin_unlock_irq(&current->pi_lock);
  11022. +
  11023. + spin_lock(&hb->lock);
  11024. +
  11025. + /*
  11026. + * Clean up pi_blocked_on. We might leak it otherwise
  11027. + * when we succeeded with the hb->lock in the fast
  11028. + * path.
  11029. + */
  11030. + raw_spin_lock_irq(&current->pi_lock);
  11031. + current->pi_blocked_on = NULL;
  11032. + raw_spin_unlock_irq(&current->pi_lock);
  11033. +
  11034. + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
  11035. + spin_unlock(&hb->lock);
  11036. + if (ret)
  11037. + goto out_put_keys;
  11038. + }
  11039. /*
  11040. - * In order for us to be here, we know our q.key == key2, and since
  11041. - * we took the hb->lock above, we also know that futex_requeue() has
  11042. - * completed and we no longer have to concern ourselves with a wakeup
  11043. - * race with the atomic proxy lock acquisition by the requeue code. The
  11044. - * futex_requeue dropped our key1 reference and incremented our key2
  11045. - * reference count.
  11046. + * In order to be here, we have either been requeued, are in
  11047. + * the process of being requeued, or requeue successfully
  11048. + * acquired uaddr2 on our behalf. If pi_blocked_on was
  11049. + * non-null above, we may be racing with a requeue. Do not
  11050. + * rely on q->lock_ptr to be hb2->lock until after blocking on
  11051. + * hb->lock or hb2->lock. The futex_requeue dropped our key1
  11052. + * reference and incremented our key2 reference count.
  11053. */
  11054. + hb2 = hash_futex(&key2);
  11055. /* Check if the requeue code acquired the second futex for us. */
  11056. if (!q.rt_waiter) {
  11057. @@ -2630,9 +2674,10 @@
  11058. * did a lock-steal - fix up the PI-state in that case.
  11059. */
  11060. if (q.pi_state && (q.pi_state->owner != current)) {
  11061. - spin_lock(q.lock_ptr);
  11062. + spin_lock(&hb2->lock);
  11063. + BUG_ON(&hb2->lock != q.lock_ptr);
  11064. ret = fixup_pi_state_owner(uaddr2, &q, current);
  11065. - spin_unlock(q.lock_ptr);
  11066. + spin_unlock(&hb2->lock);
  11067. }
  11068. } else {
  11069. /*
  11070. @@ -2645,7 +2690,8 @@
  11071. ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
  11072. debug_rt_mutex_free_waiter(&rt_waiter);
  11073. - spin_lock(q.lock_ptr);
  11074. + spin_lock(&hb2->lock);
  11075. + BUG_ON(&hb2->lock != q.lock_ptr);
  11076. /*
  11077. * Fixup the pi_state owner and possibly acquire the lock if we
  11078. * haven't already.
  11079. diff -Nur linux-3.18.10.orig/kernel/irq/handle.c linux-3.18.10/kernel/irq/handle.c
  11080. --- linux-3.18.10.orig/kernel/irq/handle.c 2015-03-24 02:05:12.000000000 +0100
  11081. +++ linux-3.18.10/kernel/irq/handle.c 2015-03-26 12:42:18.675588336 +0100
  11082. @@ -133,6 +133,8 @@
  11083. irqreturn_t
  11084. handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
  11085. {
  11086. + struct pt_regs *regs = get_irq_regs();
  11087. + u64 ip = regs ? instruction_pointer(regs) : 0;
  11088. irqreturn_t retval = IRQ_NONE;
  11089. unsigned int flags = 0, irq = desc->irq_data.irq;
  11090. @@ -173,7 +175,11 @@
  11091. action = action->next;
  11092. } while (action);
  11093. - add_interrupt_randomness(irq, flags);
  11094. +#ifndef CONFIG_PREEMPT_RT_FULL
  11095. + add_interrupt_randomness(irq, flags, ip);
  11096. +#else
  11097. + desc->random_ip = ip;
  11098. +#endif
  11099. if (!noirqdebug)
  11100. note_interrupt(irq, desc, retval);
  11101. diff -Nur linux-3.18.10.orig/kernel/irq/manage.c linux-3.18.10/kernel/irq/manage.c
  11102. --- linux-3.18.10.orig/kernel/irq/manage.c 2015-03-24 02:05:12.000000000 +0100
  11103. +++ linux-3.18.10/kernel/irq/manage.c 2015-03-26 12:42:18.675588336 +0100
  11104. @@ -22,6 +22,7 @@
  11105. #include "internals.h"
  11106. #ifdef CONFIG_IRQ_FORCED_THREADING
  11107. +# ifndef CONFIG_PREEMPT_RT_BASE
  11108. __read_mostly bool force_irqthreads;
  11109. static int __init setup_forced_irqthreads(char *arg)
  11110. @@ -30,6 +31,7 @@
  11111. return 0;
  11112. }
  11113. early_param("threadirqs", setup_forced_irqthreads);
  11114. +# endif
  11115. #endif
  11116. static void __synchronize_hardirq(struct irq_desc *desc)
  11117. @@ -173,6 +175,62 @@
  11118. irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
  11119. #endif
  11120. +#ifdef CONFIG_PREEMPT_RT_FULL
  11121. +static void _irq_affinity_notify(struct irq_affinity_notify *notify);
  11122. +static struct task_struct *set_affinity_helper;
  11123. +static LIST_HEAD(affinity_list);
  11124. +static DEFINE_RAW_SPINLOCK(affinity_list_lock);
  11125. +
  11126. +static int set_affinity_thread(void *unused)
  11127. +{
  11128. + while (1) {
  11129. + struct irq_affinity_notify *notify;
  11130. + int empty;
  11131. +
  11132. + set_current_state(TASK_INTERRUPTIBLE);
  11133. +
  11134. + raw_spin_lock_irq(&affinity_list_lock);
  11135. + empty = list_empty(&affinity_list);
  11136. + raw_spin_unlock_irq(&affinity_list_lock);
  11137. +
  11138. + if (empty)
  11139. + schedule();
  11140. + if (kthread_should_stop())
  11141. + break;
  11142. + set_current_state(TASK_RUNNING);
  11143. +try_next:
  11144. + notify = NULL;
  11145. +
  11146. + raw_spin_lock_irq(&affinity_list_lock);
  11147. + if (!list_empty(&affinity_list)) {
  11148. + notify = list_first_entry(&affinity_list,
  11149. + struct irq_affinity_notify, list);
  11150. + list_del_init(&notify->list);
  11151. + }
  11152. + raw_spin_unlock_irq(&affinity_list_lock);
  11153. +
  11154. + if (!notify)
  11155. + continue;
  11156. + _irq_affinity_notify(notify);
  11157. + goto try_next;
  11158. + }
  11159. + return 0;
  11160. +}
  11161. +
  11162. +static void init_helper_thread(void)
  11163. +{
  11164. + if (set_affinity_helper)
  11165. + return;
  11166. + set_affinity_helper = kthread_run(set_affinity_thread, NULL,
  11167. + "affinity-cb");
  11168. + WARN_ON(IS_ERR(set_affinity_helper));
  11169. +}
  11170. +#else
  11171. +
  11172. +static inline void init_helper_thread(void) { }
  11173. +
  11174. +#endif
  11175. +
  11176. int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
  11177. bool force)
  11178. {
  11179. @@ -211,7 +269,17 @@
  11180. if (desc->affinity_notify) {
  11181. kref_get(&desc->affinity_notify->kref);
  11182. +
  11183. +#ifdef CONFIG_PREEMPT_RT_FULL
  11184. + raw_spin_lock(&affinity_list_lock);
  11185. + if (list_empty(&desc->affinity_notify->list))
  11186. + list_add_tail(&desc->affinity_notify->list,
  11187. + &affinity_list);
  11188. + raw_spin_unlock(&affinity_list_lock);
  11189. + wake_up_process(set_affinity_helper);
  11190. +#else
  11191. schedule_work(&desc->affinity_notify->work);
  11192. +#endif
  11193. }
  11194. irqd_set(data, IRQD_AFFINITY_SET);
  11195. @@ -246,10 +314,8 @@
  11196. }
  11197. EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
  11198. -static void irq_affinity_notify(struct work_struct *work)
  11199. +static void _irq_affinity_notify(struct irq_affinity_notify *notify)
  11200. {
  11201. - struct irq_affinity_notify *notify =
  11202. - container_of(work, struct irq_affinity_notify, work);
  11203. struct irq_desc *desc = irq_to_desc(notify->irq);
  11204. cpumask_var_t cpumask;
  11205. unsigned long flags;
  11206. @@ -271,6 +337,13 @@
  11207. kref_put(&notify->kref, notify->release);
  11208. }
  11209. +static void irq_affinity_notify(struct work_struct *work)
  11210. +{
  11211. + struct irq_affinity_notify *notify =
  11212. + container_of(work, struct irq_affinity_notify, work);
  11213. + _irq_affinity_notify(notify);
  11214. +}
  11215. +
  11216. /**
  11217. * irq_set_affinity_notifier - control notification of IRQ affinity changes
  11218. * @irq: Interrupt for which to enable/disable notification
  11219. @@ -300,6 +373,8 @@
  11220. notify->irq = irq;
  11221. kref_init(&notify->kref);
  11222. INIT_WORK(&notify->work, irq_affinity_notify);
  11223. + INIT_LIST_HEAD(&notify->list);
  11224. + init_helper_thread();
  11225. }
  11226. raw_spin_lock_irqsave(&desc->lock, flags);
  11227. @@ -788,7 +863,15 @@
  11228. local_bh_disable();
  11229. ret = action->thread_fn(action->irq, action->dev_id);
  11230. irq_finalize_oneshot(desc, action);
  11231. - local_bh_enable();
  11232. + /*
  11233. + * Interrupts which have real time requirements can be set up
  11234. + * to avoid softirq processing in the thread handler. This is
  11235. + * safe as these interrupts do not raise soft interrupts.
  11236. + */
  11237. + if (irq_settings_no_softirq_call(desc))
  11238. + _local_bh_enable();
  11239. + else
  11240. + local_bh_enable();
  11241. return ret;
  11242. }
  11243. @@ -871,6 +954,12 @@
  11244. if (action_ret == IRQ_HANDLED)
  11245. atomic_inc(&desc->threads_handled);
  11246. +#ifdef CONFIG_PREEMPT_RT_FULL
  11247. + migrate_disable();
  11248. + add_interrupt_randomness(action->irq, 0,
  11249. + desc->random_ip ^ (unsigned long) action);
  11250. + migrate_enable();
  11251. +#endif
  11252. wake_threads_waitq(desc);
  11253. }
  11254. @@ -1184,6 +1273,9 @@
  11255. irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
  11256. }
  11257. + if (new->flags & IRQF_NO_SOFTIRQ_CALL)
  11258. + irq_settings_set_no_softirq_call(desc);
  11259. +
  11260. /* Set default affinity mask once everything is setup */
  11261. setup_affinity(irq, desc, mask);
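
Drivers opt into the softirq-free threaded-handler path added above by passing IRQF_NO_SOFTIRQ_CALL (the flag itself is added to interrupt.h elsewhere in this patch set) when requesting a threaded interrupt. A sketch of such a request site, with hypothetical handler and device names:

	#include <linux/interrupt.h>

	static irqreturn_t my_hard_handler(int irq, void *dev_id)
	{
		return IRQ_WAKE_THREAD;
	}

	static irqreturn_t my_thread_handler(int irq, void *dev_id)
	{
		/* with IRQF_NO_SOFTIRQ_CALL the core uses _local_bh_enable() here */
		return IRQ_HANDLED;
	}

	static int my_request(unsigned int irq, void *my_dev)
	{
		return request_threaded_irq(irq, my_hard_handler,
					    my_thread_handler,
					    IRQF_NO_SOFTIRQ_CALL,
					    "my-rt-device", my_dev);
	}
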
  11262. diff -Nur linux-3.18.10.orig/kernel/irq/settings.h linux-3.18.10/kernel/irq/settings.h
  11263. --- linux-3.18.10.orig/kernel/irq/settings.h 2015-03-24 02:05:12.000000000 +0100
  11264. +++ linux-3.18.10/kernel/irq/settings.h 2015-03-26 12:42:18.675588336 +0100
  11265. @@ -15,6 +15,7 @@
  11266. _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD,
  11267. _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID,
  11268. _IRQ_IS_POLLED = IRQ_IS_POLLED,
  11269. + _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL,
  11270. _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
  11271. };
  11272. @@ -28,6 +29,7 @@
  11273. #define IRQ_NESTED_THREAD GOT_YOU_MORON
  11274. #define IRQ_PER_CPU_DEVID GOT_YOU_MORON
  11275. #define IRQ_IS_POLLED GOT_YOU_MORON
  11276. +#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON
  11277. #undef IRQF_MODIFY_MASK
  11278. #define IRQF_MODIFY_MASK GOT_YOU_MORON
  11279. @@ -38,6 +40,16 @@
  11280. desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK);
  11281. }
  11282. +static inline bool irq_settings_no_softirq_call(struct irq_desc *desc)
  11283. +{
  11284. + return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL;
  11285. +}
  11286. +
  11287. +static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc)
  11288. +{
  11289. + desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL;
  11290. +}
  11291. +
  11292. static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
  11293. {
  11294. return desc->status_use_accessors & _IRQ_PER_CPU;
  11295. diff -Nur linux-3.18.10.orig/kernel/irq/spurious.c linux-3.18.10/kernel/irq/spurious.c
  11296. --- linux-3.18.10.orig/kernel/irq/spurious.c 2015-03-24 02:05:12.000000000 +0100
  11297. +++ linux-3.18.10/kernel/irq/spurious.c 2015-03-26 12:42:18.675588336 +0100
  11298. @@ -444,6 +444,10 @@
  11299. static int __init irqfixup_setup(char *str)
  11300. {
  11301. +#ifdef CONFIG_PREEMPT_RT_BASE
  11302. + pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
  11303. + return 1;
  11304. +#endif
  11305. irqfixup = 1;
  11306. printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
  11307. printk(KERN_WARNING "This may impact system performance.\n");
  11308. @@ -456,6 +460,10 @@
  11309. static int __init irqpoll_setup(char *str)
  11310. {
  11311. +#ifdef CONFIG_PREEMPT_RT_BASE
  11312. + pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
  11313. + return 1;
  11314. +#endif
  11315. irqfixup = 2;
  11316. printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
  11317. "enabled\n");
  11318. diff -Nur linux-3.18.10.orig/kernel/irq_work.c linux-3.18.10/kernel/irq_work.c
  11319. --- linux-3.18.10.orig/kernel/irq_work.c 2015-03-24 02:05:12.000000000 +0100
  11320. +++ linux-3.18.10/kernel/irq_work.c 2015-03-26 12:42:18.675588336 +0100
  11321. @@ -22,7 +22,9 @@
  11322. static DEFINE_PER_CPU(struct llist_head, raised_list);
  11323. static DEFINE_PER_CPU(struct llist_head, lazy_list);
  11324. -
  11325. +#ifdef CONFIG_PREEMPT_RT_FULL
  11326. +static DEFINE_PER_CPU(struct llist_head, hirq_work_list);
  11327. +#endif
  11328. /*
  11329. * Claim the entry so that no one else will poke at it.
  11330. */
  11331. @@ -49,7 +51,11 @@
  11332. return true;
  11333. }
  11334. +#ifdef CONFIG_PREEMPT_RT_FULL
  11335. +void arch_irq_work_raise(void)
  11336. +#else
  11337. void __weak arch_irq_work_raise(void)
  11338. +#endif
  11339. {
  11340. /*
  11341. * Lame architectures will get the timer tick callback
  11342. @@ -65,6 +71,8 @@
  11343. */
  11344. bool irq_work_queue_on(struct irq_work *work, int cpu)
  11345. {
  11346. + bool raise_irqwork;
  11347. +
  11348. /* All work should have been flushed before going offline */
  11349. WARN_ON_ONCE(cpu_is_offline(cpu));
  11350. @@ -75,7 +83,19 @@
  11351. if (!irq_work_claim(work))
  11352. return false;
  11353. - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
  11354. +#ifdef CONFIG_PREEMPT_RT_FULL
  11355. + if (work->flags & IRQ_WORK_HARD_IRQ)
  11356. + raise_irqwork = llist_add(&work->llnode,
  11357. + &per_cpu(hirq_work_list, cpu));
  11358. + else
  11359. + raise_irqwork = llist_add(&work->llnode,
  11360. + &per_cpu(lazy_list, cpu));
  11361. +#else
  11362. + raise_irqwork = llist_add(&work->llnode,
  11363. + &per_cpu(raised_list, cpu));
  11364. +#endif
  11365. +
  11366. + if (raise_irqwork)
  11367. arch_send_call_function_single_ipi(cpu);
  11368. return true;
  11369. @@ -93,7 +113,15 @@
  11370. /* Queue the entry and raise the IPI if needed. */
  11371. preempt_disable();
  11372. - /* If the work is "lazy", handle it from next tick if any */
  11373. +#ifdef CONFIG_PREEMPT_RT_FULL
  11374. + if (work->flags & IRQ_WORK_HARD_IRQ) {
  11375. + if (llist_add(&work->llnode, this_cpu_ptr(&hirq_work_list)))
  11376. + arch_irq_work_raise();
  11377. + } else {
  11378. + if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)))
  11379. + arch_irq_work_raise();
  11380. + }
  11381. +#else
  11382. if (work->flags & IRQ_WORK_LAZY) {
  11383. if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
  11384. tick_nohz_tick_stopped())
  11385. @@ -102,6 +130,7 @@
  11386. if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
  11387. arch_irq_work_raise();
  11388. }
  11389. +#endif
  11390. preempt_enable();
  11391. @@ -116,9 +145,12 @@
  11392. raised = this_cpu_ptr(&raised_list);
  11393. lazy = this_cpu_ptr(&lazy_list);
  11394. - if (llist_empty(raised) || arch_irq_work_has_interrupt())
  11395. + if (llist_empty(raised))
  11396. if (llist_empty(lazy))
  11397. - return false;
  11398. +#ifdef CONFIG_PREEMPT_RT_FULL
  11399. + if (llist_empty(this_cpu_ptr(&hirq_work_list)))
  11400. +#endif
  11401. + return false;
  11402. /* All work should have been flushed before going offline */
  11403. WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
  11404. @@ -132,7 +164,9 @@
  11405. struct irq_work *work;
  11406. struct llist_node *llnode;
  11407. +#ifndef CONFIG_PREEMPT_RT_FULL
  11408. BUG_ON(!irqs_disabled());
  11409. +#endif
  11410. if (llist_empty(list))
  11411. return;
  11412. @@ -168,6 +202,12 @@
  11413. */
  11414. void irq_work_run(void)
  11415. {
  11416. +#ifdef CONFIG_PREEMPT_RT_FULL
  11417. + if (in_irq()) {
  11418. + irq_work_run_list(this_cpu_ptr(&hirq_work_list));
  11419. + return;
  11420. + }
  11421. +#endif
  11422. irq_work_run_list(this_cpu_ptr(&raised_list));
  11423. irq_work_run_list(this_cpu_ptr(&lazy_list));
  11424. }
  11425. @@ -175,9 +215,16 @@
  11426. void irq_work_tick(void)
  11427. {
  11428. - struct llist_head *raised = &__get_cpu_var(raised_list);
  11429. + struct llist_head *raised;
  11430. - if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
  11431. +#ifdef CONFIG_PREEMPT_RT_FULL
  11432. + if (in_irq()) {
  11433. + irq_work_run_list(this_cpu_ptr(&hirq_work_list));
  11434. + return;
  11435. + }
  11436. +#endif
  11437. + raised = &__get_cpu_var(raised_list);
  11438. + if (!llist_empty(raised))
  11439. irq_work_run_list(raised);
  11440. irq_work_run_list(&__get_cpu_var(lazy_list));
  11441. }
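On PREEMPT_RT_FULL the hunks above route irq_work entries by the IRQ_WORK_HARD_IRQ flag: flagged work stays on a dedicated per-CPU hirq_work_list and is run from hard interrupt context in irq_work_run(), while everything else lands on lazy_list and is picked up by irq_work_tick(). A stand-alone user-space sketch of just that routing decision; the flag values and the route() helper are invented for illustration:

#include <stdbool.h>
#include <stdio.h>

/* Placeholder flag values for illustration only, not the kernel's. */
#define FAKE_IRQ_WORK_LAZY     0x1
#define FAKE_IRQ_WORK_HARD_IRQ 0x2

struct fake_irq_work { unsigned int flags; };

/* Mirrors the queueing decision added above: on RT, "hard" work keeps a
 * dedicated list so it still runs from the hard interrupt, all other work
 * is deferred; on !RT the original lazy/raised split is kept. */
static const char *route(const struct fake_irq_work *w, bool preempt_rt_full)
{
        if (preempt_rt_full)
                return (w->flags & FAKE_IRQ_WORK_HARD_IRQ) ?
                        "hirq_work_list" : "lazy_list";
        return (w->flags & FAKE_IRQ_WORK_LAZY) ? "lazy_list" : "raised_list";
}

int main(void)
{
        struct fake_irq_work hard  = { .flags = FAKE_IRQ_WORK_HARD_IRQ };
        struct fake_irq_work plain = { .flags = 0 };

        printf("RT,  hard:  %s\n", route(&hard,  true));  /* hirq_work_list */
        printf("RT,  plain: %s\n", route(&plain, true));  /* lazy_list */
        printf("!RT, plain: %s\n", route(&plain, false)); /* raised_list */
        return 0;
}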
  11442. diff -Nur linux-3.18.10.orig/kernel/Kconfig.locks linux-3.18.10/kernel/Kconfig.locks
  11443. --- linux-3.18.10.orig/kernel/Kconfig.locks 2015-03-24 02:05:12.000000000 +0100
  11444. +++ linux-3.18.10/kernel/Kconfig.locks 2015-03-26 12:42:18.671588332 +0100
  11445. @@ -225,11 +225,11 @@
  11446. config MUTEX_SPIN_ON_OWNER
  11447. def_bool y
  11448. - depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
  11449. + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
  11450. config RWSEM_SPIN_ON_OWNER
  11451. def_bool y
  11452. - depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
  11453. + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
  11454. config ARCH_USE_QUEUE_RWLOCK
  11455. bool
  11456. diff -Nur linux-3.18.10.orig/kernel/Kconfig.preempt linux-3.18.10/kernel/Kconfig.preempt
  11457. --- linux-3.18.10.orig/kernel/Kconfig.preempt 2015-03-24 02:05:12.000000000 +0100
  11458. +++ linux-3.18.10/kernel/Kconfig.preempt 2015-03-26 12:42:18.671588332 +0100
  11459. @@ -1,3 +1,16 @@
  11460. +config PREEMPT
  11461. + bool
  11462. + select PREEMPT_COUNT
  11463. +
  11464. +config PREEMPT_RT_BASE
  11465. + bool
  11466. + select PREEMPT
  11467. +
  11468. +config HAVE_PREEMPT_LAZY
  11469. + bool
  11470. +
  11471. +config PREEMPT_LAZY
  11472. + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL
  11473. choice
  11474. prompt "Preemption Model"
  11475. @@ -33,9 +46,9 @@
  11476. Select this if you are building a kernel for a desktop system.
  11477. -config PREEMPT
  11478. +config PREEMPT__LL
  11479. bool "Preemptible Kernel (Low-Latency Desktop)"
  11480. - select PREEMPT_COUNT
  11481. + select PREEMPT
  11482. select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
  11483. help
  11484. This option reduces the latency of the kernel by making
  11485. @@ -52,6 +65,22 @@
  11486. embedded system with latency requirements in the milliseconds
  11487. range.
  11488. +config PREEMPT_RTB
  11489. + bool "Preemptible Kernel (Basic RT)"
  11490. + select PREEMPT_RT_BASE
  11491. + help
  11492. + This option is basically the same as (Low-Latency Desktop) but
  11493. + enables changes which are preliminary for the full preemptible
  11494. + RT kernel.
  11495. +
  11496. +config PREEMPT_RT_FULL
  11497. + bool "Fully Preemptible Kernel (RT)"
  11498. + depends on IRQ_FORCED_THREADING
  11499. + select PREEMPT_RT_BASE
  11500. + select PREEMPT_RCU
  11501. + help
  11502. + All and everything: the complete set of real-time preemption changes.
  11503. +
  11504. endchoice
  11505. config PREEMPT_COUNT
  11506. diff -Nur linux-3.18.10.orig/kernel/ksysfs.c linux-3.18.10/kernel/ksysfs.c
  11507. --- linux-3.18.10.orig/kernel/ksysfs.c 2015-03-24 02:05:12.000000000 +0100
  11508. +++ linux-3.18.10/kernel/ksysfs.c 2015-03-26 12:42:18.675588336 +0100
  11509. @@ -136,6 +136,15 @@
  11510. #endif /* CONFIG_KEXEC */
  11511. +#if defined(CONFIG_PREEMPT_RT_FULL)
  11512. +static ssize_t realtime_show(struct kobject *kobj,
  11513. + struct kobj_attribute *attr, char *buf)
  11514. +{
  11515. + return sprintf(buf, "%d\n", 1);
  11516. +}
  11517. +KERNEL_ATTR_RO(realtime);
  11518. +#endif
  11519. +
  11520. /* whether file capabilities are enabled */
  11521. static ssize_t fscaps_show(struct kobject *kobj,
  11522. struct kobj_attribute *attr, char *buf)
  11523. @@ -203,6 +212,9 @@
  11524. &vmcoreinfo_attr.attr,
  11525. #endif
  11526. &rcu_expedited_attr.attr,
  11527. +#ifdef CONFIG_PREEMPT_RT_FULL
  11528. + &realtime_attr.attr,
  11529. +#endif
  11530. NULL
  11531. };
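The ksysfs change above exposes a read-only /sys/kernel/realtime attribute that reports 1 on PREEMPT_RT_FULL kernels and is absent otherwise. A small user-space probe, assuming only that sysfs is mounted at /sys:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/sys/kernel/realtime", "r");
        int rt = 0;

        if (f) {
                if (fscanf(f, "%d", &rt) != 1)
                        rt = 0;
                fclose(f);
        }
        printf("PREEMPT_RT_FULL kernel: %s\n", rt == 1 ? "yes" : "no");
        return 0;
}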
  11532. diff -Nur linux-3.18.10.orig/kernel/locking/lglock.c linux-3.18.10/kernel/locking/lglock.c
  11533. --- linux-3.18.10.orig/kernel/locking/lglock.c 2015-03-24 02:05:12.000000000 +0100
  11534. +++ linux-3.18.10/kernel/locking/lglock.c 2015-03-26 12:42:18.675588336 +0100
  11535. @@ -4,6 +4,15 @@
  11536. #include <linux/cpu.h>
  11537. #include <linux/string.h>
  11538. +#ifndef CONFIG_PREEMPT_RT_FULL
  11539. +# define lg_lock_ptr arch_spinlock_t
  11540. +# define lg_do_lock(l) arch_spin_lock(l)
  11541. +# define lg_do_unlock(l) arch_spin_unlock(l)
  11542. +#else
  11543. +# define lg_lock_ptr struct rt_mutex
  11544. +# define lg_do_lock(l) __rt_spin_lock(l)
  11545. +# define lg_do_unlock(l) __rt_spin_unlock(l)
  11546. +#endif
  11547. /*
  11548. * Note there is no uninit, so lglocks cannot be defined in
  11549. * modules (but it's fine to use them from there)
  11550. @@ -12,51 +21,60 @@
  11551. void lg_lock_init(struct lglock *lg, char *name)
  11552. {
  11553. +#ifdef CONFIG_PREEMPT_RT_FULL
  11554. + int i;
  11555. +
  11556. + for_each_possible_cpu(i) {
  11557. + struct rt_mutex *lock = per_cpu_ptr(lg->lock, i);
  11558. +
  11559. + rt_mutex_init(lock);
  11560. + }
  11561. +#endif
  11562. LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
  11563. }
  11564. EXPORT_SYMBOL(lg_lock_init);
  11565. void lg_local_lock(struct lglock *lg)
  11566. {
  11567. - arch_spinlock_t *lock;
  11568. + lg_lock_ptr *lock;
  11569. - preempt_disable();
  11570. + migrate_disable();
  11571. lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  11572. lock = this_cpu_ptr(lg->lock);
  11573. - arch_spin_lock(lock);
  11574. + lg_do_lock(lock);
  11575. }
  11576. EXPORT_SYMBOL(lg_local_lock);
  11577. void lg_local_unlock(struct lglock *lg)
  11578. {
  11579. - arch_spinlock_t *lock;
  11580. + lg_lock_ptr *lock;
  11581. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  11582. lock = this_cpu_ptr(lg->lock);
  11583. - arch_spin_unlock(lock);
  11584. - preempt_enable();
  11585. + lg_do_unlock(lock);
  11586. + migrate_enable();
  11587. }
  11588. EXPORT_SYMBOL(lg_local_unlock);
  11589. void lg_local_lock_cpu(struct lglock *lg, int cpu)
  11590. {
  11591. - arch_spinlock_t *lock;
  11592. + lg_lock_ptr *lock;
  11593. - preempt_disable();
  11594. + preempt_disable_nort();
  11595. lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  11596. lock = per_cpu_ptr(lg->lock, cpu);
  11597. - arch_spin_lock(lock);
  11598. + lg_do_lock(lock);
  11599. }
  11600. EXPORT_SYMBOL(lg_local_lock_cpu);
  11601. void lg_local_unlock_cpu(struct lglock *lg, int cpu)
  11602. {
  11603. - arch_spinlock_t *lock;
  11604. + lg_lock_ptr *lock;
  11605. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  11606. lock = per_cpu_ptr(lg->lock, cpu);
  11607. - arch_spin_unlock(lock);
  11608. - preempt_enable();
  11609. + lg_do_unlock(lock);
  11610. + preempt_enable_nort();
  11611. }
  11612. EXPORT_SYMBOL(lg_local_unlock_cpu);
  11613. @@ -64,12 +82,12 @@
  11614. {
  11615. int i;
  11616. - preempt_disable();
  11617. + preempt_disable_nort();
  11618. lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  11619. for_each_possible_cpu(i) {
  11620. - arch_spinlock_t *lock;
  11621. + lg_lock_ptr *lock;
  11622. lock = per_cpu_ptr(lg->lock, i);
  11623. - arch_spin_lock(lock);
  11624. + lg_do_lock(lock);
  11625. }
  11626. }
  11627. EXPORT_SYMBOL(lg_global_lock);
  11628. @@ -80,10 +98,35 @@
  11629. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  11630. for_each_possible_cpu(i) {
  11631. - arch_spinlock_t *lock;
  11632. + lg_lock_ptr *lock;
  11633. lock = per_cpu_ptr(lg->lock, i);
  11634. - arch_spin_unlock(lock);
  11635. + lg_do_unlock(lock);
  11636. }
  11637. - preempt_enable();
  11638. + preempt_enable_nort();
  11639. }
  11640. EXPORT_SYMBOL(lg_global_unlock);
  11641. +
  11642. +#ifdef CONFIG_PREEMPT_RT_FULL
  11643. +/*
  11644. + * HACK: If you use this, you get to keep the pieces.
  11645. + * Used in queue_stop_cpus_work() when stop machinery
  11646. + * is called from an inactive CPU, so we can't schedule.
  11647. + */
  11648. +# define lg_do_trylock_relax(l) \
  11649. + do { \
  11650. + while (!__rt_spin_trylock(l)) \
  11651. + cpu_relax(); \
  11652. + } while (0)
  11653. +
  11654. +void lg_global_trylock_relax(struct lglock *lg)
  11655. +{
  11656. + int i;
  11657. +
  11658. + lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  11659. + for_each_possible_cpu(i) {
  11660. + lg_lock_ptr *lock;
  11661. + lock = per_cpu_ptr(lg->lock, i);
  11662. + lg_do_trylock_relax(lock);
  11663. + }
  11664. +}
  11665. +#endif
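With the lg_lock_ptr/lg_do_lock() indirection above, lglock users keep the same calling convention on both !RT and RT kernels; only the underlying per-CPU lock type changes (arch spinlock vs. rt_mutex taken under migrate_disable()). A rough sketch of a hypothetical in-kernel user of the stock lglock API as it exists in this tree:

#include <linux/init.h>
#include <linux/lglock.h>

static DEFINE_STATIC_LGLOCK(example_lglock);

static int __init example_lglock_setup(void)
{
        lg_lock_init(&example_lglock, "example_lglock");
        return 0;
}

/* Common fast path: protect only this CPU's slice of the data. */
static void example_fast_path(void)
{
        lg_local_lock(&example_lglock);
        /* ... touch this CPU's data ... */
        lg_local_unlock(&example_lglock);
}

/* Rare slow path: takes every CPU's lock in turn. */
static void example_slow_path(void)
{
        lg_global_lock(&example_lglock);
        /* ... walk all per-CPU data ... */
        lg_global_unlock(&example_lglock);
}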
  11666. diff -Nur linux-3.18.10.orig/kernel/locking/lockdep.c linux-3.18.10/kernel/locking/lockdep.c
  11667. --- linux-3.18.10.orig/kernel/locking/lockdep.c 2015-03-24 02:05:12.000000000 +0100
  11668. +++ linux-3.18.10/kernel/locking/lockdep.c 2015-03-26 12:42:18.675588336 +0100
  11669. @@ -3542,6 +3542,7 @@
  11670. }
  11671. }
  11672. +#ifndef CONFIG_PREEMPT_RT_FULL
  11673. /*
  11674. * We dont accurately track softirq state in e.g.
  11675. * hardirq contexts (such as on 4KSTACKS), so only
  11676. @@ -3556,6 +3557,7 @@
  11677. DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
  11678. }
  11679. }
  11680. +#endif
  11681. if (!debug_locks)
  11682. print_irqtrace_events(current);
  11683. diff -Nur linux-3.18.10.orig/kernel/locking/Makefile linux-3.18.10/kernel/locking/Makefile
  11684. --- linux-3.18.10.orig/kernel/locking/Makefile 2015-03-24 02:05:12.000000000 +0100
  11685. +++ linux-3.18.10/kernel/locking/Makefile 2015-03-26 12:42:18.675588336 +0100
  11686. @@ -1,5 +1,5 @@
  11687. -obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o
  11688. +obj-y += semaphore.o mcs_spinlock.o
  11689. ifdef CONFIG_FUNCTION_TRACER
  11690. CFLAGS_REMOVE_lockdep.o = -pg
  11691. @@ -8,7 +8,11 @@
  11692. CFLAGS_REMOVE_rtmutex-debug.o = -pg
  11693. endif
  11694. +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
  11695. +obj-y += mutex.o
  11696. obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
  11697. +obj-y += rwsem.o
  11698. +endif
  11699. obj-$(CONFIG_LOCKDEP) += lockdep.o
  11700. ifeq ($(CONFIG_PROC_FS),y)
  11701. obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
  11702. @@ -21,8 +25,11 @@
  11703. obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
  11704. obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
  11705. obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
  11706. +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
  11707. obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
  11708. obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
  11709. +endif
  11710. obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
  11711. +obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o
  11712. obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o
  11713. obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
  11714. diff -Nur linux-3.18.10.orig/kernel/locking/percpu-rwsem.c linux-3.18.10/kernel/locking/percpu-rwsem.c
  11715. --- linux-3.18.10.orig/kernel/locking/percpu-rwsem.c 2015-03-24 02:05:12.000000000 +0100
  11716. +++ linux-3.18.10/kernel/locking/percpu-rwsem.c 2015-03-26 12:42:18.675588336 +0100
  11717. @@ -84,8 +84,12 @@
  11718. down_read(&brw->rw_sem);
  11719. atomic_inc(&brw->slow_read_ctr);
  11720. +#ifdef CONFIG_PREEMPT_RT_FULL
  11721. + up_read(&brw->rw_sem);
  11722. +#else
  11723. /* avoid up_read()->rwsem_release() */
  11724. __up_read(&brw->rw_sem);
  11725. +#endif
  11726. }
  11727. void percpu_up_read(struct percpu_rw_semaphore *brw)
  11728. diff -Nur linux-3.18.10.orig/kernel/locking/rt.c linux-3.18.10/kernel/locking/rt.c
  11729. --- linux-3.18.10.orig/kernel/locking/rt.c 1970-01-01 01:00:00.000000000 +0100
  11730. +++ linux-3.18.10/kernel/locking/rt.c 2015-03-26 12:42:18.675588336 +0100
  11731. @@ -0,0 +1,456 @@
  11732. +/*
  11733. + * kernel/rt.c
  11734. + *
  11735. + * Real-Time Preemption Support
  11736. + *
  11737. + * started by Ingo Molnar:
  11738. + *
  11739. + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  11740. + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  11741. + *
  11742. + * historic credit for proving that Linux spinlocks can be implemented via
  11743. + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow
  11744. + * and others) who prototyped it on 2.4 and did lots of comparative
  11745. + * research and analysis; TimeSys, for proving that you can implement a
  11746. + * fully preemptible kernel via the use of IRQ threading and mutexes;
  11747. + * Bill Huey for persuasively arguing on lkml that the mutex model is the
  11748. + * right one; and to MontaVista, who ported pmutexes to 2.6.
  11749. + *
  11750. + * This code is a from-scratch implementation and is not based on pmutexes,
  11751. + * but the idea of converting spinlocks to mutexes is used here too.
  11752. + *
  11753. + * lock debugging, locking tree, deadlock detection:
  11754. + *
  11755. + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
  11756. + * Released under the General Public License (GPL).
  11757. + *
  11758. + * Includes portions of the generic R/W semaphore implementation from:
  11759. + *
  11760. + * Copyright (c) 2001 David Howells (dhowells@redhat.com).
  11761. + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
  11762. + * - Derived also from comments by Linus
  11763. + *
  11764. + * Pending ownership of locks and ownership stealing:
  11765. + *
  11766. + * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt
  11767. + *
  11768. + * (also by Steven Rostedt)
  11769. + * - Converted single pi_lock to individual task locks.
  11770. + *
  11771. + * By Esben Nielsen:
  11772. + * Doing priority inheritance with help of the scheduler.
  11773. + *
  11774. + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  11775. + * - major rework based on Esben Nielsens initial patch
  11776. + * - replaced thread_info references by task_struct refs
  11777. + * - removed task->pending_owner dependency
  11778. + * - BKL drop/reacquire for semaphore style locks to avoid deadlocks
  11779. + * in the scheduler return path as discussed with Steven Rostedt
  11780. + *
  11781. + * Copyright (C) 2006, Kihon Technologies Inc.
  11782. + * Steven Rostedt <rostedt@goodmis.org>
  11783. + * - debugged and patched Thomas Gleixner's rework.
  11784. + * - added back the cmpxchg to the rework.
  11785. + * - turned atomic require back on for SMP.
  11786. + */
  11787. +
  11788. +#include <linux/spinlock.h>
  11789. +#include <linux/rtmutex.h>
  11790. +#include <linux/sched.h>
  11791. +#include <linux/delay.h>
  11792. +#include <linux/module.h>
  11793. +#include <linux/kallsyms.h>
  11794. +#include <linux/syscalls.h>
  11795. +#include <linux/interrupt.h>
  11796. +#include <linux/plist.h>
  11797. +#include <linux/fs.h>
  11798. +#include <linux/futex.h>
  11799. +#include <linux/hrtimer.h>
  11800. +
  11801. +#include "rtmutex_common.h"
  11802. +
  11803. +/*
  11804. + * struct mutex functions
  11805. + */
  11806. +void __mutex_do_init(struct mutex *mutex, const char *name,
  11807. + struct lock_class_key *key)
  11808. +{
  11809. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11810. + /*
  11811. + * Make sure we are not reinitializing a held lock:
  11812. + */
  11813. + debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
  11814. + lockdep_init_map(&mutex->dep_map, name, key, 0);
  11815. +#endif
  11816. + mutex->lock.save_state = 0;
  11817. +}
  11818. +EXPORT_SYMBOL(__mutex_do_init);
  11819. +
  11820. +void __lockfunc _mutex_lock(struct mutex *lock)
  11821. +{
  11822. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  11823. + rt_mutex_lock(&lock->lock);
  11824. +}
  11825. +EXPORT_SYMBOL(_mutex_lock);
  11826. +
  11827. +int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
  11828. +{
  11829. + int ret;
  11830. +
  11831. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  11832. + ret = rt_mutex_lock_interruptible(&lock->lock);
  11833. + if (ret)
  11834. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  11835. + return ret;
  11836. +}
  11837. +EXPORT_SYMBOL(_mutex_lock_interruptible);
  11838. +
  11839. +int __lockfunc _mutex_lock_killable(struct mutex *lock)
  11840. +{
  11841. + int ret;
  11842. +
  11843. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  11844. + ret = rt_mutex_lock_killable(&lock->lock);
  11845. + if (ret)
  11846. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  11847. + return ret;
  11848. +}
  11849. +EXPORT_SYMBOL(_mutex_lock_killable);
  11850. +
  11851. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11852. +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
  11853. +{
  11854. + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
  11855. + rt_mutex_lock(&lock->lock);
  11856. +}
  11857. +EXPORT_SYMBOL(_mutex_lock_nested);
  11858. +
  11859. +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
  11860. +{
  11861. + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_);
  11862. + rt_mutex_lock(&lock->lock);
  11863. +}
  11864. +EXPORT_SYMBOL(_mutex_lock_nest_lock);
  11865. +
  11866. +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass)
  11867. +{
  11868. + int ret;
  11869. +
  11870. + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
  11871. + ret = rt_mutex_lock_interruptible(&lock->lock);
  11872. + if (ret)
  11873. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  11874. + return ret;
  11875. +}
  11876. +EXPORT_SYMBOL(_mutex_lock_interruptible_nested);
  11877. +
  11878. +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass)
  11879. +{
  11880. + int ret;
  11881. +
  11882. + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
  11883. + ret = rt_mutex_lock_killable(&lock->lock);
  11884. + if (ret)
  11885. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  11886. + return ret;
  11887. +}
  11888. +EXPORT_SYMBOL(_mutex_lock_killable_nested);
  11889. +#endif
  11890. +
  11891. +int __lockfunc _mutex_trylock(struct mutex *lock)
  11892. +{
  11893. + int ret = rt_mutex_trylock(&lock->lock);
  11894. +
  11895. + if (ret)
  11896. + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  11897. +
  11898. + return ret;
  11899. +}
  11900. +EXPORT_SYMBOL(_mutex_trylock);
  11901. +
  11902. +void __lockfunc _mutex_unlock(struct mutex *lock)
  11903. +{
  11904. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  11905. + rt_mutex_unlock(&lock->lock);
  11906. +}
  11907. +EXPORT_SYMBOL(_mutex_unlock);
  11908. +
  11909. +/*
  11910. + * rwlock_t functions
  11911. + */
  11912. +int __lockfunc rt_write_trylock(rwlock_t *rwlock)
  11913. +{
  11914. + int ret;
  11915. +
  11916. + migrate_disable();
  11917. + ret = rt_mutex_trylock(&rwlock->lock);
  11918. + if (ret)
  11919. + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
  11920. + else
  11921. + migrate_enable();
  11922. +
  11923. + return ret;
  11924. +}
  11925. +EXPORT_SYMBOL(rt_write_trylock);
  11926. +
  11927. +int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags)
  11928. +{
  11929. + int ret;
  11930. +
  11931. + *flags = 0;
  11932. + ret = rt_write_trylock(rwlock);
  11933. + return ret;
  11934. +}
  11935. +EXPORT_SYMBOL(rt_write_trylock_irqsave);
  11936. +
  11937. +int __lockfunc rt_read_trylock(rwlock_t *rwlock)
  11938. +{
  11939. + struct rt_mutex *lock = &rwlock->lock;
  11940. + int ret = 1;
  11941. +
  11942. + /*
  11943. + * recursive read locks succeed when current owns the lock,
  11944. + * but not when read_depth == 0 which means that the lock is
  11945. + * write locked.
  11946. + */
  11947. + if (rt_mutex_owner(lock) != current) {
  11948. + migrate_disable();
  11949. + ret = rt_mutex_trylock(lock);
  11950. + if (ret)
  11951. + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
  11952. + else
  11953. + migrate_enable();
  11954. +
  11955. + } else if (!rwlock->read_depth) {
  11956. + ret = 0;
  11957. + }
  11958. +
  11959. + if (ret)
  11960. + rwlock->read_depth++;
  11961. +
  11962. + return ret;
  11963. +}
  11964. +EXPORT_SYMBOL(rt_read_trylock);
  11965. +
  11966. +void __lockfunc rt_write_lock(rwlock_t *rwlock)
  11967. +{
  11968. + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
  11969. + migrate_disable();
  11970. + __rt_spin_lock(&rwlock->lock);
  11971. +}
  11972. +EXPORT_SYMBOL(rt_write_lock);
  11973. +
  11974. +void __lockfunc rt_read_lock(rwlock_t *rwlock)
  11975. +{
  11976. + struct rt_mutex *lock = &rwlock->lock;
  11977. +
  11978. +
  11979. + /*
  11980. + * recursive read locks succeed when current owns the lock
  11981. + */
  11982. + if (rt_mutex_owner(lock) != current) {
  11983. + migrate_disable();
  11984. + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
  11985. + __rt_spin_lock(lock);
  11986. + }
  11987. + rwlock->read_depth++;
  11988. +}
  11989. +
  11990. +EXPORT_SYMBOL(rt_read_lock);
  11991. +
  11992. +void __lockfunc rt_write_unlock(rwlock_t *rwlock)
  11993. +{
  11994. + /* NOTE: we always pass in '1' for nested, for simplicity */
  11995. + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
  11996. + __rt_spin_unlock(&rwlock->lock);
  11997. + migrate_enable();
  11998. +}
  11999. +EXPORT_SYMBOL(rt_write_unlock);
  12000. +
  12001. +void __lockfunc rt_read_unlock(rwlock_t *rwlock)
  12002. +{
  12003. + /* Release the lock only when read_depth is down to 0 */
  12004. + if (--rwlock->read_depth == 0) {
  12005. + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
  12006. + __rt_spin_unlock(&rwlock->lock);
  12007. + migrate_enable();
  12008. + }
  12009. +}
  12010. +EXPORT_SYMBOL(rt_read_unlock);
  12011. +
  12012. +unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock)
  12013. +{
  12014. + rt_write_lock(rwlock);
  12015. +
  12016. + return 0;
  12017. +}
  12018. +EXPORT_SYMBOL(rt_write_lock_irqsave);
  12019. +
  12020. +unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock)
  12021. +{
  12022. + rt_read_lock(rwlock);
  12023. +
  12024. + return 0;
  12025. +}
  12026. +EXPORT_SYMBOL(rt_read_lock_irqsave);
  12027. +
  12028. +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
  12029. +{
  12030. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12031. + /*
  12032. + * Make sure we are not reinitializing a held lock:
  12033. + */
  12034. + debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
  12035. + lockdep_init_map(&rwlock->dep_map, name, key, 0);
  12036. +#endif
  12037. + rwlock->lock.save_state = 1;
  12038. + rwlock->read_depth = 0;
  12039. +}
  12040. +EXPORT_SYMBOL(__rt_rwlock_init);
  12041. +
  12042. +/*
  12043. + * rw_semaphores
  12044. + */
  12045. +
  12046. +void rt_up_write(struct rw_semaphore *rwsem)
  12047. +{
  12048. + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
  12049. + rt_mutex_unlock(&rwsem->lock);
  12050. +}
  12051. +EXPORT_SYMBOL(rt_up_write);
  12052. +
  12053. +void rt_up_read(struct rw_semaphore *rwsem)
  12054. +{
  12055. + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
  12056. + if (--rwsem->read_depth == 0)
  12057. + rt_mutex_unlock(&rwsem->lock);
  12058. +}
  12059. +EXPORT_SYMBOL(rt_up_read);
  12060. +
  12061. +/*
  12062. + * downgrade a write lock into a read lock
  12063. + * - just wake up any readers at the front of the queue
  12064. + */
  12065. +void rt_downgrade_write(struct rw_semaphore *rwsem)
  12066. +{
  12067. + BUG_ON(rt_mutex_owner(&rwsem->lock) != current);
  12068. + rwsem->read_depth = 1;
  12069. +}
  12070. +EXPORT_SYMBOL(rt_downgrade_write);
  12071. +
  12072. +int rt_down_write_trylock(struct rw_semaphore *rwsem)
  12073. +{
  12074. + int ret = rt_mutex_trylock(&rwsem->lock);
  12075. +
  12076. + if (ret)
  12077. + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
  12078. + return ret;
  12079. +}
  12080. +EXPORT_SYMBOL(rt_down_write_trylock);
  12081. +
  12082. +void rt_down_write(struct rw_semaphore *rwsem)
  12083. +{
  12084. + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_);
  12085. + rt_mutex_lock(&rwsem->lock);
  12086. +}
  12087. +EXPORT_SYMBOL(rt_down_write);
  12088. +
  12089. +void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass)
  12090. +{
  12091. + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
  12092. + rt_mutex_lock(&rwsem->lock);
  12093. +}
  12094. +EXPORT_SYMBOL(rt_down_write_nested);
  12095. +
  12096. +void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
  12097. + struct lockdep_map *nest)
  12098. +{
  12099. + rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_);
  12100. + rt_mutex_lock(&rwsem->lock);
  12101. +}
  12102. +EXPORT_SYMBOL(rt_down_write_nested_lock);
  12103. +
  12104. +int rt_down_read_trylock(struct rw_semaphore *rwsem)
  12105. +{
  12106. + struct rt_mutex *lock = &rwsem->lock;
  12107. + int ret = 1;
  12108. +
  12109. + /*
  12110. + * recursive read locks succeed when current owns the rwsem,
  12111. + * but not when read_depth == 0 which means that the rwsem is
  12112. + * write locked.
  12113. + */
  12114. + if (rt_mutex_owner(lock) != current)
  12115. + ret = rt_mutex_trylock(&rwsem->lock);
  12116. + else if (!rwsem->read_depth)
  12117. + ret = 0;
  12118. +
  12119. + if (ret) {
  12120. + rwsem->read_depth++;
  12121. + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
  12122. + }
  12123. + return ret;
  12124. +}
  12125. +EXPORT_SYMBOL(rt_down_read_trylock);
  12126. +
  12127. +static void __rt_down_read(struct rw_semaphore *rwsem, int subclass)
  12128. +{
  12129. + struct rt_mutex *lock = &rwsem->lock;
  12130. +
  12131. + rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_);
  12132. +
  12133. + if (rt_mutex_owner(lock) != current)
  12134. + rt_mutex_lock(&rwsem->lock);
  12135. + rwsem->read_depth++;
  12136. +}
  12137. +
  12138. +void rt_down_read(struct rw_semaphore *rwsem)
  12139. +{
  12140. + __rt_down_read(rwsem, 0);
  12141. +}
  12142. +EXPORT_SYMBOL(rt_down_read);
  12143. +
  12144. +void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass)
  12145. +{
  12146. + __rt_down_read(rwsem, subclass);
  12147. +}
  12148. +EXPORT_SYMBOL(rt_down_read_nested);
  12149. +
  12150. +void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
  12151. + struct lock_class_key *key)
  12152. +{
  12153. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12154. + /*
  12155. + * Make sure we are not reinitializing a held lock:
  12156. + */
  12157. + debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem));
  12158. + lockdep_init_map(&rwsem->dep_map, name, key, 0);
  12159. +#endif
  12160. + rwsem->read_depth = 0;
  12161. + rwsem->lock.save_state = 0;
  12162. +}
  12163. +EXPORT_SYMBOL(__rt_rwsem_init);
  12164. +
  12165. +/**
  12166. + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
  12167. + * @cnt: the atomic which we are to dec
  12168. + * @lock: the mutex to return holding if we dec to 0
  12169. + *
  12170. + * return true and hold lock if we dec to 0, return false otherwise
  12171. + */
  12172. +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
  12173. +{
  12174. + /* dec if we can't possibly hit 0 */
  12175. + if (atomic_add_unless(cnt, -1, 1))
  12176. + return 0;
  12177. + /* we might hit 0, so take the lock */
  12178. + mutex_lock(lock);
  12179. + if (!atomic_dec_and_test(cnt)) {
  12180. + /* when we actually did the dec, we didn't hit 0 */
  12181. + mutex_unlock(lock);
  12182. + return 0;
  12183. + }
  12184. + /* we hit 0, and we hold the lock */
  12185. + return 1;
  12186. +}
  12187. +EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
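atomic_dec_and_mutex_lock(), re-implemented above for the RT substitution, returns 1 with the mutex held only when the reference count actually reaches zero. A sketch of the typical caller pattern, with hypothetical object and list names:

#include <linux/atomic.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct example_obj {
        atomic_t refcnt;
        struct list_head node;
};

static DEFINE_MUTEX(example_list_lock);

static void example_put(struct example_obj *obj)
{
        /* Fast path: refcount stays above zero, the mutex is never taken. */
        if (!atomic_dec_and_mutex_lock(&obj->refcnt, &example_list_lock))
                return;

        /* Slow path: refcnt hit zero and we hold example_list_lock. */
        list_del(&obj->node);
        mutex_unlock(&example_list_lock);
        kfree(obj);
}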
  12188. diff -Nur linux-3.18.10.orig/kernel/locking/rtmutex.c linux-3.18.10/kernel/locking/rtmutex.c
  12189. --- linux-3.18.10.orig/kernel/locking/rtmutex.c 2015-03-24 02:05:12.000000000 +0100
  12190. +++ linux-3.18.10/kernel/locking/rtmutex.c 2015-03-26 12:45:24.871805156 +0100
  12191. @@ -7,6 +7,11 @@
  12192. * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  12193. * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
  12194. * Copyright (C) 2006 Esben Nielsen
  12195. + * Adaptive Spinlocks:
  12196. + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
  12197. + * and Peter Morreale,
  12198. + * Adaptive Spinlocks simplification:
  12199. + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
  12200. *
  12201. * See Documentation/locking/rt-mutex-design.txt for details.
  12202. */
  12203. @@ -16,6 +21,7 @@
  12204. #include <linux/sched/rt.h>
  12205. #include <linux/sched/deadline.h>
  12206. #include <linux/timer.h>
  12207. +#include <linux/ww_mutex.h>
  12208. #include "rtmutex_common.h"
  12209. @@ -69,6 +75,12 @@
  12210. clear_rt_mutex_waiters(lock);
  12211. }
  12212. +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
  12213. +{
  12214. + return waiter && waiter != PI_WAKEUP_INPROGRESS &&
  12215. + waiter != PI_REQUEUE_INPROGRESS;
  12216. +}
  12217. +
  12218. /*
  12219. * We can speed up the acquire/release, if the architecture
  12220. * supports cmpxchg and if there's no debugging state to be set up
  12221. @@ -333,6 +345,14 @@
  12222. return debug_rt_mutex_detect_deadlock(waiter, chwalk);
  12223. }
  12224. +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
  12225. +{
  12226. + if (waiter->savestate)
  12227. + wake_up_lock_sleeper(waiter->task);
  12228. + else
  12229. + wake_up_process(waiter->task);
  12230. +}
  12231. +
  12232. /*
  12233. * Max number of times we'll walk the boosting chain:
  12234. */
  12235. @@ -340,7 +360,8 @@
  12236. static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
  12237. {
  12238. - return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
  12239. + return rt_mutex_real_waiter(p->pi_blocked_on) ?
  12240. + p->pi_blocked_on->lock : NULL;
  12241. }
  12242. /*
  12243. @@ -477,7 +498,7 @@
  12244. * reached or the state of the chain has changed while we
  12245. * dropped the locks.
  12246. */
  12247. - if (!waiter)
  12248. + if (!rt_mutex_real_waiter(waiter))
  12249. goto out_unlock_pi;
  12250. /*
  12251. @@ -639,13 +660,16 @@
  12252. * follow here. This is the end of the chain we are walking.
  12253. */
  12254. if (!rt_mutex_owner(lock)) {
  12255. + struct rt_mutex_waiter *lock_top_waiter;
  12256. +
  12257. /*
  12258. * If the requeue [7] above changed the top waiter,
  12259. * then we need to wake the new top waiter up to try
  12260. * to get the lock.
  12261. */
  12262. - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
  12263. - wake_up_process(rt_mutex_top_waiter(lock)->task);
  12264. + lock_top_waiter = rt_mutex_top_waiter(lock);
  12265. + if (prerequeue_top_waiter != lock_top_waiter)
  12266. + rt_mutex_wake_waiter(lock_top_waiter);
  12267. raw_spin_unlock(&lock->wait_lock);
  12268. return 0;
  12269. }
  12270. @@ -738,6 +762,25 @@
  12271. return ret;
  12272. }
  12273. +
  12274. +#define STEAL_NORMAL 0
  12275. +#define STEAL_LATERAL 1
  12276. +
  12277. +/*
  12278. + * Note that RT tasks are excluded from lateral-steals to prevent the
  12279. + * introduction of an unbounded latency
  12280. + */
  12281. +static inline int lock_is_stealable(struct task_struct *task,
  12282. + struct task_struct *pendowner, int mode)
  12283. +{
  12284. + if (mode == STEAL_NORMAL || rt_task(task)) {
  12285. + if (task->prio >= pendowner->prio)
  12286. + return 0;
  12287. + } else if (task->prio > pendowner->prio)
  12288. + return 0;
  12289. + return 1;
  12290. +}
  12291. +
  12292. /*
  12293. * Try to take an rt-mutex
  12294. *
  12295. @@ -748,8 +791,9 @@
  12296. * @waiter: The waiter that is queued to the lock's wait list if the
  12297. * callsite called task_blocked_on_lock(), otherwise NULL
  12298. */
  12299. -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  12300. - struct rt_mutex_waiter *waiter)
  12301. +static int __try_to_take_rt_mutex(struct rt_mutex *lock,
  12302. + struct task_struct *task,
  12303. + struct rt_mutex_waiter *waiter, int mode)
  12304. {
  12305. unsigned long flags;
  12306. @@ -788,8 +832,10 @@
  12307. * If waiter is not the highest priority waiter of
  12308. * @lock, give up.
  12309. */
  12310. - if (waiter != rt_mutex_top_waiter(lock))
  12311. + if (waiter != rt_mutex_top_waiter(lock)) {
  12312. + /* XXX lock_is_stealable() ? */
  12313. return 0;
  12314. + }
  12315. /*
  12316. * We can acquire the lock. Remove the waiter from the
  12317. @@ -807,14 +853,10 @@
  12318. * not need to be dequeued.
  12319. */
  12320. if (rt_mutex_has_waiters(lock)) {
  12321. - /*
  12322. - * If @task->prio is greater than or equal to
  12323. - * the top waiter priority (kernel view),
  12324. - * @task lost.
  12325. - */
  12326. - if (task->prio >= rt_mutex_top_waiter(lock)->prio)
  12327. - return 0;
  12328. + struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
  12329. + if (task != pown && !lock_is_stealable(task, pown, mode))
  12330. + return 0;
  12331. /*
  12332. * The current top waiter stays enqueued. We
  12333. * don't have to change anything in the lock
  12334. @@ -863,6 +905,369 @@
  12335. return 1;
  12336. }
  12337. +#ifdef CONFIG_PREEMPT_RT_FULL
  12338. +/*
  12339. + * preemptible spin_lock functions:
  12340. + */
  12341. +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
  12342. + void (*slowfn)(struct rt_mutex *lock))
  12343. +{
  12344. + might_sleep();
  12345. +
  12346. + if (likely(rt_mutex_cmpxchg(lock, NULL, current)))
  12347. + rt_mutex_deadlock_account_lock(lock, current);
  12348. + else
  12349. + slowfn(lock);
  12350. +}
  12351. +
  12352. +static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
  12353. + void (*slowfn)(struct rt_mutex *lock))
  12354. +{
  12355. + if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
  12356. + rt_mutex_deadlock_account_unlock(current);
  12357. + else
  12358. + slowfn(lock);
  12359. +}
  12360. +#ifdef CONFIG_SMP
  12361. +/*
  12362. + * Note that owner is a speculative pointer and dereferencing relies
  12363. + * on rcu_read_lock() and the check against the lock owner.
  12364. + */
  12365. +static int adaptive_wait(struct rt_mutex *lock,
  12366. + struct task_struct *owner)
  12367. +{
  12368. + int res = 0;
  12369. +
  12370. + rcu_read_lock();
  12371. + for (;;) {
  12372. + if (owner != rt_mutex_owner(lock))
  12373. + break;
  12374. + /*
  12375. + * Ensure that owner->on_cpu is dereferenced _after_
  12376. + * checking the above to be valid.
  12377. + */
  12378. + barrier();
  12379. + if (!owner->on_cpu) {
  12380. + res = 1;
  12381. + break;
  12382. + }
  12383. + cpu_relax();
  12384. + }
  12385. + rcu_read_unlock();
  12386. + return res;
  12387. +}
  12388. +#else
  12389. +static int adaptive_wait(struct rt_mutex *lock,
  12390. + struct task_struct *orig_owner)
  12391. +{
  12392. + return 1;
  12393. +}
  12394. +#endif
  12395. +
  12396. +# define pi_lock(lock) raw_spin_lock_irq(lock)
  12397. +# define pi_unlock(lock) raw_spin_unlock_irq(lock)
  12398. +
  12399. +static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  12400. + struct rt_mutex_waiter *waiter,
  12401. + struct task_struct *task,
  12402. + enum rtmutex_chainwalk chwalk);
  12403. +/*
  12404. + * Slow path lock function spin_lock style: this variant is very
  12405. + * careful not to miss any non-lock wakeups.
  12406. + *
  12407. + * We store the current state under p->pi_lock in p->saved_state and
  12408. + * the try_to_wake_up() code handles this accordingly.
  12409. + */
  12410. +static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock)
  12411. +{
  12412. + struct task_struct *lock_owner, *self = current;
  12413. + struct rt_mutex_waiter waiter, *top_waiter;
  12414. + int ret;
  12415. +
  12416. + rt_mutex_init_waiter(&waiter, true);
  12417. +
  12418. + raw_spin_lock(&lock->wait_lock);
  12419. +
  12420. + if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
  12421. + raw_spin_unlock(&lock->wait_lock);
  12422. + return;
  12423. + }
  12424. +
  12425. + BUG_ON(rt_mutex_owner(lock) == self);
  12426. +
  12427. + /*
  12428. + * We save whatever state the task is in and we'll restore it
  12429. + * after acquiring the lock taking real wakeups into account
  12430. + * as well. We are serialized via pi_lock against wakeups. See
  12431. + * try_to_wake_up().
  12432. + */
  12433. + pi_lock(&self->pi_lock);
  12434. + self->saved_state = self->state;
  12435. + __set_current_state(TASK_UNINTERRUPTIBLE);
  12436. + pi_unlock(&self->pi_lock);
  12437. +
  12438. + ret = task_blocks_on_rt_mutex(lock, &waiter, self, 0);
  12439. + BUG_ON(ret);
  12440. +
  12441. + for (;;) {
  12442. + /* Try to acquire the lock again. */
  12443. + if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
  12444. + break;
  12445. +
  12446. + top_waiter = rt_mutex_top_waiter(lock);
  12447. + lock_owner = rt_mutex_owner(lock);
  12448. +
  12449. + raw_spin_unlock(&lock->wait_lock);
  12450. +
  12451. + debug_rt_mutex_print_deadlock(&waiter);
  12452. +
  12453. + if (top_waiter != &waiter || adaptive_wait(lock, lock_owner))
  12454. + schedule_rt_mutex(lock);
  12455. +
  12456. + raw_spin_lock(&lock->wait_lock);
  12457. +
  12458. + pi_lock(&self->pi_lock);
  12459. + __set_current_state(TASK_UNINTERRUPTIBLE);
  12460. + pi_unlock(&self->pi_lock);
  12461. + }
  12462. +
  12463. + /*
  12464. + * Restore the task state to current->saved_state. We set it
  12465. + * to the original state above and the try_to_wake_up() code
  12466. + * has possibly updated it when a real (non-rtmutex) wakeup
  12467. + * happened while we were blocked. Clear saved_state so
  12468. + * try_to_wakeup() does not get confused.
  12469. + */
  12470. + pi_lock(&self->pi_lock);
  12471. + __set_current_state(self->saved_state);
  12472. + self->saved_state = TASK_RUNNING;
  12473. + pi_unlock(&self->pi_lock);
  12474. +
  12475. + /*
  12476. + * try_to_take_rt_mutex() sets the waiter bit
  12477. + * unconditionally. We might have to fix that up:
  12478. + */
  12479. + fixup_rt_mutex_waiters(lock);
  12480. +
  12481. + BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
  12482. + BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry));
  12483. +
  12484. + raw_spin_unlock(&lock->wait_lock);
  12485. +
  12486. + debug_rt_mutex_free_waiter(&waiter);
  12487. +}
  12488. +
  12489. +static void wakeup_next_waiter(struct rt_mutex *lock);
  12490. +/*
  12491. + * Slow path to release a rt_mutex spin_lock style
  12492. + */
  12493. +static void __sched __rt_spin_lock_slowunlock(struct rt_mutex *lock)
  12494. +{
  12495. + debug_rt_mutex_unlock(lock);
  12496. +
  12497. + rt_mutex_deadlock_account_unlock(current);
  12498. +
  12499. + if (!rt_mutex_has_waiters(lock)) {
  12500. + lock->owner = NULL;
  12501. + raw_spin_unlock(&lock->wait_lock);
  12502. + return;
  12503. + }
  12504. +
  12505. + wakeup_next_waiter(lock);
  12506. +
  12507. + raw_spin_unlock(&lock->wait_lock);
  12508. +
  12509. + /* Undo pi boosting when necessary */
  12510. + rt_mutex_adjust_prio(current);
  12511. +}
  12512. +
  12513. +static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
  12514. +{
  12515. + raw_spin_lock(&lock->wait_lock);
  12516. + __rt_spin_lock_slowunlock(lock);
  12517. +}
  12518. +
  12519. +static void noinline __sched rt_spin_lock_slowunlock_hirq(struct rt_mutex *lock)
  12520. +{
  12521. + int ret;
  12522. +
  12523. + do {
  12524. + ret = raw_spin_trylock(&lock->wait_lock);
  12525. + } while (!ret);
  12526. +
  12527. + __rt_spin_lock_slowunlock(lock);
  12528. +}
  12529. +
  12530. +void __lockfunc rt_spin_lock(spinlock_t *lock)
  12531. +{
  12532. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
  12533. + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  12534. +}
  12535. +EXPORT_SYMBOL(rt_spin_lock);
  12536. +
  12537. +void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
  12538. +{
  12539. + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
  12540. +}
  12541. +EXPORT_SYMBOL(__rt_spin_lock);
  12542. +
  12543. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12544. +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
  12545. +{
  12546. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
  12547. + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
  12548. +}
  12549. +EXPORT_SYMBOL(rt_spin_lock_nested);
  12550. +#endif
  12551. +
  12552. +void __lockfunc rt_spin_unlock(spinlock_t *lock)
  12553. +{
  12554. + /* NOTE: we always pass in '1' for nested, for simplicity */
  12555. + spin_release(&lock->dep_map, 1, _RET_IP_);
  12556. + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
  12557. +}
  12558. +EXPORT_SYMBOL(rt_spin_unlock);
  12559. +
  12560. +void __lockfunc rt_spin_unlock_after_trylock_in_irq(spinlock_t *lock)
  12561. +{
  12562. + /* NOTE: we always pass in '1' for nested, for simplicity */
  12563. + spin_release(&lock->dep_map, 1, _RET_IP_);
  12564. + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock_hirq);
  12565. +}
  12566. +
  12567. +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
  12568. +{
  12569. + rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
  12570. +}
  12571. +EXPORT_SYMBOL(__rt_spin_unlock);
  12572. +
  12573. +/*
  12574. + * Wait for the lock to get unlocked: instead of polling for an unlock
  12575. + * (like raw spinlocks do), we lock and unlock, to force the kernel to
  12576. + * schedule if there's contention:
  12577. + */
  12578. +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
  12579. +{
  12580. + spin_lock(lock);
  12581. + spin_unlock(lock);
  12582. +}
  12583. +EXPORT_SYMBOL(rt_spin_unlock_wait);
  12584. +
  12585. +int __lockfunc __rt_spin_trylock(struct rt_mutex *lock)
  12586. +{
  12587. + return rt_mutex_trylock(lock);
  12588. +}
  12589. +
  12590. +int __lockfunc rt_spin_trylock(spinlock_t *lock)
  12591. +{
  12592. + int ret = rt_mutex_trylock(&lock->lock);
  12593. +
  12594. + if (ret)
  12595. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  12596. + return ret;
  12597. +}
  12598. +EXPORT_SYMBOL(rt_spin_trylock);
  12599. +
  12600. +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
  12601. +{
  12602. + int ret;
  12603. +
  12604. + local_bh_disable();
  12605. + ret = rt_mutex_trylock(&lock->lock);
  12606. + if (ret) {
  12607. + migrate_disable();
  12608. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  12609. + } else
  12610. + local_bh_enable();
  12611. + return ret;
  12612. +}
  12613. +EXPORT_SYMBOL(rt_spin_trylock_bh);
  12614. +
  12615. +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
  12616. +{
  12617. + int ret;
  12618. +
  12619. + *flags = 0;
  12620. + ret = rt_mutex_trylock(&lock->lock);
  12621. + if (ret) {
  12622. + migrate_disable();
  12623. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  12624. + }
  12625. + return ret;
  12626. +}
  12627. +EXPORT_SYMBOL(rt_spin_trylock_irqsave);
  12628. +
  12629. +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
  12630. +{
  12631. + /* Subtract 1 from counter unless that drops it to 0 (i.e. it was 1) */
  12632. + if (atomic_add_unless(atomic, -1, 1))
  12633. + return 0;
  12634. + migrate_disable();
  12635. + rt_spin_lock(lock);
  12636. + if (atomic_dec_and_test(atomic))
  12637. + return 1;
  12638. + rt_spin_unlock(lock);
  12639. + migrate_enable();
  12640. + return 0;
  12641. +}
  12642. +EXPORT_SYMBOL(atomic_dec_and_spin_lock);
  12643. +
  12644. + void
  12645. +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
  12646. +{
  12647. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12648. + /*
  12649. + * Make sure we are not reinitializing a held lock:
  12650. + */
  12651. + debug_check_no_locks_freed((void *)lock, sizeof(*lock));
  12652. + lockdep_init_map(&lock->dep_map, name, key, 0);
  12653. +#endif
  12654. +}
  12655. +EXPORT_SYMBOL(__rt_spin_lock_init);
  12656. +
  12657. +#endif /* PREEMPT_RT_FULL */
  12658. +
  12659. +#ifdef CONFIG_PREEMPT_RT_FULL
  12660. + static inline int __sched
  12661. +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
  12662. +{
  12663. + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
  12664. + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
  12665. +
  12666. + if (!hold_ctx)
  12667. + return 0;
  12668. +
  12669. + if (unlikely(ctx == hold_ctx))
  12670. + return -EALREADY;
  12671. +
  12672. + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
  12673. + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
  12674. +#ifdef CONFIG_DEBUG_MUTEXES
  12675. + DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
  12676. + ctx->contending_lock = ww;
  12677. +#endif
  12678. + return -EDEADLK;
  12679. + }
  12680. +
  12681. + return 0;
  12682. +}
  12683. +#else
  12684. + static inline int __sched
  12685. +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
  12686. +{
  12687. + BUG();
  12688. + return 0;
  12689. +}
  12690. +
  12691. +#endif
  12692. +
  12693. +static inline int
  12694. +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  12695. + struct rt_mutex_waiter *waiter)
  12696. +{
  12697. + return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
  12698. +}
  12699. +
  12700. /*
  12701. * Task blocks on lock.
  12702. *
  12703. @@ -894,6 +1299,23 @@
  12704. return -EDEADLK;
  12705. raw_spin_lock_irqsave(&task->pi_lock, flags);
  12706. +
  12707. + /*
  12708. + * In the case of futex requeue PI, this will be a proxy
  12709. + * lock. The task will wake unaware that it is enqueueed on
  12710. + * this lock. Avoid blocking on two locks and corrupting
  12711. + * pi_blocked_on via the PI_WAKEUP_INPROGRESS
  12712. + * flag. futex_wait_requeue_pi() sets this when it wakes up
  12713. + * before requeue (due to a signal or timeout). Do not enqueue
  12714. + * the task if PI_WAKEUP_INPROGRESS is set.
  12715. + */
  12716. + if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
  12717. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  12718. + return -EAGAIN;
  12719. + }
  12720. +
  12721. + BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
  12722. +
  12723. __rt_mutex_adjust_prio(task);
  12724. waiter->task = task;
  12725. waiter->lock = lock;
  12726. @@ -917,7 +1339,7 @@
  12727. rt_mutex_enqueue_pi(owner, waiter);
  12728. __rt_mutex_adjust_prio(owner);
  12729. - if (owner->pi_blocked_on)
  12730. + if (rt_mutex_real_waiter(owner->pi_blocked_on))
  12731. chain_walk = 1;
  12732. } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
  12733. chain_walk = 1;
  12734. @@ -994,7 +1416,7 @@
  12735. * long as we hold lock->wait_lock. The waiter task needs to
  12736. * acquire it in order to dequeue the waiter.
  12737. */
  12738. - wake_up_process(waiter->task);
  12739. + rt_mutex_wake_waiter(waiter);
  12740. }
  12741. /*
  12742. @@ -1008,7 +1430,7 @@
  12743. {
  12744. bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
  12745. struct task_struct *owner = rt_mutex_owner(lock);
  12746. - struct rt_mutex *next_lock;
  12747. + struct rt_mutex *next_lock = NULL;
  12748. unsigned long flags;
  12749. raw_spin_lock_irqsave(&current->pi_lock, flags);
  12750. @@ -1033,7 +1455,8 @@
  12751. __rt_mutex_adjust_prio(owner);
  12752. /* Store the lock on which owner is blocked or NULL */
  12753. - next_lock = task_blocked_on_lock(owner);
  12754. + if (rt_mutex_real_waiter(owner->pi_blocked_on))
  12755. + next_lock = task_blocked_on_lock(owner);
  12756. raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
  12757. @@ -1069,17 +1492,17 @@
  12758. raw_spin_lock_irqsave(&task->pi_lock, flags);
  12759. waiter = task->pi_blocked_on;
  12760. - if (!waiter || (waiter->prio == task->prio &&
  12761. + if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio &&
  12762. !dl_prio(task->prio))) {
  12763. raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  12764. return;
  12765. }
  12766. next_lock = waiter->lock;
  12767. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  12768. /* gets dropped in rt_mutex_adjust_prio_chain()! */
  12769. get_task_struct(task);
  12770. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  12771. rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
  12772. next_lock, NULL, task);
  12773. }
  12774. @@ -1097,7 +1520,8 @@
  12775. static int __sched
  12776. __rt_mutex_slowlock(struct rt_mutex *lock, int state,
  12777. struct hrtimer_sleeper *timeout,
  12778. - struct rt_mutex_waiter *waiter)
  12779. + struct rt_mutex_waiter *waiter,
  12780. + struct ww_acquire_ctx *ww_ctx)
  12781. {
  12782. int ret = 0;
  12783. @@ -1120,6 +1544,12 @@
  12784. break;
  12785. }
  12786. + if (ww_ctx && ww_ctx->acquired > 0) {
  12787. + ret = __mutex_lock_check_stamp(lock, ww_ctx);
  12788. + if (ret)
  12789. + break;
  12790. + }
  12791. +
  12792. raw_spin_unlock(&lock->wait_lock);
  12793. debug_rt_mutex_print_deadlock(waiter);
  12794. @@ -1153,25 +1583,102 @@
  12795. }
  12796. }
  12797. +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
  12798. + struct ww_acquire_ctx *ww_ctx)
  12799. +{
  12800. +#ifdef CONFIG_DEBUG_MUTEXES
  12801. + /*
  12802. + * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
  12803. + * but released with a normal mutex_unlock in this call.
  12804. + *
  12805. + * This should never happen, always use ww_mutex_unlock.
  12806. + */
  12807. + DEBUG_LOCKS_WARN_ON(ww->ctx);
  12808. +
  12809. + /*
  12810. + * Not quite done after calling ww_acquire_done() ?
  12811. + */
  12812. + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
  12813. +
  12814. + if (ww_ctx->contending_lock) {
  12815. + /*
  12816. + * After -EDEADLK you tried to
  12817. + * acquire a different ww_mutex? Bad!
  12818. + */
  12819. + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
  12820. +
  12821. + /*
  12822. + * You called ww_mutex_lock after receiving -EDEADLK,
  12823. + * but 'forgot' to unlock everything else first?
  12824. + */
  12825. + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
  12826. + ww_ctx->contending_lock = NULL;
  12827. + }
  12828. +
  12829. + /*
  12830. + * Naughty, using a different class will lead to undefined behavior!
  12831. + */
  12832. + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
  12833. +#endif
  12834. + ww_ctx->acquired++;
  12835. +}
  12836. +
  12837. +#ifdef CONFIG_PREEMPT_RT_FULL
  12838. +static void ww_mutex_account_lock(struct rt_mutex *lock,
  12839. + struct ww_acquire_ctx *ww_ctx)
  12840. +{
  12841. + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
  12842. + struct rt_mutex_waiter *waiter, *n;
  12843. +
  12844. + /*
  12845. + * This branch gets optimized out for the common case,
  12846. + * and is only important for ww_mutex_lock.
  12847. + */
  12848. + ww_mutex_lock_acquired(ww, ww_ctx);
  12849. + ww->ctx = ww_ctx;
  12850. +
  12851. + /*
  12852. + * Give any possible sleeping processes the chance to wake up,
  12853. + * so they can recheck if they have to back off.
  12854. + */
  12855. + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters,
  12856. + tree_entry) {
  12857. + /* XXX debug rt mutex waiter wakeup */
  12858. +
  12859. + BUG_ON(waiter->lock != lock);
  12860. + rt_mutex_wake_waiter(waiter);
  12861. + }
  12862. +}
  12863. +
  12864. +#else
  12865. +
  12866. +static void ww_mutex_account_lock(struct rt_mutex *lock,
  12867. + struct ww_acquire_ctx *ww_ctx)
  12868. +{
  12869. + BUG();
  12870. +}
  12871. +#endif
  12872. +
  12873. /*
  12874. * Slow path lock function:
  12875. */
  12876. static int __sched
  12877. rt_mutex_slowlock(struct rt_mutex *lock, int state,
  12878. struct hrtimer_sleeper *timeout,
  12879. - enum rtmutex_chainwalk chwalk)
  12880. + enum rtmutex_chainwalk chwalk,
  12881. + struct ww_acquire_ctx *ww_ctx)
  12882. {
  12883. struct rt_mutex_waiter waiter;
  12884. int ret = 0;
  12885. - debug_rt_mutex_init_waiter(&waiter);
  12886. - RB_CLEAR_NODE(&waiter.pi_tree_entry);
  12887. - RB_CLEAR_NODE(&waiter.tree_entry);
  12888. + rt_mutex_init_waiter(&waiter, false);
  12889. raw_spin_lock(&lock->wait_lock);
  12890. /* Try to acquire the lock again: */
  12891. if (try_to_take_rt_mutex(lock, current, NULL)) {
  12892. + if (ww_ctx)
  12893. + ww_mutex_account_lock(lock, ww_ctx);
  12894. raw_spin_unlock(&lock->wait_lock);
  12895. return 0;
  12896. }
  12897. @@ -1188,14 +1695,23 @@
  12898. ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
  12899. if (likely(!ret))
  12900. - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
  12901. + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, ww_ctx);
  12902. + else if (ww_ctx) {
  12903. + /* ww_mutex received EDEADLK, let it become EALREADY */
  12904. + ret = __mutex_lock_check_stamp(lock, ww_ctx);
  12905. + BUG_ON(!ret);
  12906. + }
  12907. set_current_state(TASK_RUNNING);
  12908. if (unlikely(ret)) {
  12909. if (rt_mutex_has_waiters(lock))
  12910. remove_waiter(lock, &waiter);
  12911. - rt_mutex_handle_deadlock(ret, chwalk, &waiter);
  12912. + /* ww_mutex wants to report EDEADLK/EALREADY, let it */
  12913. + if (!ww_ctx)
  12914. + rt_mutex_handle_deadlock(ret, chwalk, &waiter);
  12915. + } else if (ww_ctx) {
  12916. + ww_mutex_account_lock(lock, ww_ctx);
  12917. }
  12918. /*
  12919. @@ -1234,7 +1750,8 @@
  12920. * The mutex has currently no owner. Lock the wait lock and
  12921. * try to acquire the lock.
  12922. */
  12923. - raw_spin_lock(&lock->wait_lock);
  12924. + if (!raw_spin_trylock(&lock->wait_lock))
  12925. + return 0;
  12926. ret = try_to_take_rt_mutex(lock, current, NULL);
  12927. @@ -1320,31 +1837,36 @@
  12928. */
  12929. static inline int
  12930. rt_mutex_fastlock(struct rt_mutex *lock, int state,
  12931. + struct ww_acquire_ctx *ww_ctx,
  12932. int (*slowfn)(struct rt_mutex *lock, int state,
  12933. struct hrtimer_sleeper *timeout,
  12934. - enum rtmutex_chainwalk chwalk))
  12935. + enum rtmutex_chainwalk chwalk,
  12936. + struct ww_acquire_ctx *ww_ctx))
  12937. {
  12938. if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
  12939. rt_mutex_deadlock_account_lock(lock, current);
  12940. return 0;
  12941. } else
  12942. - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
  12943. + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK,
  12944. + ww_ctx);
  12945. }
  12946. static inline int
  12947. rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
  12948. struct hrtimer_sleeper *timeout,
  12949. enum rtmutex_chainwalk chwalk,
  12950. + struct ww_acquire_ctx *ww_ctx,
  12951. int (*slowfn)(struct rt_mutex *lock, int state,
  12952. struct hrtimer_sleeper *timeout,
  12953. - enum rtmutex_chainwalk chwalk))
  12954. + enum rtmutex_chainwalk chwalk,
  12955. + struct ww_acquire_ctx *ww_ctx))
  12956. {
  12957. if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
  12958. likely(rt_mutex_cmpxchg(lock, NULL, current))) {
  12959. rt_mutex_deadlock_account_lock(lock, current);
  12960. return 0;
  12961. } else
  12962. - return slowfn(lock, state, timeout, chwalk);
  12963. + return slowfn(lock, state, timeout, chwalk, ww_ctx);
  12964. }
  12965. static inline int
  12966. @@ -1377,7 +1899,7 @@
  12967. {
  12968. might_sleep();
  12969. - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
  12970. + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock);
  12971. }
  12972. EXPORT_SYMBOL_GPL(rt_mutex_lock);
  12973. @@ -1394,7 +1916,7 @@
  12974. {
  12975. might_sleep();
  12976. - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
  12977. + return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock);
  12978. }
  12979. EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
  12980. @@ -1407,11 +1929,30 @@
  12981. might_sleep();
  12982. return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
  12983. - RT_MUTEX_FULL_CHAINWALK,
  12984. + RT_MUTEX_FULL_CHAINWALK, NULL,
  12985. rt_mutex_slowlock);
  12986. }
  12987. /**
  12988. + * rt_mutex_lock_killable - lock a rt_mutex killable
  12989. + *
  12990. + * @lock: the rt_mutex to be locked
  12991. + *
  12992. + * Like rt_mutex_lock(), but the wait can be broken by a fatal signal.
  12993. + *
  12994. + * Returns:
  12995. + * 0 on success
  12996. + * -EINTR when interrupted by a fatal signal
  12997. + */
  12998. +int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
  12999. +{
  13000. + might_sleep();
  13001. +
  13002. + return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock);
  13003. +}
  13004. +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
  13005. +
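
rt_mutex_lock_killable() blocks like rt_mutex_lock(), but a fatal signal ends the wait with -EINTR, so a killed task is not left sleeping forever on a contended lock. A short usage sketch (struct my_dev and demo_update() are hypothetical; rt_mutex_lock_killable() itself is the function added just above):

    #include <linux/rtmutex.h>

    struct my_dev {
        struct rt_mutex lock;
        int             value;
    };

    static int demo_update(struct my_dev *dev, int value)
    {
        if (rt_mutex_lock_killable(&dev->lock))
            return -EINTR;          /* a fatal signal ended the wait */

        dev->value = value;
        rt_mutex_unlock(&dev->lock);
        return 0;
    }
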
  13006. +/**
  13007. * rt_mutex_timed_lock - lock a rt_mutex interruptible
  13008. * the timeout structure is provided
  13009. * by the caller
  13010. @@ -1431,6 +1972,7 @@
  13011. return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
  13012. RT_MUTEX_MIN_CHAINWALK,
  13013. + NULL,
  13014. rt_mutex_slowlock);
  13015. }
  13016. EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
  13017. @@ -1489,13 +2031,12 @@
  13018. void __rt_mutex_init(struct rt_mutex *lock, const char *name)
  13019. {
  13020. lock->owner = NULL;
  13021. - raw_spin_lock_init(&lock->wait_lock);
  13022. lock->waiters = RB_ROOT;
  13023. lock->waiters_leftmost = NULL;
  13024. debug_rt_mutex_init(lock, name);
  13025. }
  13026. -EXPORT_SYMBOL_GPL(__rt_mutex_init);
  13027. +EXPORT_SYMBOL(__rt_mutex_init);
  13028. /**
  13029. * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
  13030. @@ -1510,7 +2051,7 @@
  13031. void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  13032. struct task_struct *proxy_owner)
  13033. {
  13034. - __rt_mutex_init(lock, NULL);
  13035. + rt_mutex_init(lock);
  13036. debug_rt_mutex_proxy_lock(lock, proxy_owner);
  13037. rt_mutex_set_owner(lock, proxy_owner);
  13038. rt_mutex_deadlock_account_lock(lock, proxy_owner);
  13039. @@ -1558,6 +2099,35 @@
  13040. return 1;
  13041. }
  13042. +#ifdef CONFIG_PREEMPT_RT_FULL
  13043. + /*
  13044. + * In PREEMPT_RT there's an added race.
  13045. + * If the task that we are about to requeue times out,
  13046. + * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
  13047. + * to skip this task. But right after the task sets
  13048. + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
  13049. + * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
  13050. + * This will replace the PI_WAKEUP_INPROGRESS with the actual
  13051. + * lock that it blocks on. We *must not* place this task
  13052. + * on this proxy lock in that case.
  13053. + *
  13054. + * To prevent this race, we first take the task's pi_lock
  13055. + * and check if it has updated its pi_blocked_on. If it has,
  13056. + * we assume that it woke up and we return -EAGAIN.
  13057. + * Otherwise, we set the task's pi_blocked_on to
  13058. + * PI_REQUEUE_INPROGRESS, so that if the task is waking up
  13059. + * it will know that we are in the process of requeuing it.
  13060. + */
  13061. + raw_spin_lock_irq(&task->pi_lock);
  13062. + if (task->pi_blocked_on) {
  13063. + raw_spin_unlock_irq(&task->pi_lock);
  13064. + raw_spin_unlock(&lock->wait_lock);
  13065. + return -EAGAIN;
  13066. + }
  13067. + task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
  13068. + raw_spin_unlock_irq(&task->pi_lock);
  13069. +#endif
  13070. +
  13071. /* We enforce deadlock detection for futexes */
  13072. ret = task_blocks_on_rt_mutex(lock, waiter, task,
  13073. RT_MUTEX_FULL_CHAINWALK);
  13074. @@ -1627,7 +2197,7 @@
  13075. set_current_state(TASK_INTERRUPTIBLE);
  13076. - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
  13077. + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
  13078. set_current_state(TASK_RUNNING);
  13079. @@ -1644,3 +2214,89 @@
  13080. return ret;
  13081. }
  13082. +
  13083. +static inline int
  13084. +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
  13085. +{
  13086. +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
  13087. + unsigned tmp;
  13088. +
  13089. + if (ctx->deadlock_inject_countdown-- == 0) {
  13090. + tmp = ctx->deadlock_inject_interval;
  13091. + if (tmp > UINT_MAX/4)
  13092. + tmp = UINT_MAX;
  13093. + else
  13094. + tmp = tmp*2 + tmp + tmp/2;
  13095. +
  13096. + ctx->deadlock_inject_interval = tmp;
  13097. + ctx->deadlock_inject_countdown = tmp;
  13098. + ctx->contending_lock = lock;
  13099. +
  13100. + ww_mutex_unlock(lock);
  13101. +
  13102. + return -EDEADLK;
  13103. + }
  13104. +#endif
  13105. +
  13106. + return 0;
  13107. +}
  13108. +
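
With CONFIG_DEBUG_WW_MUTEX_SLOWPATH, every deadlock_inject_countdown-th successful acquisition is turned into an artificial -EDEADLK so callers keep exercising their backoff path. The interval then grows by tmp*2 + tmp + tmp/2, i.e. roughly 3.5x per injection, saturating near UINT_MAX. A standalone illustration of that arithmetic (plain userspace C, assuming the usual ww_acquire_init() start value of 1):

    #include <stdio.h>
    #include <limits.h>

    /* Illustration only: how deadlock_inject_interval grows, mirroring
     * the arithmetic in ww_mutex_deadlock_injection() above. */
    int main(void)
    {
        unsigned int interval = 1;  /* assumed ww_acquire_init() start value */
        int i;

        for (i = 1; i <= 8; i++) {
            unsigned int tmp = interval;

            if (tmp > UINT_MAX / 4)
                tmp = UINT_MAX;
            else
                tmp = tmp * 2 + tmp + tmp / 2;  /* ~3.5x */

            interval = tmp;
            printf("after injection %d: next one in %u acquisitions\n",
                   i, interval);
        }
        return 0;
    }
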
  13109. +#ifdef CONFIG_PREEMPT_RT_FULL
  13110. +int __sched
  13111. +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
  13112. +{
  13113. + int ret;
  13114. +
  13115. + might_sleep();
  13116. +
  13117. + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
  13118. + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx);
  13119. + if (ret)
  13120. + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
  13121. + else if (!ret && ww_ctx->acquired > 1)
  13122. + return ww_mutex_deadlock_injection(lock, ww_ctx);
  13123. +
  13124. + return ret;
  13125. +}
  13126. +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
  13127. +
  13128. +int __sched
  13129. +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
  13130. +{
  13131. + int ret;
  13132. +
  13133. + might_sleep();
  13134. +
  13135. + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
  13136. + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx);
  13137. + if (ret)
  13138. + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
  13139. + else if (!ret && ww_ctx->acquired > 1)
  13140. + return ww_mutex_deadlock_injection(lock, ww_ctx);
  13141. +
  13142. + return ret;
  13143. +}
  13144. +EXPORT_SYMBOL_GPL(__ww_mutex_lock);
  13145. +
  13146. +void __sched ww_mutex_unlock(struct ww_mutex *lock)
  13147. +{
  13148. + int nest = !!lock->ctx;
  13149. +
  13150. + /*
  13151. + * The unlocking fastpath is the 0->1 transition from 'locked'
  13152. + * into 'unlocked' state:
  13153. + */
  13154. + if (nest) {
  13155. +#ifdef CONFIG_DEBUG_MUTEXES
  13156. + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
  13157. +#endif
  13158. + if (lock->ctx->acquired > 0)
  13159. + lock->ctx->acquired--;
  13160. + lock->ctx = NULL;
  13161. + }
  13162. +
  13163. + mutex_release(&lock->base.dep_map, nest, _RET_IP_);
  13164. + rt_mutex_unlock(&lock->base.lock);
  13165. +}
  13166. +EXPORT_SYMBOL(ww_mutex_unlock);
  13167. +#endif
  13168. diff -Nur linux-3.18.10.orig/kernel/locking/rtmutex_common.h linux-3.18.10/kernel/locking/rtmutex_common.h
  13169. --- linux-3.18.10.orig/kernel/locking/rtmutex_common.h 2015-03-24 02:05:12.000000000 +0100
  13170. +++ linux-3.18.10/kernel/locking/rtmutex_common.h 2015-03-26 12:42:18.675588336 +0100
  13171. @@ -49,6 +49,7 @@
  13172. struct rb_node pi_tree_entry;
  13173. struct task_struct *task;
  13174. struct rt_mutex *lock;
  13175. + bool savestate;
  13176. #ifdef CONFIG_DEBUG_RT_MUTEXES
  13177. unsigned long ip;
  13178. struct pid *deadlock_task_pid;
  13179. @@ -119,6 +120,9 @@
  13180. /*
  13181. * PI-futex support (proxy locking functions, etc.):
  13182. */
  13183. +#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
  13184. +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)
  13185. +
  13186. extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
  13187. extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  13188. struct task_struct *proxy_owner);
  13189. @@ -138,4 +142,14 @@
  13190. # include "rtmutex.h"
  13191. #endif
  13192. +static inline void
  13193. +rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
  13194. +{
  13195. + debug_rt_mutex_init_waiter(waiter);
  13196. + waiter->task = NULL;
  13197. + waiter->savestate = savestate;
  13198. + RB_CLEAR_NODE(&waiter->pi_tree_entry);
  13199. + RB_CLEAR_NODE(&waiter->tree_entry);
  13200. +}
  13201. +
  13202. #endif
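
Two additions in this header are worth spelling out: task->pi_blocked_on may now carry the PI_WAKEUP_INPROGRESS or PI_REQUEUE_INPROGRESS marker instead of a real waiter (see the requeue race comment in the rtmutex.c hunk above), and rt_mutex_init_waiter() gains a savestate flag, with false used by the ordinary rt_mutex_slowlock() path shown earlier and true expected from the RT spinlock-substitution paths so the blocked task's state is saved and restored around the wait. A minimal sketch of how code is meant to tell the markers from a genuine waiter (demo_real_waiter() is an illustration, not a helper from this patch):

    #include <linux/types.h>
    #include "rtmutex_common.h"

    /* pi_blocked_on is NULL, one of the marker values, or a real waiter. */
    static bool demo_real_waiter(struct rt_mutex_waiter *w)
    {
        return w && w != PI_WAKEUP_INPROGRESS && w != PI_REQUEUE_INPROGRESS;
    }
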
  13203. diff -Nur linux-3.18.10.orig/kernel/locking/spinlock.c linux-3.18.10/kernel/locking/spinlock.c
  13204. --- linux-3.18.10.orig/kernel/locking/spinlock.c 2015-03-24 02:05:12.000000000 +0100
  13205. +++ linux-3.18.10/kernel/locking/spinlock.c 2015-03-26 12:42:18.675588336 +0100
  13206. @@ -124,8 +124,11 @@
  13207. * __[spin|read|write]_lock_bh()
  13208. */
  13209. BUILD_LOCK_OPS(spin, raw_spinlock);
  13210. +
  13211. +#ifndef CONFIG_PREEMPT_RT_FULL
  13212. BUILD_LOCK_OPS(read, rwlock);
  13213. BUILD_LOCK_OPS(write, rwlock);
  13214. +#endif
  13215. #endif
  13216. @@ -209,6 +212,8 @@
  13217. EXPORT_SYMBOL(_raw_spin_unlock_bh);
  13218. #endif
  13219. +#ifndef CONFIG_PREEMPT_RT_FULL
  13220. +
  13221. #ifndef CONFIG_INLINE_READ_TRYLOCK
  13222. int __lockfunc _raw_read_trylock(rwlock_t *lock)
  13223. {
  13224. @@ -353,6 +358,8 @@
  13225. EXPORT_SYMBOL(_raw_write_unlock_bh);
  13226. #endif
  13227. +#endif /* !PREEMPT_RT_FULL */
  13228. +
  13229. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  13230. void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
  13231. diff -Nur linux-3.18.10.orig/kernel/locking/spinlock_debug.c linux-3.18.10/kernel/locking/spinlock_debug.c
  13232. --- linux-3.18.10.orig/kernel/locking/spinlock_debug.c 2015-03-24 02:05:12.000000000 +0100
  13233. +++ linux-3.18.10/kernel/locking/spinlock_debug.c 2015-03-26 12:42:18.675588336 +0100
  13234. @@ -31,6 +31,7 @@
  13235. EXPORT_SYMBOL(__raw_spin_lock_init);
  13236. +#ifndef CONFIG_PREEMPT_RT_FULL
  13237. void __rwlock_init(rwlock_t *lock, const char *name,
  13238. struct lock_class_key *key)
  13239. {
  13240. @@ -48,6 +49,7 @@
  13241. }
  13242. EXPORT_SYMBOL(__rwlock_init);
  13243. +#endif
  13244. static void spin_dump(raw_spinlock_t *lock, const char *msg)
  13245. {
  13246. @@ -159,6 +161,7 @@
  13247. arch_spin_unlock(&lock->raw_lock);
  13248. }
  13249. +#ifndef CONFIG_PREEMPT_RT_FULL
  13250. static void rwlock_bug(rwlock_t *lock, const char *msg)
  13251. {
  13252. if (!debug_locks_off())
  13253. @@ -300,3 +303,5 @@
  13254. debug_write_unlock(lock);
  13255. arch_write_unlock(&lock->raw_lock);
  13256. }
  13257. +
  13258. +#endif
  13259. diff -Nur linux-3.18.10.orig/kernel/panic.c linux-3.18.10/kernel/panic.c
  13260. --- linux-3.18.10.orig/kernel/panic.c 2015-03-24 02:05:12.000000000 +0100
  13261. +++ linux-3.18.10/kernel/panic.c 2015-03-26 12:42:18.675588336 +0100
  13262. @@ -384,9 +384,11 @@
  13263. static int init_oops_id(void)
  13264. {
  13265. +#ifndef CONFIG_PREEMPT_RT_FULL
  13266. if (!oops_id)
  13267. get_random_bytes(&oops_id, sizeof(oops_id));
  13268. else
  13269. +#endif
  13270. oops_id++;
  13271. return 0;
  13272. diff -Nur linux-3.18.10.orig/kernel/power/hibernate.c linux-3.18.10/kernel/power/hibernate.c
  13273. --- linux-3.18.10.orig/kernel/power/hibernate.c 2015-03-24 02:05:12.000000000 +0100
  13274. +++ linux-3.18.10/kernel/power/hibernate.c 2015-03-26 12:42:18.675588336 +0100
  13275. @@ -287,6 +287,8 @@
  13276. local_irq_disable();
  13277. + system_state = SYSTEM_SUSPEND;
  13278. +
  13279. error = syscore_suspend();
  13280. if (error) {
  13281. printk(KERN_ERR "PM: Some system devices failed to power down, "
  13282. @@ -316,6 +318,7 @@
  13283. syscore_resume();
  13284. Enable_irqs:
  13285. + system_state = SYSTEM_RUNNING;
  13286. local_irq_enable();
  13287. Enable_cpus:
  13288. @@ -439,6 +442,7 @@
  13289. goto Enable_cpus;
  13290. local_irq_disable();
  13291. + system_state = SYSTEM_SUSPEND;
  13292. error = syscore_suspend();
  13293. if (error)
  13294. @@ -472,6 +476,7 @@
  13295. syscore_resume();
  13296. Enable_irqs:
  13297. + system_state = SYSTEM_RUNNING;
  13298. local_irq_enable();
  13299. Enable_cpus:
  13300. @@ -557,6 +562,7 @@
  13301. goto Platform_finish;
  13302. local_irq_disable();
  13303. + system_state = SYSTEM_SUSPEND;
  13304. syscore_suspend();
  13305. if (pm_wakeup_pending()) {
  13306. error = -EAGAIN;
  13307. @@ -569,6 +575,7 @@
  13308. Power_up:
  13309. syscore_resume();
  13310. + system_state = SYSTEM_RUNNING;
  13311. local_irq_enable();
  13312. enable_nonboot_cpus();
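
Each low-level hibernation entry/exit path now sets system_state to SYSTEM_SUSPEND for the interrupts-off window around syscore_suspend()/syscore_resume(), and back to SYSTEM_RUNNING afterwards (kernel/power/suspend.c below gets the same bracketing). The patch does not state the motivation here; the visible effect is that code inspecting system_state can tell this window apart from normal running operation. Condensed shape of the pattern (a sketch, not a verbatim hunk; the SYSTEM_SUSPEND state itself is introduced elsewhere in this series):

    #include <linux/kernel.h>
    #include <linux/syscore_ops.h>
    #include <linux/irqflags.h>

    /* Sketch of the bracketing added above; error handling trimmed. */
    static int demo_syscore_suspend_window(void)
    {
        int error;

        local_irq_disable();
        system_state = SYSTEM_SUSPEND;

        error = syscore_suspend();
        if (!error) {
            /* ... the platform sleeps and wakes up here ... */
            syscore_resume();
        }

        system_state = SYSTEM_RUNNING;
        local_irq_enable();
        return error;
    }
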
  13313. diff -Nur linux-3.18.10.orig/kernel/power/suspend.c linux-3.18.10/kernel/power/suspend.c
  13314. --- linux-3.18.10.orig/kernel/power/suspend.c 2015-03-24 02:05:12.000000000 +0100
  13315. +++ linux-3.18.10/kernel/power/suspend.c 2015-03-26 12:42:18.675588336 +0100
  13316. @@ -318,6 +318,8 @@
  13317. arch_suspend_disable_irqs();
  13318. BUG_ON(!irqs_disabled());
  13319. + system_state = SYSTEM_SUSPEND;
  13320. +
  13321. error = syscore_suspend();
  13322. if (!error) {
  13323. *wakeup = pm_wakeup_pending();
  13324. @@ -332,6 +334,8 @@
  13325. syscore_resume();
  13326. }
  13327. + system_state = SYSTEM_RUNNING;
  13328. +
  13329. arch_suspend_enable_irqs();
  13330. BUG_ON(irqs_disabled());
  13331. diff -Nur linux-3.18.10.orig/kernel/printk/printk.c linux-3.18.10/kernel/printk/printk.c
  13332. --- linux-3.18.10.orig/kernel/printk/printk.c 2015-03-24 02:05:12.000000000 +0100
  13333. +++ linux-3.18.10/kernel/printk/printk.c 2015-03-26 12:42:18.675588336 +0100
  13334. @@ -1165,6 +1165,7 @@
  13335. {
  13336. char *text;
  13337. int len = 0;
  13338. + int attempts = 0;
  13339. text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
  13340. if (!text)
  13341. @@ -1176,7 +1177,14 @@
  13342. u64 seq;
  13343. u32 idx;
  13344. enum log_flags prev;
  13345. -
  13346. + int num_msg;
  13347. +try_again:
  13348. + attempts++;
  13349. + if (attempts > 10) {
  13350. + len = -EBUSY;
  13351. + goto out;
  13352. + }
  13353. + num_msg = 0;
  13354. if (clear_seq < log_first_seq) {
  13355. /* messages are gone, move to first available one */
  13356. clear_seq = log_first_seq;
  13357. @@ -1197,6 +1205,14 @@
  13358. prev = msg->flags;
  13359. idx = log_next(idx);
  13360. seq++;
  13361. + num_msg++;
  13362. + if (num_msg > 5) {
  13363. + num_msg = 0;
  13364. + raw_spin_unlock_irq(&logbuf_lock);
  13365. + raw_spin_lock_irq(&logbuf_lock);
  13366. + if (clear_seq < log_first_seq)
  13367. + goto try_again;
  13368. + }
  13369. }
  13370. /* move first record forward until length fits into the buffer */
  13371. @@ -1210,6 +1226,14 @@
  13372. prev = msg->flags;
  13373. idx = log_next(idx);
  13374. seq++;
  13375. + num_msg++;
  13376. + if (num_msg > 5) {
  13377. + num_msg = 0;
  13378. + raw_spin_unlock_irq(&logbuf_lock);
  13379. + raw_spin_lock_irq(&logbuf_lock);
  13380. + if (clear_seq < log_first_seq)
  13381. + goto try_again;
  13382. + }
  13383. }
  13384. /* last message fitting into this dump */
  13385. @@ -1250,6 +1274,7 @@
  13386. clear_seq = log_next_seq;
  13387. clear_idx = log_next_idx;
  13388. }
  13389. +out:
  13390. raw_spin_unlock_irq(&logbuf_lock);
  13391. kfree(text);
  13392. @@ -1407,6 +1432,7 @@
  13393. if (!console_drivers)
  13394. return;
  13395. + migrate_disable();
  13396. for_each_console(con) {
  13397. if (exclusive_console && con != exclusive_console)
  13398. continue;
  13399. @@ -1419,6 +1445,7 @@
  13400. continue;
  13401. con->write(con, text, len);
  13402. }
  13403. + migrate_enable();
  13404. }
  13405. /*
  13406. @@ -1479,6 +1506,15 @@
  13407. static int console_trylock_for_printk(void)
  13408. {
  13409. unsigned int cpu = smp_processor_id();
  13410. +#ifdef CONFIG_PREEMPT_RT_FULL
  13411. + int lock = !early_boot_irqs_disabled && (preempt_count() == 0) &&
  13412. + !irqs_disabled();
  13413. +#else
  13414. + int lock = 1;
  13415. +#endif
  13416. +
  13417. + if (!lock)
  13418. + return 0;
  13419. if (!console_trylock())
  13420. return 0;
  13421. @@ -1613,6 +1649,62 @@
  13422. return textlen;
  13423. }
  13424. +#ifdef CONFIG_EARLY_PRINTK
  13425. +struct console *early_console;
  13426. +
  13427. +void early_vprintk(const char *fmt, va_list ap)
  13428. +{
  13429. + if (early_console) {
  13430. + char buf[512];
  13431. + int n = vscnprintf(buf, sizeof(buf), fmt, ap);
  13432. +
  13433. + early_console->write(early_console, buf, n);
  13434. + }
  13435. +}
  13436. +
  13437. +asmlinkage void early_printk(const char *fmt, ...)
  13438. +{
  13439. + va_list ap;
  13440. +
  13441. + va_start(ap, fmt);
  13442. + early_vprintk(fmt, ap);
  13443. + va_end(ap);
  13444. +}
  13445. +
  13446. +/*
  13447. + * This is independent of any log levels - a global
  13448. + * kill switch that turns off all of printk.
  13449. + *
  13450. + * Used by the NMI watchdog if early-printk is enabled.
  13451. + */
  13452. +static bool __read_mostly printk_killswitch;
  13453. +
  13454. +static int __init force_early_printk_setup(char *str)
  13455. +{
  13456. + printk_killswitch = true;
  13457. + return 0;
  13458. +}
  13459. +early_param("force_early_printk", force_early_printk_setup);
  13460. +
  13461. +void printk_kill(void)
  13462. +{
  13463. + printk_killswitch = true;
  13464. +}
  13465. +
  13466. +static int forced_early_printk(const char *fmt, va_list ap)
  13467. +{
  13468. + if (!printk_killswitch)
  13469. + return 0;
  13470. + early_vprintk(fmt, ap);
  13471. + return 1;
  13472. +}
  13473. +#else
  13474. +static inline int forced_early_printk(const char *fmt, va_list ap)
  13475. +{
  13476. + return 0;
  13477. +}
  13478. +#endif
  13479. +
  13480. asmlinkage int vprintk_emit(int facility, int level,
  13481. const char *dict, size_t dictlen,
  13482. const char *fmt, va_list args)
  13483. @@ -1629,6 +1721,13 @@
  13484. /* cpu currently holding logbuf_lock in this function */
  13485. static volatile unsigned int logbuf_cpu = UINT_MAX;
  13486. + /*
  13487. + * Fall back to early_printk if a debugging subsystem has
  13488. + * killed printk output
  13489. + */
  13490. + if (unlikely(forced_early_printk(fmt, args)))
  13491. + return 1;
  13492. +
  13493. if (level == SCHED_MESSAGE_LOGLEVEL) {
  13494. level = -1;
  13495. in_sched = true;
  13496. @@ -1769,8 +1868,7 @@
  13497. * console_sem which would prevent anyone from printing to
  13498. * console
  13499. */
  13500. - preempt_disable();
  13501. -
  13502. + migrate_disable();
  13503. /*
  13504. * Try to acquire and then immediately release the console
  13505. * semaphore. The release will print out buffers and wake up
  13506. @@ -1778,7 +1876,7 @@
  13507. */
  13508. if (console_trylock_for_printk())
  13509. console_unlock();
  13510. - preempt_enable();
  13511. + migrate_enable();
  13512. lockdep_on();
  13513. }
  13514. @@ -1878,29 +1976,6 @@
  13515. #endif /* CONFIG_PRINTK */
  13516. -#ifdef CONFIG_EARLY_PRINTK
  13517. -struct console *early_console;
  13518. -
  13519. -void early_vprintk(const char *fmt, va_list ap)
  13520. -{
  13521. - if (early_console) {
  13522. - char buf[512];
  13523. - int n = vscnprintf(buf, sizeof(buf), fmt, ap);
  13524. -
  13525. - early_console->write(early_console, buf, n);
  13526. - }
  13527. -}
  13528. -
  13529. -asmlinkage __visible void early_printk(const char *fmt, ...)
  13530. -{
  13531. - va_list ap;
  13532. -
  13533. - va_start(ap, fmt);
  13534. - early_vprintk(fmt, ap);
  13535. - va_end(ap);
  13536. -}
  13537. -#endif
  13538. -
  13539. static int __add_preferred_console(char *name, int idx, char *options,
  13540. char *brl_options)
  13541. {
  13542. @@ -2140,11 +2215,16 @@
  13543. goto out;
  13544. len = cont_print_text(text, size);
  13545. +#ifndef CONFIG_PREEMPT_RT_FULL
  13546. raw_spin_unlock(&logbuf_lock);
  13547. stop_critical_timings();
  13548. call_console_drivers(cont.level, text, len);
  13549. start_critical_timings();
  13550. local_irq_restore(flags);
  13551. +#else
  13552. + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  13553. + call_console_drivers(cont.level, text, len);
  13554. +#endif
  13555. return;
  13556. out:
  13557. raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  13558. @@ -2232,12 +2312,17 @@
  13559. console_idx = log_next(console_idx);
  13560. console_seq++;
  13561. console_prev = msg->flags;
  13562. +#ifdef CONFIG_PREEMPT_RT_FULL
  13563. + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  13564. + call_console_drivers(level, text, len);
  13565. +#else
  13566. raw_spin_unlock(&logbuf_lock);
  13567. stop_critical_timings(); /* don't trace print latency */
  13568. call_console_drivers(level, text, len);
  13569. start_critical_timings();
  13570. local_irq_restore(flags);
  13571. +#endif
  13572. }
  13573. console_locked = 0;
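
The printk.c changes above add a one-way kill switch: after printk_kill() is called (or the kernel boots with force_early_printk), vprintk_emit() short-circuits into early_vprintk(), so output bypasses logbuf_lock and console_sem entirely and goes straight to the registered early console. A sketch of the intended use from a debugging facility (demo_report_hard_lockup() is hypothetical; printk_kill() is declared elsewhere in this series):

    #include <linux/printk.h>

    /*
     * Hypothetical hard-lockup reporter: the normal printk path may be
     * wedged on one of its locks, so switch to the early console first.
     */
    void demo_report_hard_lockup(int cpu)
    {
        printk_kill();
        printk(KERN_EMERG "CPU%d appears to be stuck\n", cpu);
        /* from here on, output is emitted via early_vprintk() */
    }
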
  13574. diff -Nur linux-3.18.10.orig/kernel/ptrace.c linux-3.18.10/kernel/ptrace.c
  13575. --- linux-3.18.10.orig/kernel/ptrace.c 2015-03-24 02:05:12.000000000 +0100
  13576. +++ linux-3.18.10/kernel/ptrace.c 2015-03-26 12:42:18.675588336 +0100
  13577. @@ -129,7 +129,12 @@
  13578. spin_lock_irq(&task->sighand->siglock);
  13579. if (task_is_traced(task) && !__fatal_signal_pending(task)) {
  13580. - task->state = __TASK_TRACED;
  13581. + raw_spin_lock_irq(&task->pi_lock);
  13582. + if (task->state & __TASK_TRACED)
  13583. + task->state = __TASK_TRACED;
  13584. + else
  13585. + task->saved_state = __TASK_TRACED;
  13586. + raw_spin_unlock_irq(&task->pi_lock);
  13587. ret = true;
  13588. }
  13589. spin_unlock_irq(&task->sighand->siglock);
  13590. diff -Nur linux-3.18.10.orig/kernel/rcu/tiny.c linux-3.18.10/kernel/rcu/tiny.c
  13591. --- linux-3.18.10.orig/kernel/rcu/tiny.c 2015-03-24 02:05:12.000000000 +0100
  13592. +++ linux-3.18.10/kernel/rcu/tiny.c 2015-03-26 12:42:18.675588336 +0100
  13593. @@ -370,6 +370,7 @@
  13594. }
  13595. EXPORT_SYMBOL_GPL(call_rcu_sched);
  13596. +#ifndef CONFIG_PREEMPT_RT_FULL
  13597. /*
  13598. * Post an RCU bottom-half callback to be invoked after any subsequent
  13599. * quiescent state.
  13600. @@ -379,6 +380,7 @@
  13601. __call_rcu(head, func, &rcu_bh_ctrlblk);
  13602. }
  13603. EXPORT_SYMBOL_GPL(call_rcu_bh);
  13604. +#endif
  13605. void rcu_init(void)
  13606. {
  13607. diff -Nur linux-3.18.10.orig/kernel/rcu/tree.c linux-3.18.10/kernel/rcu/tree.c
  13608. --- linux-3.18.10.orig/kernel/rcu/tree.c 2015-03-24 02:05:12.000000000 +0100
  13609. +++ linux-3.18.10/kernel/rcu/tree.c 2015-03-26 12:42:18.679588341 +0100
  13610. @@ -56,6 +56,11 @@
  13611. #include <linux/random.h>
  13612. #include <linux/ftrace_event.h>
  13613. #include <linux/suspend.h>
  13614. +#include <linux/delay.h>
  13615. +#include <linux/gfp.h>
  13616. +#include <linux/oom.h>
  13617. +#include <linux/smpboot.h>
  13618. +#include "../time/tick-internal.h"
  13619. #include "tree.h"
  13620. #include "rcu.h"
  13621. @@ -152,8 +157,6 @@
  13622. */
  13623. static int rcu_scheduler_fully_active __read_mostly;
  13624. -#ifdef CONFIG_RCU_BOOST
  13625. -
  13626. /*
  13627. * Control variables for per-CPU and per-rcu_node kthreads. These
  13628. * handle all flavors of RCU.
  13629. @@ -163,8 +166,6 @@
  13630. DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  13631. DEFINE_PER_CPU(char, rcu_cpu_has_work);
  13632. -#endif /* #ifdef CONFIG_RCU_BOOST */
  13633. -
  13634. static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
  13635. static void invoke_rcu_core(void);
  13636. static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
  13637. @@ -207,6 +208,19 @@
  13638. }
  13639. }
  13640. +#ifdef CONFIG_PREEMPT_RT_FULL
  13641. +static void rcu_preempt_qs(void);
  13642. +
  13643. +void rcu_bh_qs(void)
  13644. +{
  13645. + unsigned long flags;
  13646. +
  13647. + /* Callers to this function, rcu_preempt_qs(), must disable irqs. */
  13648. + local_irq_save(flags);
  13649. + rcu_preempt_qs();
  13650. + local_irq_restore(flags);
  13651. +}
  13652. +#else
  13653. void rcu_bh_qs(void)
  13654. {
  13655. if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) {
  13656. @@ -216,6 +230,7 @@
  13657. __this_cpu_write(rcu_bh_data.passed_quiesce, 1);
  13658. }
  13659. }
  13660. +#endif
  13661. static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
  13662. @@ -336,6 +351,7 @@
  13663. }
  13664. EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
  13665. +#ifndef CONFIG_PREEMPT_RT_FULL
  13666. /*
  13667. * Return the number of RCU BH batches processed thus far for debug & stats.
  13668. */
  13669. @@ -363,6 +379,13 @@
  13670. }
  13671. EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
  13672. +#else
  13673. +void rcu_force_quiescent_state(void)
  13674. +{
  13675. +}
  13676. +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  13677. +#endif
  13678. +
  13679. /*
  13680. * Show the state of the grace-period kthreads.
  13681. */
  13682. @@ -1411,7 +1434,7 @@
  13683. !ACCESS_ONCE(rsp->gp_flags) ||
  13684. !rsp->gp_kthread)
  13685. return;
  13686. - wake_up(&rsp->gp_wq);
  13687. + swait_wake(&rsp->gp_wq);
  13688. }
  13689. /*
  13690. @@ -1793,7 +1816,7 @@
  13691. ACCESS_ONCE(rsp->gpnum),
  13692. TPS("reqwait"));
  13693. rsp->gp_state = RCU_GP_WAIT_GPS;
  13694. - wait_event_interruptible(rsp->gp_wq,
  13695. + swait_event_interruptible(rsp->gp_wq,
  13696. ACCESS_ONCE(rsp->gp_flags) &
  13697. RCU_GP_FLAG_INIT);
  13698. /* Locking provides needed memory barrier. */
  13699. @@ -1821,7 +1844,7 @@
  13700. ACCESS_ONCE(rsp->gpnum),
  13701. TPS("fqswait"));
  13702. rsp->gp_state = RCU_GP_WAIT_FQS;
  13703. - ret = wait_event_interruptible_timeout(rsp->gp_wq,
  13704. + ret = swait_event_interruptible_timeout(rsp->gp_wq,
  13705. ((gf = ACCESS_ONCE(rsp->gp_flags)) &
  13706. RCU_GP_FLAG_FQS) ||
  13707. (!ACCESS_ONCE(rnp->qsmask) &&
  13708. @@ -2565,16 +2588,14 @@
  13709. /*
  13710. * Do RCU core processing for the current CPU.
  13711. */
  13712. -static void rcu_process_callbacks(struct softirq_action *unused)
  13713. +static void rcu_process_callbacks(void)
  13714. {
  13715. struct rcu_state *rsp;
  13716. if (cpu_is_offline(smp_processor_id()))
  13717. return;
  13718. - trace_rcu_utilization(TPS("Start RCU core"));
  13719. for_each_rcu_flavor(rsp)
  13720. __rcu_process_callbacks(rsp);
  13721. - trace_rcu_utilization(TPS("End RCU core"));
  13722. }
  13723. /*
  13724. @@ -2588,18 +2609,105 @@
  13725. {
  13726. if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
  13727. return;
  13728. - if (likely(!rsp->boost)) {
  13729. - rcu_do_batch(rsp, rdp);
  13730. + rcu_do_batch(rsp, rdp);
  13731. +}
  13732. +
  13733. +static void rcu_wake_cond(struct task_struct *t, int status)
  13734. +{
  13735. + /*
  13736. + * If the thread is yielding, only wake it when this
  13737. + * is invoked from idle
  13738. + */
  13739. + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
  13740. + wake_up_process(t);
  13741. +}
  13742. +
  13743. +/*
  13744. + * Wake up this CPU's rcuc kthread to do RCU core processing.
  13745. + */
  13746. +static void invoke_rcu_core(void)
  13747. +{
  13748. + unsigned long flags;
  13749. + struct task_struct *t;
  13750. +
  13751. + if (!cpu_online(smp_processor_id()))
  13752. return;
  13753. + local_irq_save(flags);
  13754. + __this_cpu_write(rcu_cpu_has_work, 1);
  13755. + t = __this_cpu_read(rcu_cpu_kthread_task);
  13756. + if (t != NULL && current != t)
  13757. + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
  13758. + local_irq_restore(flags);
  13759. +}
  13760. +
  13761. +static void rcu_cpu_kthread_park(unsigned int cpu)
  13762. +{
  13763. + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
  13764. +}
  13765. +
  13766. +static int rcu_cpu_kthread_should_run(unsigned int cpu)
  13767. +{
  13768. + return __this_cpu_read(rcu_cpu_has_work);
  13769. +}
  13770. +
  13771. +/*
  13772. + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
  13773. + * RCU softirq used in flavors and configurations of RCU that do not
  13774. + * support RCU priority boosting.
  13775. + */
  13776. +static void rcu_cpu_kthread(unsigned int cpu)
  13777. +{
  13778. + unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
  13779. + char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
  13780. + int spincnt;
  13781. +
  13782. + for (spincnt = 0; spincnt < 10; spincnt++) {
  13783. + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
  13784. + local_bh_disable();
  13785. + *statusp = RCU_KTHREAD_RUNNING;
  13786. + this_cpu_inc(rcu_cpu_kthread_loops);
  13787. + local_irq_disable();
  13788. + work = *workp;
  13789. + *workp = 0;
  13790. + local_irq_enable();
  13791. + if (work)
  13792. + rcu_process_callbacks();
  13793. + local_bh_enable();
  13794. + if (*workp == 0) {
  13795. + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
  13796. + *statusp = RCU_KTHREAD_WAITING;
  13797. + return;
  13798. + }
  13799. }
  13800. - invoke_rcu_callbacks_kthread();
  13801. + *statusp = RCU_KTHREAD_YIELDING;
  13802. + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
  13803. + schedule_timeout_interruptible(2);
  13804. + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
  13805. + *statusp = RCU_KTHREAD_WAITING;
  13806. }
  13807. -static void invoke_rcu_core(void)
  13808. +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
  13809. + .store = &rcu_cpu_kthread_task,
  13810. + .thread_should_run = rcu_cpu_kthread_should_run,
  13811. + .thread_fn = rcu_cpu_kthread,
  13812. + .thread_comm = "rcuc/%u",
  13813. + .setup = rcu_cpu_kthread_setup,
  13814. + .park = rcu_cpu_kthread_park,
  13815. +};
  13816. +
  13817. +/*
  13818. + * Spawn per-CPU RCU core processing kthreads.
  13819. + */
  13820. +static int __init rcu_spawn_core_kthreads(void)
  13821. {
  13822. - if (cpu_online(smp_processor_id()))
  13823. - raise_softirq(RCU_SOFTIRQ);
  13824. + int cpu;
  13825. +
  13826. + for_each_possible_cpu(cpu)
  13827. + per_cpu(rcu_cpu_has_work, cpu) = 0;
  13828. + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
  13829. + return 0;
  13830. }
  13831. +early_initcall(rcu_spawn_core_kthreads);
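
RCU core processing no longer runs from RCU_SOFTIRQ; it is handed to per-CPU "rcuc/%u" kthreads registered through the smpboot helper, and invoke_rcu_core() now just marks work pending and wakes the thread. The registration pattern, reduced to a hypothetical per-CPU worker (field names mirror rcu_cpu_thread_spec above; demo_* names are made up):

    #include <linux/smpboot.h>
    #include <linux/percpu.h>
    #include <linux/sched.h>
    #include <linux/init.h>

    static DEFINE_PER_CPU(struct task_struct *, demo_task);
    static DEFINE_PER_CPU(int, demo_has_work);

    static int demo_should_run(unsigned int cpu)
    {
        return __this_cpu_read(demo_has_work);
    }

    static void demo_fn(unsigned int cpu)
    {
        __this_cpu_write(demo_has_work, 0);
        /* ... per-CPU work goes here ... */
    }

    static struct smp_hotplug_thread demo_threads = {
        .store              = &demo_task,
        .thread_should_run  = demo_should_run,
        .thread_fn          = demo_fn,
        .thread_comm        = "demo/%u",
    };

    static int __init demo_spawn_threads(void)
    {
        return smpboot_register_percpu_thread(&demo_threads);
    }
    early_initcall(demo_spawn_threads);
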
  13832. /*
  13833. * Handle any core-RCU processing required by a call_rcu() invocation.
  13834. @@ -2734,6 +2842,7 @@
  13835. }
  13836. EXPORT_SYMBOL_GPL(call_rcu_sched);
  13837. +#ifndef CONFIG_PREEMPT_RT_FULL
  13838. /*
  13839. * Queue an RCU callback for invocation after a quicker grace period.
  13840. */
  13841. @@ -2742,6 +2851,7 @@
  13842. __call_rcu(head, func, &rcu_bh_state, -1, 0);
  13843. }
  13844. EXPORT_SYMBOL_GPL(call_rcu_bh);
  13845. +#endif
  13846. /*
  13847. * Queue an RCU callback for lazy invocation after a grace period.
  13848. @@ -2833,6 +2943,7 @@
  13849. }
  13850. EXPORT_SYMBOL_GPL(synchronize_sched);
  13851. +#ifndef CONFIG_PREEMPT_RT_FULL
  13852. /**
  13853. * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
  13854. *
  13855. @@ -2859,6 +2970,7 @@
  13856. wait_rcu_gp(call_rcu_bh);
  13857. }
  13858. EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
  13859. +#endif
  13860. /**
  13861. * get_state_synchronize_rcu - Snapshot current RCU state
  13862. @@ -3341,6 +3453,7 @@
  13863. mutex_unlock(&rsp->barrier_mutex);
  13864. }
  13865. +#ifndef CONFIG_PREEMPT_RT_FULL
  13866. /**
  13867. * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
  13868. */
  13869. @@ -3349,6 +3462,7 @@
  13870. _rcu_barrier(&rcu_bh_state);
  13871. }
  13872. EXPORT_SYMBOL_GPL(rcu_barrier_bh);
  13873. +#endif
  13874. /**
  13875. * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
  13876. @@ -3658,7 +3772,7 @@
  13877. }
  13878. rsp->rda = rda;
  13879. - init_waitqueue_head(&rsp->gp_wq);
  13880. + init_swait_head(&rsp->gp_wq);
  13881. rnp = rsp->level[rcu_num_lvls - 1];
  13882. for_each_possible_cpu(i) {
  13883. while (i > rnp->grphi)
  13884. @@ -3755,7 +3869,6 @@
  13885. rcu_init_one(&rcu_bh_state, &rcu_bh_data);
  13886. rcu_init_one(&rcu_sched_state, &rcu_sched_data);
  13887. __rcu_init_preempt();
  13888. - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
  13889. /*
  13890. * We don't need protection against CPU-hotplug here because
  13891. diff -Nur linux-3.18.10.orig/kernel/rcu/tree.h linux-3.18.10/kernel/rcu/tree.h
  13892. --- linux-3.18.10.orig/kernel/rcu/tree.h 2015-03-24 02:05:12.000000000 +0100
  13893. +++ linux-3.18.10/kernel/rcu/tree.h 2015-03-26 12:42:18.679588341 +0100
  13894. @@ -28,6 +28,7 @@
  13895. #include <linux/cpumask.h>
  13896. #include <linux/seqlock.h>
  13897. #include <linux/irq_work.h>
  13898. +#include <linux/wait-simple.h>
  13899. /*
  13900. * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
  13901. @@ -172,11 +173,6 @@
  13902. /* queued on this rcu_node structure that */
  13903. /* are blocking the current grace period, */
  13904. /* there can be no such task. */
  13905. - struct completion boost_completion;
  13906. - /* Used to ensure that the rt_mutex used */
  13907. - /* to carry out the boosting is fully */
  13908. - /* released with no future boostee accesses */
  13909. - /* before that rt_mutex is re-initialized. */
  13910. struct rt_mutex boost_mtx;
  13911. /* Used only for the priority-boosting */
  13912. /* side effect, not as a lock. */
  13913. @@ -208,7 +204,7 @@
  13914. /* This can happen due to race conditions. */
  13915. #endif /* #ifdef CONFIG_RCU_BOOST */
  13916. #ifdef CONFIG_RCU_NOCB_CPU
  13917. - wait_queue_head_t nocb_gp_wq[2];
  13918. + struct swait_head nocb_gp_wq[2];
  13919. /* Place for rcu_nocb_kthread() to wait GP. */
  13920. #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
  13921. int need_future_gp[2];
  13922. @@ -348,7 +344,7 @@
  13923. atomic_long_t nocb_follower_count_lazy; /* (approximate). */
  13924. int nocb_p_count; /* # CBs being invoked by kthread */
  13925. int nocb_p_count_lazy; /* (approximate). */
  13926. - wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
  13927. + struct swait_head nocb_wq; /* For nocb kthreads to sleep on. */
  13928. struct task_struct *nocb_kthread;
  13929. int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
  13930. @@ -439,7 +435,7 @@
  13931. unsigned long gpnum; /* Current gp number. */
  13932. unsigned long completed; /* # of last completed gp. */
  13933. struct task_struct *gp_kthread; /* Task for grace periods. */
  13934. - wait_queue_head_t gp_wq; /* Where GP task waits. */
  13935. + struct swait_head gp_wq; /* Where GP task waits. */
  13936. short gp_flags; /* Commands for GP task. */
  13937. short gp_state; /* GP kthread sleep state. */
  13938. @@ -570,10 +566,9 @@
  13939. static void __init __rcu_init_preempt(void);
  13940. static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
  13941. static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
  13942. -static void invoke_rcu_callbacks_kthread(void);
  13943. static bool rcu_is_callbacks_kthread(void);
  13944. +static void rcu_cpu_kthread_setup(unsigned int cpu);
  13945. #ifdef CONFIG_RCU_BOOST
  13946. -static void rcu_preempt_do_callbacks(void);
  13947. static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
  13948. struct rcu_node *rnp);
  13949. #endif /* #ifdef CONFIG_RCU_BOOST */
  13950. diff -Nur linux-3.18.10.orig/kernel/rcu/tree_plugin.h linux-3.18.10/kernel/rcu/tree_plugin.h
  13951. --- linux-3.18.10.orig/kernel/rcu/tree_plugin.h 2015-03-24 02:05:12.000000000 +0100
  13952. +++ linux-3.18.10/kernel/rcu/tree_plugin.h 2015-03-26 12:42:18.679588341 +0100
  13953. @@ -24,12 +24,6 @@
  13954. * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  13955. */
  13956. -#include <linux/delay.h>
  13957. -#include <linux/gfp.h>
  13958. -#include <linux/oom.h>
  13959. -#include <linux/smpboot.h>
  13960. -#include "../time/tick-internal.h"
  13961. -
  13962. #define RCU_KTHREAD_PRIO 1
  13963. #ifdef CONFIG_RCU_BOOST
  13964. @@ -335,7 +329,7 @@
  13965. }
  13966. /* Hardware IRQ handlers cannot block, complain if they get here. */
  13967. - if (WARN_ON_ONCE(in_irq() || in_serving_softirq())) {
  13968. + if (WARN_ON_ONCE(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET))) {
  13969. local_irq_restore(flags);
  13970. return;
  13971. }
  13972. @@ -398,10 +392,8 @@
  13973. #ifdef CONFIG_RCU_BOOST
  13974. /* Unboost if we were boosted. */
  13975. - if (drop_boost_mutex) {
  13976. + if (drop_boost_mutex)
  13977. rt_mutex_unlock(&rnp->boost_mtx);
  13978. - complete(&rnp->boost_completion);
  13979. - }
  13980. #endif /* #ifdef CONFIG_RCU_BOOST */
  13981. /*
  13982. @@ -635,15 +627,6 @@
  13983. t->rcu_read_unlock_special.b.need_qs = true;
  13984. }
  13985. -#ifdef CONFIG_RCU_BOOST
  13986. -
  13987. -static void rcu_preempt_do_callbacks(void)
  13988. -{
  13989. - rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
  13990. -}
  13991. -
  13992. -#endif /* #ifdef CONFIG_RCU_BOOST */
  13993. -
  13994. /*
  13995. * Queue a preemptible-RCU callback for invocation after a grace period.
  13996. */
  13997. @@ -1072,6 +1055,19 @@
  13998. #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
  13999. +/*
  14000. + * If boosting, set rcuc kthreads to realtime priority.
  14001. + */
  14002. +static void rcu_cpu_kthread_setup(unsigned int cpu)
  14003. +{
  14004. +#ifdef CONFIG_RCU_BOOST
  14005. + struct sched_param sp;
  14006. +
  14007. + sp.sched_priority = RCU_KTHREAD_PRIO;
  14008. + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
  14009. +#endif /* #ifdef CONFIG_RCU_BOOST */
  14010. +}
  14011. +
  14012. #ifdef CONFIG_RCU_BOOST
  14013. #include "../locking/rtmutex_common.h"
  14014. @@ -1103,16 +1099,6 @@
  14015. #endif /* #else #ifdef CONFIG_RCU_TRACE */
  14016. -static void rcu_wake_cond(struct task_struct *t, int status)
  14017. -{
  14018. - /*
  14019. - * If the thread is yielding, only wake it when this
  14020. - * is invoked from idle
  14021. - */
  14022. - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
  14023. - wake_up_process(t);
  14024. -}
  14025. -
  14026. /*
  14027. * Carry out RCU priority boosting on the task indicated by ->exp_tasks
  14028. * or ->boost_tasks, advancing the pointer to the next task in the
  14029. @@ -1175,15 +1161,11 @@
  14030. */
  14031. t = container_of(tb, struct task_struct, rcu_node_entry);
  14032. rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
  14033. - init_completion(&rnp->boost_completion);
  14034. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  14035. /* Lock only for side effect: boosts task t's priority. */
  14036. rt_mutex_lock(&rnp->boost_mtx);
  14037. rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */
  14038. - /* Wait for boostee to be done w/boost_mtx before reinitializing. */
  14039. - wait_for_completion(&rnp->boost_completion);
  14040. -
  14041. return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
  14042. ACCESS_ONCE(rnp->boost_tasks) != NULL;
  14043. }
  14044. @@ -1261,23 +1243,6 @@
  14045. }
  14046. /*
  14047. - * Wake up the per-CPU kthread to invoke RCU callbacks.
  14048. - */
  14049. -static void invoke_rcu_callbacks_kthread(void)
  14050. -{
  14051. - unsigned long flags;
  14052. -
  14053. - local_irq_save(flags);
  14054. - __this_cpu_write(rcu_cpu_has_work, 1);
  14055. - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
  14056. - current != __this_cpu_read(rcu_cpu_kthread_task)) {
  14057. - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
  14058. - __this_cpu_read(rcu_cpu_kthread_status));
  14059. - }
  14060. - local_irq_restore(flags);
  14061. -}
  14062. -
  14063. -/*
  14064. * Is the current CPU running the RCU-callbacks kthread?
  14065. * Caller must have preemption disabled.
  14066. */
  14067. @@ -1332,67 +1297,6 @@
  14068. return 0;
  14069. }
  14070. -static void rcu_kthread_do_work(void)
  14071. -{
  14072. - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
  14073. - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
  14074. - rcu_preempt_do_callbacks();
  14075. -}
  14076. -
  14077. -static void rcu_cpu_kthread_setup(unsigned int cpu)
  14078. -{
  14079. - struct sched_param sp;
  14080. -
  14081. - sp.sched_priority = RCU_KTHREAD_PRIO;
  14082. - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
  14083. -}
  14084. -
  14085. -static void rcu_cpu_kthread_park(unsigned int cpu)
  14086. -{
  14087. - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
  14088. -}
  14089. -
  14090. -static int rcu_cpu_kthread_should_run(unsigned int cpu)
  14091. -{
  14092. - return __this_cpu_read(rcu_cpu_has_work);
  14093. -}
  14094. -
  14095. -/*
  14096. - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
  14097. - * RCU softirq used in flavors and configurations of RCU that do not
  14098. - * support RCU priority boosting.
  14099. - */
  14100. -static void rcu_cpu_kthread(unsigned int cpu)
  14101. -{
  14102. - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
  14103. - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
  14104. - int spincnt;
  14105. -
  14106. - for (spincnt = 0; spincnt < 10; spincnt++) {
  14107. - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
  14108. - local_bh_disable();
  14109. - *statusp = RCU_KTHREAD_RUNNING;
  14110. - this_cpu_inc(rcu_cpu_kthread_loops);
  14111. - local_irq_disable();
  14112. - work = *workp;
  14113. - *workp = 0;
  14114. - local_irq_enable();
  14115. - if (work)
  14116. - rcu_kthread_do_work();
  14117. - local_bh_enable();
  14118. - if (*workp == 0) {
  14119. - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
  14120. - *statusp = RCU_KTHREAD_WAITING;
  14121. - return;
  14122. - }
  14123. - }
  14124. - *statusp = RCU_KTHREAD_YIELDING;
  14125. - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
  14126. - schedule_timeout_interruptible(2);
  14127. - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
  14128. - *statusp = RCU_KTHREAD_WAITING;
  14129. -}
  14130. -
  14131. /*
  14132. * Set the per-rcu_node kthread's affinity to cover all CPUs that are
  14133. * served by the rcu_node in question. The CPU hotplug lock is still
  14134. @@ -1426,26 +1330,13 @@
  14135. free_cpumask_var(cm);
  14136. }
  14137. -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
  14138. - .store = &rcu_cpu_kthread_task,
  14139. - .thread_should_run = rcu_cpu_kthread_should_run,
  14140. - .thread_fn = rcu_cpu_kthread,
  14141. - .thread_comm = "rcuc/%u",
  14142. - .setup = rcu_cpu_kthread_setup,
  14143. - .park = rcu_cpu_kthread_park,
  14144. -};
  14145. -
  14146. /*
  14147. * Spawn boost kthreads -- called as soon as the scheduler is running.
  14148. */
  14149. static void __init rcu_spawn_boost_kthreads(void)
  14150. {
  14151. struct rcu_node *rnp;
  14152. - int cpu;
  14153. - for_each_possible_cpu(cpu)
  14154. - per_cpu(rcu_cpu_has_work, cpu) = 0;
  14155. - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
  14156. rnp = rcu_get_root(rcu_state_p);
  14157. (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
  14158. if (NUM_RCU_NODES > 1) {
  14159. @@ -1472,11 +1363,6 @@
  14160. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  14161. }
  14162. -static void invoke_rcu_callbacks_kthread(void)
  14163. -{
  14164. - WARN_ON_ONCE(1);
  14165. -}
  14166. -
  14167. static bool rcu_is_callbacks_kthread(void)
  14168. {
  14169. return false;
  14170. @@ -1500,7 +1386,7 @@
  14171. #endif /* #else #ifdef CONFIG_RCU_BOOST */
  14172. -#if !defined(CONFIG_RCU_FAST_NO_HZ)
  14173. +#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL)
  14174. /*
  14175. * Check to see if any future RCU-related work will need to be done
  14176. @@ -1518,7 +1404,9 @@
  14177. return rcu_cpu_has_callbacks(cpu, NULL);
  14178. }
  14179. #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
  14180. +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */
  14181. +#if !defined(CONFIG_RCU_FAST_NO_HZ)
  14182. /*
  14183. * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
  14184. * after it.
  14185. @@ -1615,6 +1503,8 @@
  14186. return cbs_ready;
  14187. }
  14188. +#ifndef CONFIG_PREEMPT_RT_FULL
  14189. +
  14190. /*
  14191. * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
  14192. * to invoke. If the CPU has callbacks, try to advance them. Tell the
  14193. @@ -1655,7 +1545,7 @@
  14194. return 0;
  14195. }
  14196. #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
  14197. -
  14198. +#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */
  14199. /*
  14200. * Prepare a CPU for idle from an RCU perspective. The first major task
  14201. * is to sense whether nohz mode has been enabled or disabled via sysfs.
  14202. @@ -2001,7 +1891,7 @@
  14203. */
  14204. static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
  14205. {
  14206. - wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
  14207. + swait_wake_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
  14208. }
  14209. /*
  14210. @@ -2019,8 +1909,8 @@
  14211. static void rcu_init_one_nocb(struct rcu_node *rnp)
  14212. {
  14213. - init_waitqueue_head(&rnp->nocb_gp_wq[0]);
  14214. - init_waitqueue_head(&rnp->nocb_gp_wq[1]);
  14215. + init_swait_head(&rnp->nocb_gp_wq[0]);
  14216. + init_swait_head(&rnp->nocb_gp_wq[1]);
  14217. }
  14218. #ifndef CONFIG_RCU_NOCB_CPU_ALL
  14219. @@ -2045,7 +1935,7 @@
  14220. if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) {
  14221. /* Prior smp_mb__after_atomic() orders against prior enqueue. */
  14222. ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false;
  14223. - wake_up(&rdp_leader->nocb_wq);
  14224. + swait_wake(&rdp_leader->nocb_wq);
  14225. }
  14226. }
  14227. @@ -2238,7 +2128,7 @@
  14228. */
  14229. trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
  14230. for (;;) {
  14231. - wait_event_interruptible(
  14232. + swait_event_interruptible(
  14233. rnp->nocb_gp_wq[c & 0x1],
  14234. (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
  14235. if (likely(d))
  14236. @@ -2266,7 +2156,7 @@
  14237. /* Wait for callbacks to appear. */
  14238. if (!rcu_nocb_poll) {
  14239. trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
  14240. - wait_event_interruptible(my_rdp->nocb_wq,
  14241. + swait_event_interruptible(my_rdp->nocb_wq,
  14242. !ACCESS_ONCE(my_rdp->nocb_leader_sleep));
  14243. /* Memory barrier handled by smp_mb() calls below and repoll. */
  14244. } else if (firsttime) {
  14245. @@ -2347,7 +2237,7 @@
  14246. * List was empty, wake up the follower.
  14247. * Memory barriers supplied by atomic_long_add().
  14248. */
  14249. - wake_up(&rdp->nocb_wq);
  14250. + swait_wake(&rdp->nocb_wq);
  14251. }
  14252. }
  14253. @@ -2368,7 +2258,7 @@
  14254. if (!rcu_nocb_poll) {
  14255. trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
  14256. "FollowerSleep");
  14257. - wait_event_interruptible(rdp->nocb_wq,
  14258. + swait_event_interruptible(rdp->nocb_wq,
  14259. ACCESS_ONCE(rdp->nocb_follower_head));
  14260. } else if (firsttime) {
  14261. /* Don't drown trace log with "Poll"! */
  14262. @@ -2539,7 +2429,7 @@
  14263. static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
  14264. {
  14265. rdp->nocb_tail = &rdp->nocb_head;
  14266. - init_waitqueue_head(&rdp->nocb_wq);
  14267. + init_swait_head(&rdp->nocb_wq);
  14268. rdp->nocb_follower_tail = &rdp->nocb_follower_head;
  14269. }
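
Throughout the RCU changes, wait_queue_head_t and the wait_event*/wake_up* calls are swapped for the series' simple-wait primitives (struct swait_head, init_swait_head(), swait_event_interruptible(), swait_wake()/swait_wake_all()), whose internal lock stays raw so waking is safe from truly atomic context on RT. A minimal producer/consumer sketch in terms of those primitives as used above (demo_* names are made up; treat exact signatures as an assumption based on these hunks, since wait-simple.h comes from this series):

    #include <linux/wait-simple.h>
    #include <linux/compiler.h>

    static struct swait_head demo_wq;
    static int demo_ready;

    static void demo_setup(void)
    {
        init_swait_head(&demo_wq);
    }

    /* waiter side, e.g. a kthread */
    static void demo_wait_for_work(void)
    {
        swait_event_interruptible(demo_wq, ACCESS_ONCE(demo_ready));
    }

    /* producer side; the internal lock is raw, so on RT this remains
     * usable from hard interrupt or otherwise non-preemptible context */
    static void demo_kick(void)
    {
        ACCESS_ONCE(demo_ready) = 1;
        swait_wake(&demo_wq);
    }
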
  14270. diff -Nur linux-3.18.10.orig/kernel/rcu/update.c linux-3.18.10/kernel/rcu/update.c
  14271. --- linux-3.18.10.orig/kernel/rcu/update.c 2015-03-24 02:05:12.000000000 +0100
  14272. +++ linux-3.18.10/kernel/rcu/update.c 2015-03-26 12:42:18.679588341 +0100
  14273. @@ -170,6 +170,7 @@
  14274. }
  14275. EXPORT_SYMBOL_GPL(rcu_read_lock_held);
  14276. +#ifndef CONFIG_PREEMPT_RT_FULL
  14277. /**
  14278. * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
  14279. *
  14280. @@ -196,6 +197,7 @@
  14281. return in_softirq() || irqs_disabled();
  14282. }
  14283. EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
  14284. +#endif
  14285. #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
  14286. diff -Nur linux-3.18.10.orig/kernel/relay.c linux-3.18.10/kernel/relay.c
  14287. --- linux-3.18.10.orig/kernel/relay.c 2015-03-24 02:05:12.000000000 +0100
  14288. +++ linux-3.18.10/kernel/relay.c 2015-03-26 12:42:18.679588341 +0100
  14289. @@ -339,6 +339,10 @@
  14290. {
  14291. struct rchan_buf *buf = (struct rchan_buf *)data;
  14292. wake_up_interruptible(&buf->read_wait);
  14293. + /*
  14294. + * Stupid polling for now:
  14295. + */
  14296. + mod_timer(&buf->timer, jiffies + 1);
  14297. }
  14298. /**
  14299. @@ -356,6 +360,7 @@
  14300. init_waitqueue_head(&buf->read_wait);
  14301. kref_init(&buf->kref);
  14302. setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf);
  14303. + mod_timer(&buf->timer, jiffies + 1);
  14304. } else
  14305. del_timer_sync(&buf->timer);
  14306. @@ -739,15 +744,6 @@
  14307. else
  14308. buf->early_bytes += buf->chan->subbuf_size -
  14309. buf->padding[old_subbuf];
  14310. - smp_mb();
  14311. - if (waitqueue_active(&buf->read_wait))
  14312. - /*
  14313. - * Calling wake_up_interruptible() from here
  14314. - * will deadlock if we happen to be logging
  14315. - * from the scheduler (trying to re-grab
  14316. - * rq->lock), so defer it.
  14317. - */
  14318. - mod_timer(&buf->timer, jiffies + 1);
  14319. }
  14320. old = buf->data;
  14321. diff -Nur linux-3.18.10.orig/kernel/res_counter.c linux-3.18.10/kernel/res_counter.c
  14322. --- linux-3.18.10.orig/kernel/res_counter.c 2015-03-24 02:05:12.000000000 +0100
  14323. +++ linux-3.18.10/kernel/res_counter.c 2015-03-26 12:42:18.679588341 +0100
  14324. @@ -59,7 +59,7 @@
  14325. r = ret = 0;
  14326. *limit_fail_at = NULL;
  14327. - local_irq_save(flags);
  14328. + local_irq_save_nort(flags);
  14329. for (c = counter; c != NULL; c = c->parent) {
  14330. spin_lock(&c->lock);
  14331. r = res_counter_charge_locked(c, val, force);
  14332. @@ -79,7 +79,7 @@
  14333. spin_unlock(&u->lock);
  14334. }
  14335. }
  14336. - local_irq_restore(flags);
  14337. + local_irq_restore_nort(flags);
  14338. return ret;
  14339. }
  14340. @@ -104,7 +104,7 @@
  14341. struct res_counter *c;
  14342. u64 ret = 0;
  14343. - local_irq_save(flags);
  14344. + local_irq_save_nort(flags);
  14345. for (c = counter; c != top; c = c->parent) {
  14346. u64 r;
  14347. spin_lock(&c->lock);
  14348. @@ -113,7 +113,7 @@
  14349. ret = r;
  14350. spin_unlock(&c->lock);
  14351. }
  14352. - local_irq_restore(flags);
  14353. + local_irq_restore_nort(flags);
  14354. return ret;
  14355. }
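
The res_counter charge/uncharge paths switch to the _nort variants: interrupts are still disabled on a stock kernel, while on PREEMPT_RT_FULL only the flags bookkeeping remains, because the spin_lock() taken inside the section is a sleeping lock there and must not run with interrupts hard-disabled. The definitions live elsewhere in this series; their rough shape is the following (an assumption, shown for orientation only):

    /* Assumed shape of the helpers used above -- not a hunk from this patch. */
    #ifdef CONFIG_PREEMPT_RT_FULL
    # define local_irq_save_nort(flags)     local_save_flags(flags)
    # define local_irq_restore_nort(flags)  ((void)(flags))
    #else
    # define local_irq_save_nort(flags)     local_irq_save(flags)
    # define local_irq_restore_nort(flags)  local_irq_restore(flags)
    #endif
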
  14356. diff -Nur linux-3.18.10.orig/kernel/sched/completion.c linux-3.18.10/kernel/sched/completion.c
  14357. --- linux-3.18.10.orig/kernel/sched/completion.c 2015-03-24 02:05:12.000000000 +0100
  14358. +++ linux-3.18.10/kernel/sched/completion.c 2015-03-26 12:42:18.679588341 +0100
  14359. @@ -30,10 +30,10 @@
  14360. {
  14361. unsigned long flags;
  14362. - spin_lock_irqsave(&x->wait.lock, flags);
  14363. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  14364. x->done++;
  14365. - __wake_up_locked(&x->wait, TASK_NORMAL, 1);
  14366. - spin_unlock_irqrestore(&x->wait.lock, flags);
  14367. + __swait_wake_locked(&x->wait, TASK_NORMAL, 1);
  14368. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  14369. }
  14370. EXPORT_SYMBOL(complete);
  14371. @@ -50,10 +50,10 @@
  14372. {
  14373. unsigned long flags;
  14374. - spin_lock_irqsave(&x->wait.lock, flags);
  14375. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  14376. x->done += UINT_MAX/2;
  14377. - __wake_up_locked(&x->wait, TASK_NORMAL, 0);
  14378. - spin_unlock_irqrestore(&x->wait.lock, flags);
  14379. + __swait_wake_locked(&x->wait, TASK_NORMAL, 0);
  14380. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  14381. }
  14382. EXPORT_SYMBOL(complete_all);
  14383. @@ -62,20 +62,20 @@
  14384. long (*action)(long), long timeout, int state)
  14385. {
  14386. if (!x->done) {
  14387. - DECLARE_WAITQUEUE(wait, current);
  14388. + DEFINE_SWAITER(wait);
  14389. - __add_wait_queue_tail_exclusive(&x->wait, &wait);
  14390. + swait_prepare_locked(&x->wait, &wait);
  14391. do {
  14392. if (signal_pending_state(state, current)) {
  14393. timeout = -ERESTARTSYS;
  14394. break;
  14395. }
  14396. __set_current_state(state);
  14397. - spin_unlock_irq(&x->wait.lock);
  14398. + raw_spin_unlock_irq(&x->wait.lock);
  14399. timeout = action(timeout);
  14400. - spin_lock_irq(&x->wait.lock);
  14401. + raw_spin_lock_irq(&x->wait.lock);
  14402. } while (!x->done && timeout);
  14403. - __remove_wait_queue(&x->wait, &wait);
  14404. + swait_finish_locked(&x->wait, &wait);
  14405. if (!x->done)
  14406. return timeout;
  14407. }
  14408. @@ -89,9 +89,9 @@
  14409. {
  14410. might_sleep();
  14411. - spin_lock_irq(&x->wait.lock);
  14412. + raw_spin_lock_irq(&x->wait.lock);
  14413. timeout = do_wait_for_common(x, action, timeout, state);
  14414. - spin_unlock_irq(&x->wait.lock);
  14415. + raw_spin_unlock_irq(&x->wait.lock);
  14416. return timeout;
  14417. }
  14418. @@ -267,12 +267,12 @@
  14419. unsigned long flags;
  14420. int ret = 1;
  14421. - spin_lock_irqsave(&x->wait.lock, flags);
  14422. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  14423. if (!x->done)
  14424. ret = 0;
  14425. else
  14426. x->done--;
  14427. - spin_unlock_irqrestore(&x->wait.lock, flags);
  14428. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  14429. return ret;
  14430. }
  14431. EXPORT_SYMBOL(try_wait_for_completion);
  14432. @@ -290,10 +290,10 @@
  14433. unsigned long flags;
  14434. int ret = 1;
  14435. - spin_lock_irqsave(&x->wait.lock, flags);
  14436. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  14437. if (!x->done)
  14438. ret = 0;
  14439. - spin_unlock_irqrestore(&x->wait.lock, flags);
  14440. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  14441. return ret;
  14442. }
  14443. EXPORT_SYMBOL(completion_done);
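
/*
 * Editor's sketch (not part of the patch): the completion API seen by
 * drivers is unchanged by the hunks above; only the internals move from a
 * normal waitqueue under spinlock_t to a simple waitqueue under a raw
 * spinlock.  A minimal, hypothetical user illustrates why that matters on
 * RT: complete() is commonly called from hard interrupt context, so the
 * lock protecting the wait head must never become a sleeping lock.
 */
#include <linux/completion.h>
#include <linux/interrupt.h>

static DECLARE_COMPLETION(demo_dma_done);       /* hypothetical example state */

static irqreturn_t demo_dma_irq(int irq, void *dev_id)
{
        complete(&demo_dma_done);               /* hard-IRQ context, even on RT */
        return IRQ_HANDLED;
}

static int demo_start_and_wait(void)
{
        reinit_completion(&demo_dma_done);
        /* ... start the hypothetical DMA transfer here ... */
        return wait_for_completion_interruptible(&demo_dma_done);
}
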
  14444. diff -Nur linux-3.18.10.orig/kernel/sched/core.c linux-3.18.10/kernel/sched/core.c
  14445. --- linux-3.18.10.orig/kernel/sched/core.c 2015-03-24 02:05:12.000000000 +0100
  14446. +++ linux-3.18.10/kernel/sched/core.c 2015-03-26 12:42:18.679588341 +0100
  14447. @@ -280,7 +280,11 @@
  14448. * Number of tasks to iterate in a single balance run.
  14449. * Limited because this is done with IRQs disabled.
  14450. */
  14451. +#ifndef CONFIG_PREEMPT_RT_FULL
  14452. const_debug unsigned int sysctl_sched_nr_migrate = 32;
  14453. +#else
  14454. +const_debug unsigned int sysctl_sched_nr_migrate = 8;
  14455. +#endif
  14456. /*
  14457. * period over which we average the RT time consumption, measured
  14458. @@ -516,6 +520,7 @@
  14459. hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  14460. rq->hrtick_timer.function = hrtick;
  14461. + rq->hrtick_timer.irqsafe = 1;
  14462. }
  14463. #else /* CONFIG_SCHED_HRTICK */
  14464. static inline void hrtick_clear(struct rq *rq)
  14465. @@ -627,6 +632,38 @@
  14466. trace_sched_wake_idle_without_ipi(cpu);
  14467. }
  14468. +#ifdef CONFIG_PREEMPT_LAZY
  14469. +void resched_curr_lazy(struct rq *rq)
  14470. +{
  14471. + struct task_struct *curr = rq->curr;
  14472. + int cpu;
  14473. +
  14474. + if (!sched_feat(PREEMPT_LAZY)) {
  14475. + resched_curr(rq);
  14476. + return;
  14477. + }
  14478. +
  14479. + lockdep_assert_held(&rq->lock);
  14480. +
  14481. + if (test_tsk_need_resched(curr))
  14482. + return;
  14483. +
  14484. + if (test_tsk_need_resched_lazy(curr))
  14485. + return;
  14486. +
  14487. + set_tsk_need_resched_lazy(curr);
  14488. +
  14489. + cpu = cpu_of(rq);
  14490. + if (cpu == smp_processor_id())
  14491. + return;
  14492. +
  14493. + /* NEED_RESCHED_LAZY must be visible before we test polling */
  14494. + smp_mb();
  14495. + if (!tsk_is_polling(curr))
  14496. + smp_send_reschedule(cpu);
  14497. +}
  14498. +#endif
  14499. +
  14500. void resched_cpu(int cpu)
  14501. {
  14502. struct rq *rq = cpu_rq(cpu);
  14503. @@ -650,12 +687,14 @@
  14504. */
  14505. int get_nohz_timer_target(int pinned)
  14506. {
  14507. - int cpu = smp_processor_id();
  14508. + int cpu;
  14509. int i;
  14510. struct sched_domain *sd;
  14511. + preempt_disable_rt();
  14512. + cpu = smp_processor_id();
  14513. if (pinned || !get_sysctl_timer_migration() || !idle_cpu(cpu))
  14514. - return cpu;
  14515. + goto preempt_en_rt;
  14516. rcu_read_lock();
  14517. for_each_domain(cpu, sd) {
  14518. @@ -668,6 +707,8 @@
  14519. }
  14520. unlock:
  14521. rcu_read_unlock();
  14522. +preempt_en_rt:
  14523. + preempt_enable_rt();
  14524. return cpu;
  14525. }
  14526. /*
  14527. @@ -1198,6 +1239,18 @@
  14528. static int migration_cpu_stop(void *data);
  14529. +static bool check_task_state(struct task_struct *p, long match_state)
  14530. +{
  14531. + bool match = false;
  14532. +
  14533. + raw_spin_lock_irq(&p->pi_lock);
  14534. + if (p->state == match_state || p->saved_state == match_state)
  14535. + match = true;
  14536. + raw_spin_unlock_irq(&p->pi_lock);
  14537. +
  14538. + return match;
  14539. +}
  14540. +
  14541. /*
  14542. * wait_task_inactive - wait for a thread to unschedule.
  14543. *
  14544. @@ -1242,7 +1295,7 @@
  14545. * is actually now running somewhere else!
  14546. */
  14547. while (task_running(rq, p)) {
  14548. - if (match_state && unlikely(p->state != match_state))
  14549. + if (match_state && !check_task_state(p, match_state))
  14550. return 0;
  14551. cpu_relax();
  14552. }
  14553. @@ -1257,7 +1310,8 @@
  14554. running = task_running(rq, p);
  14555. queued = task_on_rq_queued(p);
  14556. ncsw = 0;
  14557. - if (!match_state || p->state == match_state)
  14558. + if (!match_state || p->state == match_state ||
  14559. + p->saved_state == match_state)
  14560. ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
  14561. task_rq_unlock(rq, p, &flags);
  14562. @@ -1482,10 +1536,6 @@
  14563. {
  14564. activate_task(rq, p, en_flags);
  14565. p->on_rq = TASK_ON_RQ_QUEUED;
  14566. -
  14567. - /* if a worker is waking up, notify workqueue */
  14568. - if (p->flags & PF_WQ_WORKER)
  14569. - wq_worker_waking_up(p, cpu_of(rq));
  14570. }
  14571. /*
  14572. @@ -1699,8 +1749,27 @@
  14573. */
  14574. smp_mb__before_spinlock();
  14575. raw_spin_lock_irqsave(&p->pi_lock, flags);
  14576. - if (!(p->state & state))
  14577. + if (!(p->state & state)) {
  14578. + /*
  14579. + * The task might be running due to a spinlock sleeper
  14580. + * wakeup. Check the saved state and set it to running
  14581. + * if the wakeup condition is true.
  14582. + */
  14583. + if (!(wake_flags & WF_LOCK_SLEEPER)) {
  14584. + if (p->saved_state & state) {
  14585. + p->saved_state = TASK_RUNNING;
  14586. + success = 1;
  14587. + }
  14588. + }
  14589. goto out;
  14590. + }
  14591. +
  14592. + /*
  14593. + * If this is a regular wakeup, then we can unconditionally
  14594. + * clear the saved state of a "lock sleeper".
  14595. + */
  14596. + if (!(wake_flags & WF_LOCK_SLEEPER))
  14597. + p->saved_state = TASK_RUNNING;
  14598. success = 1; /* we're going to change ->state */
  14599. cpu = task_cpu(p);
  14600. @@ -1743,42 +1812,6 @@
  14601. }
  14602. /**
  14603. - * try_to_wake_up_local - try to wake up a local task with rq lock held
  14604. - * @p: the thread to be awakened
  14605. - *
  14606. - * Put @p on the run-queue if it's not already there. The caller must
  14607. - * ensure that this_rq() is locked, @p is bound to this_rq() and not
  14608. - * the current task.
  14609. - */
  14610. -static void try_to_wake_up_local(struct task_struct *p)
  14611. -{
  14612. - struct rq *rq = task_rq(p);
  14613. -
  14614. - if (WARN_ON_ONCE(rq != this_rq()) ||
  14615. - WARN_ON_ONCE(p == current))
  14616. - return;
  14617. -
  14618. - lockdep_assert_held(&rq->lock);
  14619. -
  14620. - if (!raw_spin_trylock(&p->pi_lock)) {
  14621. - raw_spin_unlock(&rq->lock);
  14622. - raw_spin_lock(&p->pi_lock);
  14623. - raw_spin_lock(&rq->lock);
  14624. - }
  14625. -
  14626. - if (!(p->state & TASK_NORMAL))
  14627. - goto out;
  14628. -
  14629. - if (!task_on_rq_queued(p))
  14630. - ttwu_activate(rq, p, ENQUEUE_WAKEUP);
  14631. -
  14632. - ttwu_do_wakeup(rq, p, 0);
  14633. - ttwu_stat(p, smp_processor_id(), 0);
  14634. -out:
  14635. - raw_spin_unlock(&p->pi_lock);
  14636. -}
  14637. -
  14638. -/**
  14639. * wake_up_process - Wake up a specific process
  14640. * @p: The process to be woken up.
  14641. *
  14642. @@ -1792,11 +1825,23 @@
  14643. */
  14644. int wake_up_process(struct task_struct *p)
  14645. {
  14646. - WARN_ON(task_is_stopped_or_traced(p));
  14647. + WARN_ON(__task_is_stopped_or_traced(p));
  14648. return try_to_wake_up(p, TASK_NORMAL, 0);
  14649. }
  14650. EXPORT_SYMBOL(wake_up_process);
  14651. +/**
  14652. + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
  14653. + * @p: The process to be woken up.
  14654. + *
  14655. + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
  14656. + * the nature of the wakeup.
  14657. + */
  14658. +int wake_up_lock_sleeper(struct task_struct *p)
  14659. +{
  14660. + return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
  14661. +}
  14662. +
  14663. int wake_up_state(struct task_struct *p, unsigned int state)
  14664. {
  14665. return try_to_wake_up(p, state, 0);
  14666. @@ -1987,6 +2032,9 @@
  14667. p->on_cpu = 0;
  14668. #endif
  14669. init_task_preempt_count(p);
  14670. +#ifdef CONFIG_HAVE_PREEMPT_LAZY
  14671. + task_thread_info(p)->preempt_lazy_count = 0;
  14672. +#endif
  14673. #ifdef CONFIG_SMP
  14674. plist_node_init(&p->pushable_tasks, MAX_PRIO);
  14675. RB_CLEAR_NODE(&p->pushable_dl_tasks);
  14676. @@ -2270,8 +2318,12 @@
  14677. finish_arch_post_lock_switch();
  14678. fire_sched_in_preempt_notifiers(current);
  14679. + /*
  14680. + * We use mmdrop_delayed() here so we don't have to do the
  14681. + * full __mmdrop() when we are the last user.
  14682. + */
  14683. if (mm)
  14684. - mmdrop(mm);
  14685. + mmdrop_delayed(mm);
  14686. if (unlikely(prev_state == TASK_DEAD)) {
  14687. if (prev->sched_class->task_dead)
  14688. prev->sched_class->task_dead(prev);
  14689. @@ -2696,6 +2748,133 @@
  14690. schedstat_inc(this_rq(), sched_count);
  14691. }
  14692. +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
  14693. +#define MIGRATE_DISABLE_SET_AFFIN (1<<30) /* Can't make a negative */
  14694. +#define migrate_disabled_updated(p) ((p)->migrate_disable & MIGRATE_DISABLE_SET_AFFIN)
  14695. +#define migrate_disable_count(p) ((p)->migrate_disable & ~MIGRATE_DISABLE_SET_AFFIN)
  14696. +
  14697. +static inline void update_migrate_disable(struct task_struct *p)
  14698. +{
  14699. + const struct cpumask *mask;
  14700. +
  14701. + if (likely(!p->migrate_disable))
  14702. + return;
  14703. +
  14704. + /* Did we already update affinity? */
  14705. + if (unlikely(migrate_disabled_updated(p)))
  14706. + return;
  14707. +
  14708. + /*
  14709. + * Since this is always current we can get away with only locking
  14710. + * rq->lock, the ->cpus_allowed value can normally only be changed
  14711. + * while holding both p->pi_lock and rq->lock, but seeing that this
  14712. + * is current, we cannot actually be waking up, so all code that
  14713. + * relies on serialization against p->pi_lock is out of scope.
  14714. + *
  14715. + * Having rq->lock serializes us against things like
  14716. + * set_cpus_allowed_ptr() that can still happen concurrently.
  14717. + */
  14718. + mask = tsk_cpus_allowed(p);
  14719. +
  14720. + if (p->sched_class->set_cpus_allowed)
  14721. + p->sched_class->set_cpus_allowed(p, mask);
  14722. + /* mask==cpumask_of(task_cpu(p)) which has a cpumask_weight==1 */
  14723. + p->nr_cpus_allowed = 1;
  14724. +
  14725. + /* Let migrate_enable know to fix things back up */
  14726. + p->migrate_disable |= MIGRATE_DISABLE_SET_AFFIN;
  14727. +}
  14728. +
  14729. +void migrate_disable(void)
  14730. +{
  14731. + struct task_struct *p = current;
  14732. +
  14733. + if (in_atomic()) {
  14734. +#ifdef CONFIG_SCHED_DEBUG
  14735. + p->migrate_disable_atomic++;
  14736. +#endif
  14737. + return;
  14738. + }
  14739. +
  14740. +#ifdef CONFIG_SCHED_DEBUG
  14741. + if (unlikely(p->migrate_disable_atomic)) {
  14742. + tracing_off();
  14743. + WARN_ON_ONCE(1);
  14744. + }
  14745. +#endif
  14746. +
  14747. + if (p->migrate_disable) {
  14748. + p->migrate_disable++;
  14749. + return;
  14750. + }
  14751. +
  14752. + preempt_disable();
  14753. + preempt_lazy_disable();
  14754. + pin_current_cpu();
  14755. + p->migrate_disable = 1;
  14756. + preempt_enable();
  14757. +}
  14758. +EXPORT_SYMBOL(migrate_disable);
  14759. +
  14760. +void migrate_enable(void)
  14761. +{
  14762. + struct task_struct *p = current;
  14763. + const struct cpumask *mask;
  14764. + unsigned long flags;
  14765. + struct rq *rq;
  14766. +
  14767. + if (in_atomic()) {
  14768. +#ifdef CONFIG_SCHED_DEBUG
  14769. + p->migrate_disable_atomic--;
  14770. +#endif
  14771. + return;
  14772. + }
  14773. +
  14774. +#ifdef CONFIG_SCHED_DEBUG
  14775. + if (unlikely(p->migrate_disable_atomic)) {
  14776. + tracing_off();
  14777. + WARN_ON_ONCE(1);
  14778. + }
  14779. +#endif
  14780. + WARN_ON_ONCE(p->migrate_disable <= 0);
  14781. +
  14782. + if (migrate_disable_count(p) > 1) {
  14783. + p->migrate_disable--;
  14784. + return;
  14785. + }
  14786. +
  14787. + preempt_disable();
  14788. + if (unlikely(migrate_disabled_updated(p))) {
  14789. + /*
  14790. + * Undo whatever update_migrate_disable() did, also see there
  14791. + * about locking.
  14792. + */
  14793. + rq = this_rq();
  14794. + raw_spin_lock_irqsave(&rq->lock, flags);
  14795. +
  14796. + /*
  14797. + * Clearing migrate_disable causes tsk_cpus_allowed to
  14798. + * show the tasks original cpu affinity.
  14799. + */
  14800. + p->migrate_disable = 0;
  14801. + mask = tsk_cpus_allowed(p);
  14802. + if (p->sched_class->set_cpus_allowed)
  14803. + p->sched_class->set_cpus_allowed(p, mask);
  14804. + p->nr_cpus_allowed = cpumask_weight(mask);
  14805. + raw_spin_unlock_irqrestore(&rq->lock, flags);
  14806. + } else
  14807. + p->migrate_disable = 0;
  14808. +
  14809. + unpin_current_cpu();
  14810. + preempt_enable();
  14811. + preempt_lazy_enable();
  14812. +}
  14813. +EXPORT_SYMBOL(migrate_enable);
  14814. +#else
  14815. +static inline void update_migrate_disable(struct task_struct *p) { }
  14816. +#define migrate_disabled_updated(p) 0
  14817. +#endif
  14818. +
  14819. /*
  14820. * Pick up the highest-prio task:
  14821. */
  14822. @@ -2799,6 +2978,8 @@
  14823. smp_mb__before_spinlock();
  14824. raw_spin_lock_irq(&rq->lock);
  14825. + update_migrate_disable(prev);
  14826. +
  14827. switch_count = &prev->nivcsw;
  14828. if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
  14829. if (unlikely(signal_pending_state(prev->state, prev))) {
  14830. @@ -2806,19 +2987,6 @@
  14831. } else {
  14832. deactivate_task(rq, prev, DEQUEUE_SLEEP);
  14833. prev->on_rq = 0;
  14834. -
  14835. - /*
  14836. - * If a worker went to sleep, notify and ask workqueue
  14837. - * whether it wants to wake up a task to maintain
  14838. - * concurrency.
  14839. - */
  14840. - if (prev->flags & PF_WQ_WORKER) {
  14841. - struct task_struct *to_wakeup;
  14842. -
  14843. - to_wakeup = wq_worker_sleeping(prev, cpu);
  14844. - if (to_wakeup)
  14845. - try_to_wake_up_local(to_wakeup);
  14846. - }
  14847. }
  14848. switch_count = &prev->nvcsw;
  14849. }
  14850. @@ -2828,6 +2996,7 @@
  14851. next = pick_next_task(rq, prev);
  14852. clear_tsk_need_resched(prev);
  14853. + clear_tsk_need_resched_lazy(prev);
  14854. clear_preempt_need_resched();
  14855. rq->skip_clock_update = 0;
  14856. @@ -2857,9 +3026,20 @@
  14857. static inline void sched_submit_work(struct task_struct *tsk)
  14858. {
  14859. - if (!tsk->state || tsk_is_pi_blocked(tsk))
  14860. + if (!tsk->state)
  14861. return;
  14862. /*
  14863. + * If a worker went to sleep, notify and ask workqueue whether
  14864. + * it wants to wake up a task to maintain concurrency.
  14865. + */
  14866. + if (tsk->flags & PF_WQ_WORKER)
  14867. + wq_worker_sleeping(tsk);
  14868. +
  14869. +
  14870. + if (tsk_is_pi_blocked(tsk))
  14871. + return;
  14872. +
  14873. + /*
  14874. * If we are going to sleep and we have plugged IO queued,
  14875. * make sure to submit it to avoid deadlocks.
  14876. */
  14877. @@ -2867,12 +3047,19 @@
  14878. blk_schedule_flush_plug(tsk);
  14879. }
  14880. +static inline void sched_update_worker(struct task_struct *tsk)
  14881. +{
  14882. + if (tsk->flags & PF_WQ_WORKER)
  14883. + wq_worker_running(tsk);
  14884. +}
  14885. +
  14886. asmlinkage __visible void __sched schedule(void)
  14887. {
  14888. struct task_struct *tsk = current;
  14889. sched_submit_work(tsk);
  14890. __schedule();
  14891. + sched_update_worker(tsk);
  14892. }
  14893. EXPORT_SYMBOL(schedule);
  14894. @@ -2922,9 +3109,26 @@
  14895. if (likely(!preemptible()))
  14896. return;
  14897. +#ifdef CONFIG_PREEMPT_LAZY
  14898. + /*
  14899. + * Check for lazy preemption
  14900. + */
  14901. + if (current_thread_info()->preempt_lazy_count &&
  14902. + !test_thread_flag(TIF_NEED_RESCHED))
  14903. + return;
  14904. +#endif
  14905. do {
  14906. __preempt_count_add(PREEMPT_ACTIVE);
  14907. + /*
  14908. + * The add/subtract must not be traced by the function
  14909. + * tracer. But we still want to account for the
  14910. + * preempt off latency tracer. Since the _notrace versions
14911. + * of add/subtract skip the accounting for the latency tracer,
14912. + * we must force it manually.
  14913. + */
  14914. + start_critical_timings();
  14915. __schedule();
  14916. + stop_critical_timings();
  14917. __preempt_count_sub(PREEMPT_ACTIVE);
  14918. /*
  14919. @@ -4234,9 +4438,16 @@
  14920. static void __cond_resched(void)
  14921. {
  14922. - __preempt_count_add(PREEMPT_ACTIVE);
  14923. - __schedule();
  14924. - __preempt_count_sub(PREEMPT_ACTIVE);
  14925. + do {
  14926. + __preempt_count_add(PREEMPT_ACTIVE);
  14927. + __schedule();
  14928. + __preempt_count_sub(PREEMPT_ACTIVE);
  14929. + /*
  14930. + * Check again in case we missed a preemption
  14931. + * opportunity between schedule and now.
  14932. + */
  14933. + barrier();
  14934. + } while (need_resched());
  14935. }
  14936. int __sched _cond_resched(void)
  14937. @@ -4277,6 +4488,7 @@
  14938. }
  14939. EXPORT_SYMBOL(__cond_resched_lock);
  14940. +#ifndef CONFIG_PREEMPT_RT_FULL
  14941. int __sched __cond_resched_softirq(void)
  14942. {
  14943. BUG_ON(!in_softirq());
  14944. @@ -4290,6 +4502,7 @@
  14945. return 0;
  14946. }
  14947. EXPORT_SYMBOL(__cond_resched_softirq);
  14948. +#endif
  14949. /**
  14950. * yield - yield the current processor to other threads.
  14951. @@ -4651,7 +4864,9 @@
  14952. /* Set the preempt count _outside_ the spinlocks! */
  14953. init_idle_preempt_count(idle, cpu);
  14954. -
  14955. +#ifdef CONFIG_HAVE_PREEMPT_LAZY
  14956. + task_thread_info(idle)->preempt_lazy_count = 0;
  14957. +#endif
  14958. /*
  14959. * The idle tasks have their own, simple scheduling class:
  14960. */
  14961. @@ -4693,11 +4908,91 @@
  14962. void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  14963. {
  14964. - if (p->sched_class && p->sched_class->set_cpus_allowed)
  14965. - p->sched_class->set_cpus_allowed(p, new_mask);
  14966. + if (!migrate_disabled_updated(p)) {
  14967. + if (p->sched_class && p->sched_class->set_cpus_allowed)
  14968. + p->sched_class->set_cpus_allowed(p, new_mask);
  14969. + p->nr_cpus_allowed = cpumask_weight(new_mask);
  14970. + }
  14971. cpumask_copy(&p->cpus_allowed, new_mask);
  14972. - p->nr_cpus_allowed = cpumask_weight(new_mask);
  14973. +}
  14974. +
  14975. +static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
  14976. +static DEFINE_MUTEX(sched_down_mutex);
  14977. +static cpumask_t sched_down_cpumask;
  14978. +
  14979. +void tell_sched_cpu_down_begin(int cpu)
  14980. +{
  14981. + mutex_lock(&sched_down_mutex);
  14982. + cpumask_set_cpu(cpu, &sched_down_cpumask);
  14983. + mutex_unlock(&sched_down_mutex);
  14984. +}
  14985. +
  14986. +void tell_sched_cpu_down_done(int cpu)
  14987. +{
  14988. + mutex_lock(&sched_down_mutex);
  14989. + cpumask_clear_cpu(cpu, &sched_down_cpumask);
  14990. + mutex_unlock(&sched_down_mutex);
  14991. +}
  14992. +
  14993. +/**
  14994. + * migrate_me - try to move the current task off this cpu
  14995. + *
  14996. + * Used by the pin_current_cpu() code to try to get tasks
  14997. + * to move off the current CPU as it is going down.
14998. + * It will only move the task if the task isn't pinned to
14999. + * the CPU (via migrate_disable, affinity or NO_SETAFFINITY)
15000. + * and the task is in a RUNNING state. Otherwise moving the
15001. + * task would wake it up (change its state to running) when
15002. + * the task did not expect that.
  15003. + *
  15004. + * Returns 1 if it succeeded in moving the current task
  15005. + * 0 otherwise.
  15006. + */
  15007. +int migrate_me(void)
  15008. +{
  15009. + struct task_struct *p = current;
  15010. + struct migration_arg arg;
  15011. + struct cpumask *cpumask;
  15012. + struct cpumask *mask;
  15013. + unsigned long flags;
  15014. + unsigned int dest_cpu;
  15015. + struct rq *rq;
  15016. +
  15017. + /*
15018. + * We cannot migrate tasks bound to a CPU or tasks that are not
15019. + * running. Moving such a task would wake it up.
  15020. + */
  15021. + if (p->flags & PF_NO_SETAFFINITY || p->state)
  15022. + return 0;
  15023. +
  15024. + mutex_lock(&sched_down_mutex);
  15025. + rq = task_rq_lock(p, &flags);
  15026. +
  15027. + cpumask = &__get_cpu_var(sched_cpumasks);
  15028. + mask = &p->cpus_allowed;
  15029. +
  15030. + cpumask_andnot(cpumask, mask, &sched_down_cpumask);
  15031. +
  15032. + if (!cpumask_weight(cpumask)) {
  15033. + /* It's only on this CPU? */
  15034. + task_rq_unlock(rq, p, &flags);
  15035. + mutex_unlock(&sched_down_mutex);
  15036. + return 0;
  15037. + }
  15038. +
  15039. + dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
  15040. +
  15041. + arg.task = p;
  15042. + arg.dest_cpu = dest_cpu;
  15043. +
  15044. + task_rq_unlock(rq, p, &flags);
  15045. +
  15046. + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
  15047. + tlb_migrate_finish(p->mm);
  15048. + mutex_unlock(&sched_down_mutex);
  15049. +
  15050. + return 1;
  15051. }
  15052. /*
  15053. @@ -4743,7 +5038,7 @@
  15054. do_set_cpus_allowed(p, new_mask);
  15055. /* Can the task run on the task's current CPU? If so, we're done */
  15056. - if (cpumask_test_cpu(task_cpu(p), new_mask))
  15057. + if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
  15058. goto out;
  15059. dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
  15060. @@ -4883,6 +5178,8 @@
  15061. #ifdef CONFIG_HOTPLUG_CPU
  15062. +static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
  15063. +
  15064. /*
  15065. * Ensures that the idle task is using init_mm right before its cpu goes
  15066. * offline.
  15067. @@ -4897,7 +5194,11 @@
  15068. switch_mm(mm, &init_mm, current);
  15069. finish_arch_post_lock_switch();
  15070. }
  15071. - mmdrop(mm);
  15072. + /*
15073. + * Defer the cleanup to a CPU that is still alive. On RT we can
15074. + * neither call mmdrop() nor mmdrop_delayed() from here.
  15075. + */
  15076. + per_cpu(idle_last_mm, smp_processor_id()) = mm;
  15077. }
  15078. /*
  15079. @@ -5240,6 +5541,10 @@
  15080. case CPU_DEAD:
  15081. calc_load_migrate(rq);
  15082. + if (per_cpu(idle_last_mm, cpu)) {
  15083. + mmdrop(per_cpu(idle_last_mm, cpu));
  15084. + per_cpu(idle_last_mm, cpu) = NULL;
  15085. + }
  15086. break;
  15087. #endif
  15088. }
  15089. @@ -7181,7 +7486,8 @@
  15090. #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  15091. static inline int preempt_count_equals(int preempt_offset)
  15092. {
  15093. - int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
  15094. + int nested = (preempt_count() & ~PREEMPT_ACTIVE) +
  15095. + sched_rcu_preempt_depth();
  15096. return (nested == preempt_offset);
  15097. }
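
/*
 * Editor's sketch (not part of the patch): intended use of the
 * migrate_disable()/migrate_enable() pair added above.  The per-CPU data
 * is hypothetical; the declarations of migrate_disable()/migrate_enable()
 * come from the preempt/sched header hunks of this series.  Unlike
 * preempt_disable(), the section stays preemptible and may take sleeping
 * locks (which spinlock_t becomes on PREEMPT_RT_FULL); the task is merely
 * pinned to its current CPU so the this_cpu_ptr() result stays valid.
 */
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/preempt.h>
#include <linux/init.h>

struct demo_pcpu {
        spinlock_t lock;                /* a sleeping lock on RT */
        unsigned long count;
};
static DEFINE_PER_CPU(struct demo_pcpu, demo_pcpu);

static void __init demo_pcpu_init(void)
{
        int cpu;

        for_each_possible_cpu(cpu)
                spin_lock_init(&per_cpu(demo_pcpu, cpu).lock);
}

static void demo_account_event(void)
{
        struct demo_pcpu *p;

        migrate_disable();              /* pin to this CPU, stay preemptible */
        p = this_cpu_ptr(&demo_pcpu);
        spin_lock(&p->lock);            /* may sleep on RT; legal here */
        p->count++;
        spin_unlock(&p->lock);
        migrate_enable();
}
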
  15098. diff -Nur linux-3.18.10.orig/kernel/sched/cputime.c linux-3.18.10/kernel/sched/cputime.c
  15099. --- linux-3.18.10.orig/kernel/sched/cputime.c 2015-03-24 02:05:12.000000000 +0100
  15100. +++ linux-3.18.10/kernel/sched/cputime.c 2015-03-26 12:42:18.679588341 +0100
  15101. @@ -675,37 +675,45 @@
  15102. void vtime_account_system(struct task_struct *tsk)
  15103. {
  15104. - write_seqlock(&tsk->vtime_seqlock);
  15105. + raw_spin_lock(&tsk->vtime_lock);
  15106. + write_seqcount_begin(&tsk->vtime_seq);
  15107. __vtime_account_system(tsk);
  15108. - write_sequnlock(&tsk->vtime_seqlock);
  15109. + write_seqcount_end(&tsk->vtime_seq);
  15110. + raw_spin_unlock(&tsk->vtime_lock);
  15111. }
  15112. void vtime_gen_account_irq_exit(struct task_struct *tsk)
  15113. {
  15114. - write_seqlock(&tsk->vtime_seqlock);
  15115. + raw_spin_lock(&tsk->vtime_lock);
  15116. + write_seqcount_begin(&tsk->vtime_seq);
  15117. __vtime_account_system(tsk);
  15118. if (context_tracking_in_user())
  15119. tsk->vtime_snap_whence = VTIME_USER;
  15120. - write_sequnlock(&tsk->vtime_seqlock);
  15121. + write_seqcount_end(&tsk->vtime_seq);
  15122. + raw_spin_unlock(&tsk->vtime_lock);
  15123. }
  15124. void vtime_account_user(struct task_struct *tsk)
  15125. {
  15126. cputime_t delta_cpu;
  15127. - write_seqlock(&tsk->vtime_seqlock);
  15128. + raw_spin_lock(&tsk->vtime_lock);
  15129. + write_seqcount_begin(&tsk->vtime_seq);
  15130. delta_cpu = get_vtime_delta(tsk);
  15131. tsk->vtime_snap_whence = VTIME_SYS;
  15132. account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
  15133. - write_sequnlock(&tsk->vtime_seqlock);
  15134. + write_seqcount_end(&tsk->vtime_seq);
  15135. + raw_spin_unlock(&tsk->vtime_lock);
  15136. }
  15137. void vtime_user_enter(struct task_struct *tsk)
  15138. {
  15139. - write_seqlock(&tsk->vtime_seqlock);
  15140. + raw_spin_lock(&tsk->vtime_lock);
  15141. + write_seqcount_begin(&tsk->vtime_seq);
  15142. __vtime_account_system(tsk);
  15143. tsk->vtime_snap_whence = VTIME_USER;
  15144. - write_sequnlock(&tsk->vtime_seqlock);
  15145. + write_seqcount_end(&tsk->vtime_seq);
  15146. + raw_spin_unlock(&tsk->vtime_lock);
  15147. }
  15148. void vtime_guest_enter(struct task_struct *tsk)
  15149. @@ -717,19 +725,23 @@
  15150. * synchronization against the reader (task_gtime())
  15151. * that can thus safely catch up with a tickless delta.
  15152. */
  15153. - write_seqlock(&tsk->vtime_seqlock);
  15154. + raw_spin_lock(&tsk->vtime_lock);
  15155. + write_seqcount_begin(&tsk->vtime_seq);
  15156. __vtime_account_system(tsk);
  15157. current->flags |= PF_VCPU;
  15158. - write_sequnlock(&tsk->vtime_seqlock);
  15159. + write_seqcount_end(&tsk->vtime_seq);
  15160. + raw_spin_unlock(&tsk->vtime_lock);
  15161. }
  15162. EXPORT_SYMBOL_GPL(vtime_guest_enter);
  15163. void vtime_guest_exit(struct task_struct *tsk)
  15164. {
  15165. - write_seqlock(&tsk->vtime_seqlock);
  15166. + raw_spin_lock(&tsk->vtime_lock);
  15167. + write_seqcount_begin(&tsk->vtime_seq);
  15168. __vtime_account_system(tsk);
  15169. current->flags &= ~PF_VCPU;
  15170. - write_sequnlock(&tsk->vtime_seqlock);
  15171. + write_seqcount_end(&tsk->vtime_seq);
  15172. + raw_spin_unlock(&tsk->vtime_lock);
  15173. }
  15174. EXPORT_SYMBOL_GPL(vtime_guest_exit);
  15175. @@ -742,24 +754,30 @@
  15176. void arch_vtime_task_switch(struct task_struct *prev)
  15177. {
  15178. - write_seqlock(&prev->vtime_seqlock);
  15179. + raw_spin_lock(&prev->vtime_lock);
  15180. + write_seqcount_begin(&prev->vtime_seq);
  15181. prev->vtime_snap_whence = VTIME_SLEEPING;
  15182. - write_sequnlock(&prev->vtime_seqlock);
  15183. + write_seqcount_end(&prev->vtime_seq);
  15184. + raw_spin_unlock(&prev->vtime_lock);
  15185. - write_seqlock(&current->vtime_seqlock);
  15186. + raw_spin_lock(&current->vtime_lock);
  15187. + write_seqcount_begin(&current->vtime_seq);
  15188. current->vtime_snap_whence = VTIME_SYS;
  15189. current->vtime_snap = sched_clock_cpu(smp_processor_id());
  15190. - write_sequnlock(&current->vtime_seqlock);
  15191. + write_seqcount_end(&current->vtime_seq);
  15192. + raw_spin_unlock(&current->vtime_lock);
  15193. }
  15194. void vtime_init_idle(struct task_struct *t, int cpu)
  15195. {
  15196. unsigned long flags;
  15197. - write_seqlock_irqsave(&t->vtime_seqlock, flags);
  15198. + raw_spin_lock_irqsave(&t->vtime_lock, flags);
  15199. + write_seqcount_begin(&t->vtime_seq);
  15200. t->vtime_snap_whence = VTIME_SYS;
  15201. t->vtime_snap = sched_clock_cpu(cpu);
  15202. - write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
  15203. + write_seqcount_end(&t->vtime_seq);
  15204. + raw_spin_unlock_irqrestore(&t->vtime_lock, flags);
  15205. }
  15206. cputime_t task_gtime(struct task_struct *t)
  15207. @@ -768,13 +786,13 @@
  15208. cputime_t gtime;
  15209. do {
  15210. - seq = read_seqbegin(&t->vtime_seqlock);
  15211. + seq = read_seqcount_begin(&t->vtime_seq);
  15212. gtime = t->gtime;
  15213. if (t->flags & PF_VCPU)
  15214. gtime += vtime_delta(t);
  15215. - } while (read_seqretry(&t->vtime_seqlock, seq));
  15216. + } while (read_seqcount_retry(&t->vtime_seq, seq));
  15217. return gtime;
  15218. }
  15219. @@ -797,7 +815,7 @@
  15220. *udelta = 0;
  15221. *sdelta = 0;
  15222. - seq = read_seqbegin(&t->vtime_seqlock);
  15223. + seq = read_seqcount_begin(&t->vtime_seq);
  15224. if (u_dst)
  15225. *u_dst = *u_src;
  15226. @@ -821,7 +839,7 @@
  15227. if (t->vtime_snap_whence == VTIME_SYS)
  15228. *sdelta = delta;
  15229. }
  15230. - } while (read_seqretry(&t->vtime_seqlock, seq));
  15231. + } while (read_seqcount_retry(&t->vtime_seq, seq));
  15232. }
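
/*
 * Editor's sketch (not part of the patch): the vtime conversion above is
 * one instance of a recurring RT pattern -- split a seqlock_t into an
 * explicit raw_spinlock_t (writer serialization that never sleeps) plus a
 * seqcount_t (reader retry loop).  A minimal sketch with hypothetical data:
 */
#include <linux/seqlock.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_sample {
        raw_spinlock_t  lock;           /* serializes writers */
        seqcount_t      seq;            /* lets readers detect updates */
        u64             a, b;
};

static void demo_sample_init(struct demo_sample *s)
{
        raw_spin_lock_init(&s->lock);
        seqcount_init(&s->seq);
        s->a = s->b = 0;
}

static void demo_sample_update(struct demo_sample *s, u64 a, u64 b)
{
        raw_spin_lock(&s->lock);
        write_seqcount_begin(&s->seq);
        s->a = a;
        s->b = b;
        write_seqcount_end(&s->seq);
        raw_spin_unlock(&s->lock);
}

static u64 demo_sample_sum(struct demo_sample *s)
{
        unsigned int seq;
        u64 sum;

        do {
                seq = read_seqcount_begin(&s->seq);
                sum = s->a + s->b;
        } while (read_seqcount_retry(&s->seq, seq));

        return sum;
}
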
  15233. diff -Nur linux-3.18.10.orig/kernel/sched/deadline.c linux-3.18.10/kernel/sched/deadline.c
  15234. --- linux-3.18.10.orig/kernel/sched/deadline.c 2015-03-24 02:05:12.000000000 +0100
  15235. +++ linux-3.18.10/kernel/sched/deadline.c 2015-03-26 12:42:18.679588341 +0100
  15236. @@ -570,6 +570,7 @@
  15237. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  15238. timer->function = dl_task_timer;
  15239. + timer->irqsafe = 1;
  15240. }
  15241. static
  15242. diff -Nur linux-3.18.10.orig/kernel/sched/debug.c linux-3.18.10/kernel/sched/debug.c
  15243. --- linux-3.18.10.orig/kernel/sched/debug.c 2015-03-24 02:05:12.000000000 +0100
  15244. +++ linux-3.18.10/kernel/sched/debug.c 2015-03-26 12:42:18.679588341 +0100
  15245. @@ -256,6 +256,9 @@
  15246. P(rt_throttled);
  15247. PN(rt_time);
  15248. PN(rt_runtime);
  15249. +#ifdef CONFIG_SMP
  15250. + P(rt_nr_migratory);
  15251. +#endif
  15252. #undef PN
  15253. #undef P
  15254. @@ -634,6 +637,10 @@
  15255. #endif
  15256. P(policy);
  15257. P(prio);
  15258. +#ifdef CONFIG_PREEMPT_RT_FULL
  15259. + P(migrate_disable);
  15260. +#endif
  15261. + P(nr_cpus_allowed);
  15262. #undef PN
  15263. #undef __PN
  15264. #undef P
  15265. diff -Nur linux-3.18.10.orig/kernel/sched/fair.c linux-3.18.10/kernel/sched/fair.c
  15266. --- linux-3.18.10.orig/kernel/sched/fair.c 2015-03-24 02:05:12.000000000 +0100
  15267. +++ linux-3.18.10/kernel/sched/fair.c 2015-03-26 12:42:18.679588341 +0100
  15268. @@ -2951,7 +2951,7 @@
  15269. ideal_runtime = sched_slice(cfs_rq, curr);
  15270. delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
  15271. if (delta_exec > ideal_runtime) {
  15272. - resched_curr(rq_of(cfs_rq));
  15273. + resched_curr_lazy(rq_of(cfs_rq));
  15274. /*
  15275. * The current task ran long enough, ensure it doesn't get
  15276. * re-elected due to buddy favours.
  15277. @@ -2975,7 +2975,7 @@
  15278. return;
  15279. if (delta > ideal_runtime)
  15280. - resched_curr(rq_of(cfs_rq));
  15281. + resched_curr_lazy(rq_of(cfs_rq));
  15282. }
  15283. static void
  15284. @@ -3115,7 +3115,7 @@
  15285. * validating it and just reschedule.
  15286. */
  15287. if (queued) {
  15288. - resched_curr(rq_of(cfs_rq));
  15289. + resched_curr_lazy(rq_of(cfs_rq));
  15290. return;
  15291. }
  15292. /*
  15293. @@ -3306,7 +3306,7 @@
  15294. * hierarchy can be throttled
  15295. */
  15296. if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
  15297. - resched_curr(rq_of(cfs_rq));
  15298. + resched_curr_lazy(rq_of(cfs_rq));
  15299. }
  15300. static __always_inline
  15301. @@ -3925,7 +3925,7 @@
  15302. if (delta < 0) {
  15303. if (rq->curr == p)
  15304. - resched_curr(rq);
  15305. + resched_curr_lazy(rq);
  15306. return;
  15307. }
  15308. hrtick_start(rq, delta);
  15309. @@ -4792,7 +4792,7 @@
  15310. return;
  15311. preempt:
  15312. - resched_curr(rq);
  15313. + resched_curr_lazy(rq);
  15314. /*
  15315. * Only set the backward buddy when the current task is still
  15316. * on the rq. This can happen when a wakeup gets interleaved
  15317. @@ -7576,7 +7576,7 @@
  15318. * 'current' within the tree based on its new key value.
  15319. */
  15320. swap(curr->vruntime, se->vruntime);
  15321. - resched_curr(rq);
  15322. + resched_curr_lazy(rq);
  15323. }
  15324. se->vruntime -= cfs_rq->min_vruntime;
  15325. @@ -7601,7 +7601,7 @@
  15326. */
  15327. if (rq->curr == p) {
  15328. if (p->prio > oldprio)
  15329. - resched_curr(rq);
  15330. + resched_curr_lazy(rq);
  15331. } else
  15332. check_preempt_curr(rq, p, 0);
  15333. }
  15334. diff -Nur linux-3.18.10.orig/kernel/sched/features.h linux-3.18.10/kernel/sched/features.h
  15335. --- linux-3.18.10.orig/kernel/sched/features.h 2015-03-24 02:05:12.000000000 +0100
  15336. +++ linux-3.18.10/kernel/sched/features.h 2015-03-26 12:42:18.679588341 +0100
  15337. @@ -50,12 +50,18 @@
  15338. */
  15339. SCHED_FEAT(NONTASK_CAPACITY, true)
  15340. +#ifdef CONFIG_PREEMPT_RT_FULL
  15341. +SCHED_FEAT(TTWU_QUEUE, false)
  15342. +# ifdef CONFIG_PREEMPT_LAZY
  15343. +SCHED_FEAT(PREEMPT_LAZY, true)
  15344. +# endif
  15345. +#else
  15346. /*
  15347. * Queue remote wakeups on the target CPU and process them
  15348. * using the scheduler IPI. Reduces rq->lock contention/bounces.
  15349. */
  15350. SCHED_FEAT(TTWU_QUEUE, true)
  15351. -
  15352. +#endif
  15353. SCHED_FEAT(FORCE_SD_OVERLAP, false)
  15354. SCHED_FEAT(RT_RUNTIME_SHARE, true)
  15355. SCHED_FEAT(LB_MIN, false)
  15356. diff -Nur linux-3.18.10.orig/kernel/sched/Makefile linux-3.18.10/kernel/sched/Makefile
  15357. --- linux-3.18.10.orig/kernel/sched/Makefile 2015-03-24 02:05:12.000000000 +0100
  15358. +++ linux-3.18.10/kernel/sched/Makefile 2015-03-26 12:42:18.679588341 +0100
  15359. @@ -13,7 +13,7 @@
  15360. obj-y += core.o proc.o clock.o cputime.o
  15361. obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
  15362. -obj-y += wait.o completion.o idle.o
  15363. +obj-y += wait.o wait-simple.o work-simple.o completion.o idle.o
  15364. obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
  15365. obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
  15366. obj-$(CONFIG_SCHEDSTATS) += stats.o
  15367. diff -Nur linux-3.18.10.orig/kernel/sched/rt.c linux-3.18.10/kernel/sched/rt.c
  15368. --- linux-3.18.10.orig/kernel/sched/rt.c 2015-03-24 02:05:12.000000000 +0100
  15369. +++ linux-3.18.10/kernel/sched/rt.c 2015-03-26 12:42:18.679588341 +0100
  15370. @@ -43,6 +43,7 @@
  15371. hrtimer_init(&rt_b->rt_period_timer,
  15372. CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  15373. + rt_b->rt_period_timer.irqsafe = 1;
  15374. rt_b->rt_period_timer.function = sched_rt_period_timer;
  15375. }
  15376. diff -Nur linux-3.18.10.orig/kernel/sched/sched.h linux-3.18.10/kernel/sched/sched.h
  15377. --- linux-3.18.10.orig/kernel/sched/sched.h 2015-03-24 02:05:12.000000000 +0100
  15378. +++ linux-3.18.10/kernel/sched/sched.h 2015-03-26 12:42:18.679588341 +0100
  15379. @@ -1018,6 +1018,7 @@
  15380. #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
  15381. #define WF_FORK 0x02 /* child wakeup after fork */
  15382. #define WF_MIGRATED 0x4 /* internal use, task got migrated */
  15383. +#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */
  15384. /*
  15385. * To aid in avoiding the subversion of "niceness" due to uneven distribution
  15386. @@ -1210,6 +1211,15 @@
  15387. extern void resched_curr(struct rq *rq);
  15388. extern void resched_cpu(int cpu);
  15389. +#ifdef CONFIG_PREEMPT_LAZY
  15390. +extern void resched_curr_lazy(struct rq *rq);
  15391. +#else
  15392. +static inline void resched_curr_lazy(struct rq *rq)
  15393. +{
  15394. + resched_curr(rq);
  15395. +}
  15396. +#endif
  15397. +
  15398. extern struct rt_bandwidth def_rt_bandwidth;
  15399. extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
  15400. diff -Nur linux-3.18.10.orig/kernel/sched/wait-simple.c linux-3.18.10/kernel/sched/wait-simple.c
  15401. --- linux-3.18.10.orig/kernel/sched/wait-simple.c 1970-01-01 01:00:00.000000000 +0100
  15402. +++ linux-3.18.10/kernel/sched/wait-simple.c 2015-03-26 12:42:18.679588341 +0100
  15403. @@ -0,0 +1,115 @@
  15404. +/*
  15405. + * Simple waitqueues without fancy flags and callbacks
  15406. + *
  15407. + * (C) 2011 Thomas Gleixner <tglx@linutronix.de>
  15408. + *
  15409. + * Based on kernel/wait.c
  15410. + *
  15411. + * For licencing details see kernel-base/COPYING
  15412. + */
  15413. +#include <linux/init.h>
  15414. +#include <linux/export.h>
  15415. +#include <linux/sched.h>
  15416. +#include <linux/wait-simple.h>
  15417. +
  15418. +/* Adds w to head->list. Must be called with head->lock locked. */
  15419. +static inline void __swait_enqueue(struct swait_head *head, struct swaiter *w)
  15420. +{
  15421. + list_add(&w->node, &head->list);
  15422. + /* We can't let the condition leak before the setting of head */
  15423. + smp_mb();
  15424. +}
  15425. +
  15426. +/* Removes w from head->list. Must be called with head->lock locked. */
  15427. +static inline void __swait_dequeue(struct swaiter *w)
  15428. +{
  15429. + list_del_init(&w->node);
  15430. +}
  15431. +
  15432. +void __init_swait_head(struct swait_head *head, struct lock_class_key *key)
  15433. +{
  15434. + raw_spin_lock_init(&head->lock);
  15435. + lockdep_set_class(&head->lock, key);
  15436. + INIT_LIST_HEAD(&head->list);
  15437. +}
  15438. +EXPORT_SYMBOL(__init_swait_head);
  15439. +
  15440. +void swait_prepare_locked(struct swait_head *head, struct swaiter *w)
  15441. +{
  15442. + w->task = current;
  15443. + if (list_empty(&w->node))
  15444. + __swait_enqueue(head, w);
  15445. +}
  15446. +
  15447. +void swait_prepare(struct swait_head *head, struct swaiter *w, int state)
  15448. +{
  15449. + unsigned long flags;
  15450. +
  15451. + raw_spin_lock_irqsave(&head->lock, flags);
  15452. + swait_prepare_locked(head, w);
  15453. + __set_current_state(state);
  15454. + raw_spin_unlock_irqrestore(&head->lock, flags);
  15455. +}
  15456. +EXPORT_SYMBOL(swait_prepare);
  15457. +
  15458. +void swait_finish_locked(struct swait_head *head, struct swaiter *w)
  15459. +{
  15460. + __set_current_state(TASK_RUNNING);
  15461. + if (w->task)
  15462. + __swait_dequeue(w);
  15463. +}
  15464. +
  15465. +void swait_finish(struct swait_head *head, struct swaiter *w)
  15466. +{
  15467. + unsigned long flags;
  15468. +
  15469. + __set_current_state(TASK_RUNNING);
  15470. + if (w->task) {
  15471. + raw_spin_lock_irqsave(&head->lock, flags);
  15472. + __swait_dequeue(w);
  15473. + raw_spin_unlock_irqrestore(&head->lock, flags);
  15474. + }
  15475. +}
  15476. +EXPORT_SYMBOL(swait_finish);
  15477. +
  15478. +unsigned int
  15479. +__swait_wake_locked(struct swait_head *head, unsigned int state, unsigned int num)
  15480. +{
  15481. + struct swaiter *curr, *next;
  15482. + int woken = 0;
  15483. +
  15484. + list_for_each_entry_safe(curr, next, &head->list, node) {
  15485. + if (wake_up_state(curr->task, state)) {
  15486. + __swait_dequeue(curr);
  15487. + /*
  15488. + * The waiting task can free the waiter as
  15489. + * soon as curr->task = NULL is written,
  15490. + * without taking any locks. A memory barrier
  15491. + * is required here to prevent the following
  15492. + * store to curr->task from getting ahead of
  15493. + * the dequeue operation.
  15494. + */
  15495. + smp_wmb();
  15496. + curr->task = NULL;
  15497. + if (++woken == num)
  15498. + break;
  15499. + }
  15500. + }
  15501. + return woken;
  15502. +}
  15503. +
  15504. +unsigned int
  15505. +__swait_wake(struct swait_head *head, unsigned int state, unsigned int num)
  15506. +{
  15507. + unsigned long flags;
  15508. + int woken;
  15509. +
  15510. + if (!swaitqueue_active(head))
  15511. + return 0;
  15512. +
  15513. + raw_spin_lock_irqsave(&head->lock, flags);
  15514. + woken = __swait_wake_locked(head, state, num);
  15515. + raw_spin_unlock_irqrestore(&head->lock, flags);
  15516. + return woken;
  15517. +}
  15518. +EXPORT_SYMBOL(__swait_wake);
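
/*
 * Editor's sketch (not part of the patch): a minimal waiter/waker pair for
 * the simple-waitqueue API defined above.  The demo_ready flag is
 * hypothetical; DEFINE_SWAITER() and init_swait_head() come from the
 * wait-simple.h header added elsewhere in this series (both are used in
 * the completion.c and work-simple.c hunks).
 */
#include <linux/wait-simple.h>
#include <linux/sched.h>

static struct swait_head demo_wq;
static bool demo_ready;

static void demo_swait_init(void)
{
        init_swait_head(&demo_wq);
}

static void demo_wait_for_ready(void)
{
        DEFINE_SWAITER(w);

        for (;;) {
                swait_prepare(&demo_wq, &w, TASK_UNINTERRUPTIBLE);
                if (demo_ready)
                        break;
                schedule();
        }
        swait_finish(&demo_wq, &w);
}

static void demo_mark_ready(void)
{
        demo_ready = true;
        /* a count of 0 wakes all waiters, mirroring complete_all() above */
        __swait_wake(&demo_wq, TASK_NORMAL, 0);
}
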
  15519. diff -Nur linux-3.18.10.orig/kernel/sched/work-simple.c linux-3.18.10/kernel/sched/work-simple.c
  15520. --- linux-3.18.10.orig/kernel/sched/work-simple.c 1970-01-01 01:00:00.000000000 +0100
  15521. +++ linux-3.18.10/kernel/sched/work-simple.c 2015-03-26 12:42:18.683588345 +0100
  15522. @@ -0,0 +1,172 @@
  15523. +/*
  15524. + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de
  15525. + *
  15526. + * Provides a framework for enqueuing callbacks from irq context
  15527. + * PREEMPT_RT_FULL safe. The callbacks are executed in kthread context.
  15528. + */
  15529. +
  15530. +#include <linux/wait-simple.h>
  15531. +#include <linux/work-simple.h>
  15532. +#include <linux/kthread.h>
  15533. +#include <linux/slab.h>
  15534. +#include <linux/spinlock.h>
  15535. +
  15536. +#define SWORK_EVENT_PENDING (1 << 0)
  15537. +
  15538. +static DEFINE_MUTEX(worker_mutex);
  15539. +static struct sworker *glob_worker;
  15540. +
  15541. +struct sworker {
  15542. + struct list_head events;
  15543. + struct swait_head wq;
  15544. +
  15545. + raw_spinlock_t lock;
  15546. +
  15547. + struct task_struct *task;
  15548. + int refs;
  15549. +};
  15550. +
  15551. +static bool swork_readable(struct sworker *worker)
  15552. +{
  15553. + bool r;
  15554. +
  15555. + if (kthread_should_stop())
  15556. + return true;
  15557. +
  15558. + raw_spin_lock_irq(&worker->lock);
  15559. + r = !list_empty(&worker->events);
  15560. + raw_spin_unlock_irq(&worker->lock);
  15561. +
  15562. + return r;
  15563. +}
  15564. +
  15565. +static int swork_kthread(void *arg)
  15566. +{
  15567. + struct sworker *worker = arg;
  15568. +
  15569. + for (;;) {
  15570. + swait_event_interruptible(worker->wq,
  15571. + swork_readable(worker));
  15572. + if (kthread_should_stop())
  15573. + break;
  15574. +
  15575. + raw_spin_lock_irq(&worker->lock);
  15576. + while (!list_empty(&worker->events)) {
  15577. + struct swork_event *sev;
  15578. +
  15579. + sev = list_first_entry(&worker->events,
  15580. + struct swork_event, item);
  15581. + list_del(&sev->item);
  15582. + raw_spin_unlock_irq(&worker->lock);
  15583. +
  15584. + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING,
  15585. + &sev->flags));
  15586. + sev->func(sev);
  15587. + raw_spin_lock_irq(&worker->lock);
  15588. + }
  15589. + raw_spin_unlock_irq(&worker->lock);
  15590. + }
  15591. + return 0;
  15592. +}
  15593. +
  15594. +static struct sworker *swork_create(void)
  15595. +{
  15596. + struct sworker *worker;
  15597. +
  15598. + worker = kzalloc(sizeof(*worker), GFP_KERNEL);
  15599. + if (!worker)
  15600. + return ERR_PTR(-ENOMEM);
  15601. +
  15602. + INIT_LIST_HEAD(&worker->events);
  15603. + raw_spin_lock_init(&worker->lock);
  15604. + init_swait_head(&worker->wq);
  15605. +
  15606. + worker->task = kthread_run(swork_kthread, worker, "kswork");
  15607. + if (IS_ERR(worker->task)) {
  15608. + kfree(worker);
  15609. + return ERR_PTR(-ENOMEM);
  15610. + }
  15611. +
  15612. + return worker;
  15613. +}
  15614. +
  15615. +static void swork_destroy(struct sworker *worker)
  15616. +{
  15617. + kthread_stop(worker->task);
  15618. +
  15619. + WARN_ON(!list_empty(&worker->events));
  15620. + kfree(worker);
  15621. +}
  15622. +
  15623. +/**
  15624. + * swork_queue - queue swork
  15625. + *
15626. + * Returns %false if @sev was already on a queue, %true otherwise.
15627. + *
15628. + * The work is queued and processed on a random CPU.
  15629. + */
  15630. +bool swork_queue(struct swork_event *sev)
  15631. +{
  15632. + unsigned long flags;
  15633. +
  15634. + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags))
  15635. + return false;
  15636. +
  15637. + raw_spin_lock_irqsave(&glob_worker->lock, flags);
  15638. + list_add_tail(&sev->item, &glob_worker->events);
  15639. + raw_spin_unlock_irqrestore(&glob_worker->lock, flags);
  15640. +
  15641. + swait_wake(&glob_worker->wq);
  15642. + return true;
  15643. +}
  15644. +EXPORT_SYMBOL_GPL(swork_queue);
  15645. +
  15646. +/**
  15647. + * swork_get - get an instance of the sworker
  15648. + *
15649. + * Returns a negative error code if the initialization of the worker
15650. + * failed, %0 otherwise.
  15651. + *
  15652. + */
  15653. +int swork_get(void)
  15654. +{
  15655. + struct sworker *worker;
  15656. +
  15657. + mutex_lock(&worker_mutex);
  15658. + if (!glob_worker) {
  15659. + worker = swork_create();
  15660. + if (IS_ERR(worker)) {
  15661. + mutex_unlock(&worker_mutex);
  15662. + return -ENOMEM;
  15663. + }
  15664. +
  15665. + glob_worker = worker;
  15666. + }
  15667. +
  15668. + glob_worker->refs++;
  15669. + mutex_unlock(&worker_mutex);
  15670. +
  15671. + return 0;
  15672. +}
  15673. +EXPORT_SYMBOL_GPL(swork_get);
  15674. +
  15675. +/**
  15676. + * swork_put - puts an instance of the sworker
  15677. + *
  15678. + * Will destroy the sworker thread. This function must not be called until all
  15679. + * queued events have been completed.
  15680. + */
  15681. +void swork_put(void)
  15682. +{
  15683. + mutex_lock(&worker_mutex);
  15684. +
  15685. + glob_worker->refs--;
  15686. + if (glob_worker->refs > 0)
  15687. + goto out;
  15688. +
  15689. + swork_destroy(glob_worker);
  15690. + glob_worker = NULL;
  15691. +out:
  15692. + mutex_unlock(&worker_mutex);
  15693. +}
  15694. +EXPORT_SYMBOL_GPL(swork_put);
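
/*
 * Editor's sketch (not part of the patch): queueing a callback with the
 * simple-work API defined above.  The event and callback are hypothetical,
 * and the swork_event fields are initialized by hand here because the
 * INIT_*() helper belongs to the work-simple.h hunk of this series, not to
 * this file.
 */
#include <linux/work-simple.h>
#include <linux/list.h>

static void demo_swork_fn(struct swork_event *ev)
{
        /* runs in the "kswork" kthread, fully preemptible even on RT */
}

static struct swork_event demo_ev;

static int demo_swork_setup(void)
{
        int ret;

        ret = swork_get();              /* create or reference the worker */
        if (ret)
                return ret;

        INIT_LIST_HEAD(&demo_ev.item);
        demo_ev.flags = 0;
        demo_ev.func  = demo_swork_fn;
        return 0;
}

static void demo_raise_from_irq(void)
{
        /* safe from hard-IRQ context: a raw lock, a list add and a wake */
        swork_queue(&demo_ev);
}

static void demo_swork_teardown(void)
{
        swork_put();                    /* last put stops the kthread */
}
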
  15695. diff -Nur linux-3.18.10.orig/kernel/signal.c linux-3.18.10/kernel/signal.c
  15696. --- linux-3.18.10.orig/kernel/signal.c 2015-03-24 02:05:12.000000000 +0100
  15697. +++ linux-3.18.10/kernel/signal.c 2015-03-26 12:42:18.683588345 +0100
  15698. @@ -14,6 +14,7 @@
  15699. #include <linux/export.h>
  15700. #include <linux/init.h>
  15701. #include <linux/sched.h>
  15702. +#include <linux/sched/rt.h>
  15703. #include <linux/fs.h>
  15704. #include <linux/tty.h>
  15705. #include <linux/binfmts.h>
  15706. @@ -352,13 +353,45 @@
  15707. return false;
  15708. }
  15709. +#ifdef __HAVE_ARCH_CMPXCHG
  15710. +static inline struct sigqueue *get_task_cache(struct task_struct *t)
  15711. +{
  15712. + struct sigqueue *q = t->sigqueue_cache;
  15713. +
  15714. + if (cmpxchg(&t->sigqueue_cache, q, NULL) != q)
  15715. + return NULL;
  15716. + return q;
  15717. +}
  15718. +
  15719. +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
  15720. +{
  15721. + if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
  15722. + return 0;
  15723. + return 1;
  15724. +}
  15725. +
  15726. +#else
  15727. +
  15728. +static inline struct sigqueue *get_task_cache(struct task_struct *t)
  15729. +{
  15730. + return NULL;
  15731. +}
  15732. +
  15733. +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
  15734. +{
  15735. + return 1;
  15736. +}
  15737. +
  15738. +#endif
  15739. +
  15740. /*
  15741. * allocate a new signal queue record
  15742. * - this may be called without locks if and only if t == current, otherwise an
  15743. * appropriate lock must be held to stop the target task from exiting
  15744. */
  15745. static struct sigqueue *
  15746. -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
  15747. +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
  15748. + int override_rlimit, int fromslab)
  15749. {
  15750. struct sigqueue *q = NULL;
  15751. struct user_struct *user;
  15752. @@ -375,7 +408,10 @@
  15753. if (override_rlimit ||
  15754. atomic_read(&user->sigpending) <=
  15755. task_rlimit(t, RLIMIT_SIGPENDING)) {
  15756. - q = kmem_cache_alloc(sigqueue_cachep, flags);
  15757. + if (!fromslab)
  15758. + q = get_task_cache(t);
  15759. + if (!q)
  15760. + q = kmem_cache_alloc(sigqueue_cachep, flags);
  15761. } else {
  15762. print_dropped_signal(sig);
  15763. }
  15764. @@ -392,6 +428,13 @@
  15765. return q;
  15766. }
  15767. +static struct sigqueue *
  15768. +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags,
  15769. + int override_rlimit)
  15770. +{
  15771. + return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0);
  15772. +}
  15773. +
  15774. static void __sigqueue_free(struct sigqueue *q)
  15775. {
  15776. if (q->flags & SIGQUEUE_PREALLOC)
  15777. @@ -401,6 +444,21 @@
  15778. kmem_cache_free(sigqueue_cachep, q);
  15779. }
  15780. +static void sigqueue_free_current(struct sigqueue *q)
  15781. +{
  15782. + struct user_struct *up;
  15783. +
  15784. + if (q->flags & SIGQUEUE_PREALLOC)
  15785. + return;
  15786. +
  15787. + up = q->user;
  15788. + if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
  15789. + atomic_dec(&up->sigpending);
  15790. + free_uid(up);
  15791. + } else
  15792. + __sigqueue_free(q);
  15793. +}
  15794. +
  15795. void flush_sigqueue(struct sigpending *queue)
  15796. {
  15797. struct sigqueue *q;
  15798. @@ -414,6 +472,21 @@
  15799. }
  15800. /*
  15801. + * Called from __exit_signal. Flush tsk->pending and
  15802. + * tsk->sigqueue_cache
  15803. + */
  15804. +void flush_task_sigqueue(struct task_struct *tsk)
  15805. +{
  15806. + struct sigqueue *q;
  15807. +
  15808. + flush_sigqueue(&tsk->pending);
  15809. +
  15810. + q = get_task_cache(tsk);
  15811. + if (q)
  15812. + kmem_cache_free(sigqueue_cachep, q);
  15813. +}
  15814. +
  15815. +/*
  15816. * Flush all pending signals for a task.
  15817. */
  15818. void __flush_signals(struct task_struct *t)
  15819. @@ -565,7 +638,7 @@
  15820. still_pending:
  15821. list_del_init(&first->list);
  15822. copy_siginfo(info, &first->info);
  15823. - __sigqueue_free(first);
  15824. + sigqueue_free_current(first);
  15825. } else {
  15826. /*
  15827. * Ok, it wasn't in the queue. This must be
  15828. @@ -611,6 +684,8 @@
  15829. {
  15830. int signr;
  15831. + WARN_ON_ONCE(tsk != current);
  15832. +
  15833. /* We only dequeue private signals from ourselves, we don't let
  15834. * signalfd steal them
  15835. */
  15836. @@ -1207,8 +1282,8 @@
  15837. * We don't want to have recursive SIGSEGV's etc, for example,
  15838. * that is why we also clear SIGNAL_UNKILLABLE.
  15839. */
  15840. -int
  15841. -force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  15842. +static int
  15843. +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  15844. {
  15845. unsigned long int flags;
  15846. int ret, blocked, ignored;
  15847. @@ -1233,6 +1308,39 @@
  15848. return ret;
  15849. }
  15850. +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  15851. +{
  15852. +/*
15853. + * On some archs, PREEMPT_RT has to delay sending a signal from a trap
15854. + * since it cannot enable preemption there and the signal code's
15855. + * spin_locks turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME,
15856. + * which will send the signal on exit from the trap.
  15857. + */
  15858. +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
  15859. + if (in_atomic()) {
  15860. + if (WARN_ON_ONCE(t != current))
  15861. + return 0;
  15862. + if (WARN_ON_ONCE(t->forced_info.si_signo))
  15863. + return 0;
  15864. +
  15865. + if (is_si_special(info)) {
  15866. + WARN_ON_ONCE(info != SEND_SIG_PRIV);
  15867. + t->forced_info.si_signo = sig;
  15868. + t->forced_info.si_errno = 0;
  15869. + t->forced_info.si_code = SI_KERNEL;
  15870. + t->forced_info.si_pid = 0;
  15871. + t->forced_info.si_uid = 0;
  15872. + } else {
  15873. + t->forced_info = *info;
  15874. + }
  15875. +
  15876. + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
  15877. + return 0;
  15878. + }
  15879. +#endif
  15880. + return do_force_sig_info(sig, info, t);
  15881. +}
  15882. +
  15883. /*
  15884. * Nuke all other threads in the group.
  15885. */
  15886. @@ -1267,12 +1375,12 @@
  15887. * Disable interrupts early to avoid deadlocks.
  15888. * See rcu_read_unlock() comment header for details.
  15889. */
  15890. - local_irq_save(*flags);
  15891. + local_irq_save_nort(*flags);
  15892. rcu_read_lock();
  15893. sighand = rcu_dereference(tsk->sighand);
  15894. if (unlikely(sighand == NULL)) {
  15895. rcu_read_unlock();
  15896. - local_irq_restore(*flags);
  15897. + local_irq_restore_nort(*flags);
  15898. break;
  15899. }
  15900. @@ -1283,7 +1391,7 @@
  15901. }
  15902. spin_unlock(&sighand->siglock);
  15903. rcu_read_unlock();
  15904. - local_irq_restore(*flags);
  15905. + local_irq_restore_nort(*flags);
  15906. }
  15907. return sighand;
  15908. @@ -1528,7 +1636,8 @@
  15909. */
  15910. struct sigqueue *sigqueue_alloc(void)
  15911. {
  15912. - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0);
15913. + /* Preallocated sigqueue objects always come from the slab cache! */
  15914. + struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1);
  15915. if (q)
  15916. q->flags |= SIGQUEUE_PREALLOC;
  15917. @@ -1889,15 +1998,7 @@
  15918. if (gstop_done && ptrace_reparented(current))
  15919. do_notify_parent_cldstop(current, false, why);
  15920. - /*
  15921. - * Don't want to allow preemption here, because
  15922. - * sys_ptrace() needs this task to be inactive.
  15923. - *
  15924. - * XXX: implement read_unlock_no_resched().
  15925. - */
  15926. - preempt_disable();
  15927. read_unlock(&tasklist_lock);
  15928. - preempt_enable_no_resched();
  15929. freezable_schedule();
  15930. } else {
  15931. /*
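
/*
 * Editor's sketch (not part of the patch): get_task_cache()/put_task_cache()
 * above implement a single-slot, lock-free object cache, only compiled in
 * when the architecture provides cmpxchg().  The same protocol on a
 * hypothetical global slot, to make the claim/return semantics explicit:
 */
#include <linux/atomic.h>
#include <linux/types.h>

struct demo_obj;
static struct demo_obj *demo_slot;

/* Claim the cached object; NULL if the slot is empty or someone raced us. */
static struct demo_obj *demo_cache_get(void)
{
        struct demo_obj *obj = demo_slot;

        if (!obj || cmpxchg(&demo_slot, obj, NULL) != obj)
                return NULL;
        return obj;
}

/* Return an object to the slot; false if the slot was already occupied. */
static bool demo_cache_put(struct demo_obj *obj)
{
        return cmpxchg(&demo_slot, NULL, obj) == NULL;
}
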
  15932. diff -Nur linux-3.18.10.orig/kernel/softirq.c linux-3.18.10/kernel/softirq.c
  15933. --- linux-3.18.10.orig/kernel/softirq.c 2015-03-24 02:05:12.000000000 +0100
  15934. +++ linux-3.18.10/kernel/softirq.c 2015-03-26 12:42:18.683588345 +0100
  15935. @@ -21,10 +21,12 @@
  15936. #include <linux/freezer.h>
  15937. #include <linux/kthread.h>
  15938. #include <linux/rcupdate.h>
  15939. +#include <linux/delay.h>
  15940. #include <linux/ftrace.h>
  15941. #include <linux/smp.h>
  15942. #include <linux/smpboot.h>
  15943. #include <linux/tick.h>
  15944. +#include <linux/locallock.h>
  15945. #include <linux/irq.h>
  15946. #define CREATE_TRACE_POINTS
  15947. @@ -62,6 +64,98 @@
  15948. "TASKLET", "SCHED", "HRTIMER", "RCU"
  15949. };
  15950. +#ifdef CONFIG_NO_HZ_COMMON
  15951. +# ifdef CONFIG_PREEMPT_RT_FULL
  15952. +
  15953. +struct softirq_runner {
  15954. + struct task_struct *runner[NR_SOFTIRQS];
  15955. +};
  15956. +
  15957. +static DEFINE_PER_CPU(struct softirq_runner, softirq_runners);
  15958. +
  15959. +static inline void softirq_set_runner(unsigned int sirq)
  15960. +{
  15961. + struct softirq_runner *sr = &__get_cpu_var(softirq_runners);
  15962. +
  15963. + sr->runner[sirq] = current;
  15964. +}
  15965. +
  15966. +static inline void softirq_clr_runner(unsigned int sirq)
  15967. +{
  15968. + struct softirq_runner *sr = &__get_cpu_var(softirq_runners);
  15969. +
  15970. + sr->runner[sirq] = NULL;
  15971. +}
  15972. +
  15973. +/*
  15974. + * On preempt-rt a softirq running context might be blocked on a
  15975. + * lock. There might be no other runnable task on this CPU because the
  15976. + * lock owner runs on some other CPU. So we have to go into idle with
15977. + * the pending bit set. Therefore we need to check for this here,
15978. + * otherwise we would warn about false positives, which confuses users
15979. + * and defeats the whole purpose of this test.
  15980. + *
  15981. + * This code is called with interrupts disabled.
  15982. + */
  15983. +void softirq_check_pending_idle(void)
  15984. +{
  15985. + static int rate_limit;
  15986. + struct softirq_runner *sr = &__get_cpu_var(softirq_runners);
  15987. + u32 warnpending;
  15988. + int i;
  15989. +
  15990. + if (rate_limit >= 10)
  15991. + return;
  15992. +
  15993. + warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
  15994. + for (i = 0; i < NR_SOFTIRQS; i++) {
  15995. + struct task_struct *tsk = sr->runner[i];
  15996. +
  15997. + /*
  15998. + * The wakeup code in rtmutex.c wakes up the task
  15999. + * _before_ it sets pi_blocked_on to NULL under
  16000. + * tsk->pi_lock. So we need to check for both: state
  16001. + * and pi_blocked_on.
  16002. + */
  16003. + if (tsk) {
  16004. + raw_spin_lock(&tsk->pi_lock);
  16005. + if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
  16006. + /* Clear all bits pending in that task */
  16007. + warnpending &= ~(tsk->softirqs_raised);
  16008. + warnpending &= ~(1 << i);
  16009. + }
  16010. + raw_spin_unlock(&tsk->pi_lock);
  16011. + }
  16012. + }
  16013. +
  16014. + if (warnpending) {
  16015. + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
  16016. + warnpending);
  16017. + rate_limit++;
  16018. + }
  16019. +}
  16020. +# else
  16021. +/*
  16022. + * On !PREEMPT_RT we just printk rate limited:
  16023. + */
  16024. +void softirq_check_pending_idle(void)
  16025. +{
  16026. + static int rate_limit;
  16027. +
  16028. + if (rate_limit < 10 &&
  16029. + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
  16030. + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
  16031. + local_softirq_pending());
  16032. + rate_limit++;
  16033. + }
  16034. +}
  16035. +# endif
  16036. +
  16037. +#else /* !CONFIG_NO_HZ_COMMON */
  16038. +static inline void softirq_set_runner(unsigned int sirq) { }
  16039. +static inline void softirq_clr_runner(unsigned int sirq) { }
  16040. +#endif
  16041. +
  16042. /*
  16043. * we cannot loop indefinitely here to avoid userspace starvation,
  16044. * but we also don't want to introduce a worst case 1/HZ latency
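The softirq_check_pending_idle() hunk above suppresses the NOHZ warning when the task registered as runner for a pending softirq is either blocked on an rtmutex or already runnable, because that task will finish the work once it gets the CPU back. A stand-alone, user-space model of that decision (all names below are illustrative, not kernel API):

#include <stdbool.h>
#include <stdio.h>

#define NR_SOFTIRQS 10

struct runner_state {
	bool exists;            /* a task is registered as runner for this softirq */
	bool blocked_on_lock;   /* models tsk->pi_blocked_on != NULL */
	bool runnable;          /* models tsk->state == TASK_RUNNING */
};

/* Return the mask of softirqs that are genuinely stuck and worth a warning. */
static unsigned int check_pending_idle(unsigned int pending,
				       const struct runner_state *runners)
{
	unsigned int warnpending = pending;

	for (int i = 0; i < NR_SOFTIRQS; i++) {
		const struct runner_state *r = &runners[i];

		if (r->exists && (r->blocked_on_lock || r->runnable))
			warnpending &= ~(1u << i);	/* harmless: will be handled */
	}
	return warnpending;
}

int main(void)
{
	struct runner_state runners[NR_SOFTIRQS] = {
		[3] = { .exists = true, .blocked_on_lock = true },
	};
	unsigned int pending = (1u << 3) | (1u << 5);

	/* bit 3 is excused, bit 5 has nobody working on it: warn mask 0x20 */
	printf("warn mask: %#x\n", check_pending_idle(pending, runners));
	return 0;
}
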
  16045. @@ -77,6 +171,70 @@
  16046. wake_up_process(tsk);
  16047. }
  16048. +static void handle_softirq(unsigned int vec_nr)
  16049. +{
  16050. + struct softirq_action *h = softirq_vec + vec_nr;
  16051. + int prev_count;
  16052. +
  16053. + prev_count = preempt_count();
  16054. +
  16055. + kstat_incr_softirqs_this_cpu(vec_nr);
  16056. +
  16057. + trace_softirq_entry(vec_nr);
  16058. + h->action(h);
  16059. + trace_softirq_exit(vec_nr);
  16060. + if (unlikely(prev_count != preempt_count())) {
  16061. + pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
  16062. + vec_nr, softirq_to_name[vec_nr], h->action,
  16063. + prev_count, preempt_count());
  16064. + preempt_count_set(prev_count);
  16065. + }
  16066. +}
  16067. +
  16068. +#ifndef CONFIG_PREEMPT_RT_FULL
  16069. +static inline int ksoftirqd_softirq_pending(void)
  16070. +{
  16071. + return local_softirq_pending();
  16072. +}
  16073. +
  16074. +static void handle_pending_softirqs(u32 pending, int need_rcu_bh_qs)
  16075. +{
  16076. + struct softirq_action *h = softirq_vec;
  16077. + int softirq_bit;
  16078. +
  16079. + local_irq_enable();
  16080. +
  16081. + h = softirq_vec;
  16082. +
  16083. + while ((softirq_bit = ffs(pending))) {
  16084. + unsigned int vec_nr;
  16085. +
  16086. + h += softirq_bit - 1;
  16087. + vec_nr = h - softirq_vec;
  16088. + handle_softirq(vec_nr);
  16089. +
  16090. + h++;
  16091. + pending >>= softirq_bit;
  16092. + }
  16093. +
  16094. + if (need_rcu_bh_qs)
  16095. + rcu_bh_qs();
  16096. + local_irq_disable();
  16097. +}
  16098. +
  16099. +static void run_ksoftirqd(unsigned int cpu)
  16100. +{
  16101. + local_irq_disable();
  16102. + if (ksoftirqd_softirq_pending()) {
  16103. + __do_softirq();
  16104. + rcu_note_context_switch(cpu);
  16105. + local_irq_enable();
  16106. + cond_resched();
  16107. + return;
  16108. + }
  16109. + local_irq_enable();
  16110. +}
  16111. +
  16112. /*
  16113. * preempt_count and SOFTIRQ_OFFSET usage:
  16114. * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
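handle_pending_softirqs() above factors the ffs()-based bit walk out of __do_softirq(). The pointer arithmetic (h += softirq_bit - 1; pending >>= softirq_bit) is easy to misread; the small user-space program below mirrors it with a plain index and prints the dispatched vector numbers:

#include <stdio.h>
#include <strings.h>		/* ffs() */

static void handle(unsigned int vec_nr)
{
	printf("dispatch softirq vector %u\n", vec_nr);
}

int main(void)
{
	unsigned int pending = 0x4a;	/* bits 1, 3 and 6 set */
	unsigned int vec_base = 0;	/* plays the role of h - softirq_vec */
	int softirq_bit;

	while ((softirq_bit = ffs(pending))) {
		vec_base += softirq_bit - 1;	/* h += softirq_bit - 1 */
		handle(vec_base);
		vec_base++;			/* h++ */
		pending >>= softirq_bit;	/* consume the handled bit */
	}
	return 0;		/* prints vectors 1, 3 and 6 */
}
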
  16115. @@ -228,10 +386,8 @@
  16116. unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
  16117. unsigned long old_flags = current->flags;
  16118. int max_restart = MAX_SOFTIRQ_RESTART;
  16119. - struct softirq_action *h;
  16120. bool in_hardirq;
  16121. __u32 pending;
  16122. - int softirq_bit;
  16123. /*
  16124. * Mask out PF_MEMALLOC s current task context is borrowed for the
  16125. @@ -250,36 +406,7 @@
  16126. /* Reset the pending bitmask before enabling irqs */
  16127. set_softirq_pending(0);
  16128. - local_irq_enable();
  16129. -
  16130. - h = softirq_vec;
  16131. -
  16132. - while ((softirq_bit = ffs(pending))) {
  16133. - unsigned int vec_nr;
  16134. - int prev_count;
  16135. -
  16136. - h += softirq_bit - 1;
  16137. -
  16138. - vec_nr = h - softirq_vec;
  16139. - prev_count = preempt_count();
  16140. -
  16141. - kstat_incr_softirqs_this_cpu(vec_nr);
  16142. -
  16143. - trace_softirq_entry(vec_nr);
  16144. - h->action(h);
  16145. - trace_softirq_exit(vec_nr);
  16146. - if (unlikely(prev_count != preempt_count())) {
  16147. - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
  16148. - vec_nr, softirq_to_name[vec_nr], h->action,
  16149. - prev_count, preempt_count());
  16150. - preempt_count_set(prev_count);
  16151. - }
  16152. - h++;
  16153. - pending >>= softirq_bit;
  16154. - }
  16155. -
  16156. - rcu_bh_qs();
  16157. - local_irq_disable();
  16158. + handle_pending_softirqs(pending, 1);
  16159. pending = local_softirq_pending();
  16160. if (pending) {
  16161. @@ -316,6 +443,285 @@
  16162. }
  16163. /*
  16164. + * This function must run with irqs disabled!
  16165. + */
  16166. +void raise_softirq_irqoff(unsigned int nr)
  16167. +{
  16168. + __raise_softirq_irqoff(nr);
  16169. +
  16170. + /*
  16171. + * If we're in an interrupt or softirq, we're done
  16172. + * (this also catches softirq-disabled code). We will
  16173. + * actually run the softirq once we return from
  16174. + * the irq or softirq.
  16175. + *
  16176. + * Otherwise we wake up ksoftirqd to make sure we
  16177. + * schedule the softirq soon.
  16178. + */
  16179. + if (!in_interrupt())
  16180. + wakeup_softirqd();
  16181. +}
  16182. +
  16183. +void __raise_softirq_irqoff(unsigned int nr)
  16184. +{
  16185. + trace_softirq_raise(nr);
  16186. + or_softirq_pending(1UL << nr);
  16187. +}
  16188. +
  16189. +static inline void local_bh_disable_nort(void) { local_bh_disable(); }
  16190. +static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
  16191. +static void ksoftirqd_set_sched_params(unsigned int cpu) { }
  16192. +static void ksoftirqd_clr_sched_params(unsigned int cpu, bool online) { }
  16193. +
  16194. +#else /* !PREEMPT_RT_FULL */
  16195. +
  16196. +/*
  16197. + * On RT we serialize softirq execution with a cpu local lock per softirq
  16198. + */
  16199. +static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks);
  16200. +
  16201. +void __init softirq_early_init(void)
  16202. +{
  16203. + int i;
  16204. +
  16205. + for (i = 0; i < NR_SOFTIRQS; i++)
  16206. + local_irq_lock_init(local_softirq_locks[i]);
  16207. +}
  16208. +
  16209. +static void lock_softirq(int which)
  16210. +{
  16211. + local_lock(local_softirq_locks[which]);
  16212. +}
  16213. +
  16214. +static void unlock_softirq(int which)
  16215. +{
  16216. + local_unlock(local_softirq_locks[which]);
  16217. +}
  16218. +
  16219. +static void do_single_softirq(int which, int need_rcu_bh_qs)
  16220. +{
  16221. + unsigned long old_flags = current->flags;
  16222. +
  16223. + current->flags &= ~PF_MEMALLOC;
  16224. + vtime_account_irq_enter(current);
  16225. + current->flags |= PF_IN_SOFTIRQ;
  16226. + lockdep_softirq_enter();
  16227. + local_irq_enable();
  16228. + handle_softirq(which);
  16229. + local_irq_disable();
  16230. + lockdep_softirq_exit();
  16231. + current->flags &= ~PF_IN_SOFTIRQ;
  16232. + vtime_account_irq_enter(current);
  16233. + tsk_restore_flags(current, old_flags, PF_MEMALLOC);
  16234. +}
  16235. +
  16236. +/*
  16237. + * Called with interrupts disabled. Process softirqs which were raised
  16238. + * in current context (or on behalf of ksoftirqd).
  16239. + */
  16240. +static void do_current_softirqs(int need_rcu_bh_qs)
  16241. +{
  16242. + while (current->softirqs_raised) {
  16243. + int i = __ffs(current->softirqs_raised);
  16244. + unsigned int pending, mask = (1U << i);
  16245. +
  16246. + current->softirqs_raised &= ~mask;
  16247. + local_irq_enable();
  16248. +
  16249. + /*
  16250. + * If the lock is contended, we boost the owner to
  16251. + * process the softirq or leave the critical section
  16252. + * now.
  16253. + */
  16254. + lock_softirq(i);
  16255. + local_irq_disable();
  16256. + softirq_set_runner(i);
  16257. + /*
16258. + * Check the local_softirq_pending() bits to see whether we
16259. + * still need to process this or if someone else already
16260. + * took care of it.
  16261. + */
  16262. + pending = local_softirq_pending();
  16263. + if (pending & mask) {
  16264. + set_softirq_pending(pending & ~mask);
  16265. + do_single_softirq(i, need_rcu_bh_qs);
  16266. + }
  16267. + softirq_clr_runner(i);
  16268. + unlock_softirq(i);
  16269. + WARN_ON(current->softirq_nestcnt != 1);
  16270. + }
  16271. +}
  16272. +
  16273. +static void __local_bh_disable(void)
  16274. +{
  16275. + if (++current->softirq_nestcnt == 1)
  16276. + migrate_disable();
  16277. +}
  16278. +
  16279. +void local_bh_disable(void)
  16280. +{
  16281. + __local_bh_disable();
  16282. +}
  16283. +EXPORT_SYMBOL(local_bh_disable);
  16284. +
  16285. +void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
  16286. +{
  16287. + __local_bh_disable();
  16288. + if (cnt & PREEMPT_CHECK_OFFSET)
  16289. + preempt_disable();
  16290. +}
  16291. +
  16292. +static void __local_bh_enable(void)
  16293. +{
  16294. + if (WARN_ON(current->softirq_nestcnt == 0))
  16295. + return;
  16296. +
  16297. + local_irq_disable();
  16298. + if (current->softirq_nestcnt == 1 && current->softirqs_raised)
  16299. + do_current_softirqs(1);
  16300. + local_irq_enable();
  16301. +
  16302. + if (--current->softirq_nestcnt == 0)
  16303. + migrate_enable();
  16304. +}
  16305. +
  16306. +void local_bh_enable(void)
  16307. +{
  16308. + __local_bh_enable();
  16309. +}
  16310. +EXPORT_SYMBOL(local_bh_enable);
  16311. +
  16312. +extern void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
  16313. +{
  16314. + __local_bh_enable();
  16315. + if (cnt & PREEMPT_CHECK_OFFSET)
  16316. + preempt_enable();
  16317. +}
  16318. +
  16319. +void local_bh_enable_ip(unsigned long ip)
  16320. +{
  16321. + local_bh_enable();
  16322. +}
  16323. +EXPORT_SYMBOL(local_bh_enable_ip);
  16324. +
  16325. +void _local_bh_enable(void)
  16326. +{
  16327. + if (WARN_ON(current->softirq_nestcnt == 0))
  16328. + return;
  16329. + if (--current->softirq_nestcnt == 0)
  16330. + migrate_enable();
  16331. +}
  16332. +EXPORT_SYMBOL(_local_bh_enable);
  16333. +
  16334. +int in_serving_softirq(void)
  16335. +{
  16336. + return current->flags & PF_IN_SOFTIRQ;
  16337. +}
  16338. +EXPORT_SYMBOL(in_serving_softirq);
  16339. +
  16340. +/* Called with preemption disabled */
  16341. +static void run_ksoftirqd(unsigned int cpu)
  16342. +{
  16343. + local_irq_disable();
  16344. + current->softirq_nestcnt++;
  16345. +
  16346. + do_current_softirqs(1);
  16347. + current->softirq_nestcnt--;
  16348. + rcu_note_context_switch(cpu);
  16349. + local_irq_enable();
  16350. +}
  16351. +
  16352. +/*
  16353. + * Called from netif_rx_ni(). Preemption enabled, but migration
  16354. + * disabled. So the cpu can't go away under us.
  16355. + */
  16356. +void thread_do_softirq(void)
  16357. +{
  16358. + if (!in_serving_softirq() && current->softirqs_raised) {
  16359. + current->softirq_nestcnt++;
  16360. + do_current_softirqs(0);
  16361. + current->softirq_nestcnt--;
  16362. + }
  16363. +}
  16364. +
  16365. +static void do_raise_softirq_irqoff(unsigned int nr)
  16366. +{
  16367. + trace_softirq_raise(nr);
  16368. + or_softirq_pending(1UL << nr);
  16369. +
  16370. + /*
  16371. + * If we are not in a hard interrupt and inside a bh disabled
  16372. + * region, we simply raise the flag on current. local_bh_enable()
  16373. + * will make sure that the softirq is executed. Otherwise we
  16374. + * delegate it to ksoftirqd.
  16375. + */
  16376. + if (!in_irq() && current->softirq_nestcnt)
  16377. + current->softirqs_raised |= (1U << nr);
  16378. + else if (__this_cpu_read(ksoftirqd))
  16379. + __this_cpu_read(ksoftirqd)->softirqs_raised |= (1U << nr);
  16380. +}
  16381. +
  16382. +void __raise_softirq_irqoff(unsigned int nr)
  16383. +{
  16384. + do_raise_softirq_irqoff(nr);
  16385. + if (!in_irq() && !current->softirq_nestcnt)
  16386. + wakeup_softirqd();
  16387. +}
  16388. +
  16389. +/*
  16390. + * This function must run with irqs disabled!
  16391. + */
  16392. +void raise_softirq_irqoff(unsigned int nr)
  16393. +{
  16394. + do_raise_softirq_irqoff(nr);
  16395. +
  16396. + /*
  16397. + * If we're in an hard interrupt we let irq return code deal
  16398. + * with the wakeup of ksoftirqd.
  16399. + */
  16400. + if (in_irq())
  16401. + return;
  16402. + /*
  16403. + * If we are in thread context but outside of a bh disabled
  16404. + * region, we need to wake ksoftirqd as well.
  16405. + *
  16406. + * CHECKME: Some of the places which do that could be wrapped
  16407. + * into local_bh_disable/enable pairs. Though it's unclear
  16408. + * whether this is worth the effort. To find those places just
  16409. + * raise a WARN() if the condition is met.
  16410. + */
  16411. + if (!current->softirq_nestcnt)
  16412. + wakeup_softirqd();
  16413. +}
  16414. +
  16415. +static inline int ksoftirqd_softirq_pending(void)
  16416. +{
  16417. + return current->softirqs_raised;
  16418. +}
  16419. +
  16420. +static inline void local_bh_disable_nort(void) { }
  16421. +static inline void _local_bh_enable_nort(void) { }
  16422. +
  16423. +static inline void ksoftirqd_set_sched_params(unsigned int cpu)
  16424. +{
  16425. + struct sched_param param = { .sched_priority = 1 };
  16426. +
  16427. + sched_setscheduler(current, SCHED_FIFO, &param);
  16428. + /* Take over all pending softirqs when starting */
  16429. + local_irq_disable();
  16430. + current->softirqs_raised = local_softirq_pending();
  16431. + local_irq_enable();
  16432. +}
  16433. +
  16434. +static inline void ksoftirqd_clr_sched_params(unsigned int cpu, bool online)
  16435. +{
  16436. + struct sched_param param = { .sched_priority = 0 };
  16437. +
  16438. + sched_setscheduler(current, SCHED_NORMAL, &param);
  16439. +}
  16440. +
  16441. +#endif /* PREEMPT_RT_FULL */
  16442. +/*
  16443. * Enter an interrupt context.
  16444. */
  16445. void irq_enter(void)
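The PREEMPT_RT_FULL block above replaces the preempt-count based local_bh_disable()/local_bh_enable() with a per-task nesting counter plus a per-task softirqs_raised mask: softirqs raised inside a bh-disabled section run only when the outermost local_bh_enable() is reached, and raises outside any section are handed to ksoftirqd. A minimal user-space model of just that nesting rule (single-threaded, no locking, names illustrative):

#include <stdio.h>

static int nestcnt;		/* models current->softirq_nestcnt */
static unsigned int raised;	/* models current->softirqs_raised */

static void run_raised(void)
{
	while (raised) {
		int i = __builtin_ctz(raised);

		raised &= ~(1u << i);
		printf("running softirq %d\n", i);
	}
}

static void bh_disable(void)
{
	nestcnt++;		/* the real code also calls migrate_disable() */
}

static void bh_enable(void)
{
	if (nestcnt == 1 && raised)
		run_raised();	/* flush before leaving the outermost section */
	nestcnt--;
}

static void raise_softirq(int nr)
{
	if (nestcnt)
		raised |= 1u << nr;		/* deferred to bh_enable() */
	else
		printf("wake ksoftirqd for %d\n", nr);
}

int main(void)
{
	bh_disable();
	raise_softirq(3);
	bh_disable();
	raise_softirq(1);
	bh_enable();		/* nested: nothing runs yet */
	bh_enable();		/* outermost: runs 1, then 3 */
	raise_softirq(2);	/* outside any section: ksoftirqd's job */
	return 0;
}
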
  16446. @@ -326,9 +732,9 @@
  16447. * Prevent raise_softirq from needlessly waking up ksoftirqd
  16448. * here, as softirq will be serviced on return from interrupt.
  16449. */
  16450. - local_bh_disable();
  16451. + local_bh_disable_nort();
  16452. tick_irq_enter();
  16453. - _local_bh_enable();
  16454. + _local_bh_enable_nort();
  16455. }
  16456. __irq_enter();
  16457. @@ -336,6 +742,7 @@
  16458. static inline void invoke_softirq(void)
  16459. {
  16460. +#ifndef CONFIG_PREEMPT_RT_FULL
  16461. if (!force_irqthreads) {
  16462. #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
  16463. /*
  16464. @@ -355,6 +762,15 @@
  16465. } else {
  16466. wakeup_softirqd();
  16467. }
  16468. +#else /* PREEMPT_RT_FULL */
  16469. + unsigned long flags;
  16470. +
  16471. + local_irq_save(flags);
  16472. + if (__this_cpu_read(ksoftirqd) &&
  16473. + __this_cpu_read(ksoftirqd)->softirqs_raised)
  16474. + wakeup_softirqd();
  16475. + local_irq_restore(flags);
  16476. +#endif
  16477. }
  16478. static inline void tick_irq_exit(void)
  16479. @@ -391,26 +807,6 @@
  16480. trace_hardirq_exit(); /* must be last! */
  16481. }
  16482. -/*
  16483. - * This function must run with irqs disabled!
  16484. - */
  16485. -inline void raise_softirq_irqoff(unsigned int nr)
  16486. -{
  16487. - __raise_softirq_irqoff(nr);
  16488. -
  16489. - /*
  16490. - * If we're in an interrupt or softirq, we're done
  16491. - * (this also catches softirq-disabled code). We will
  16492. - * actually run the softirq once we return from
  16493. - * the irq or softirq.
  16494. - *
  16495. - * Otherwise we wake up ksoftirqd to make sure we
  16496. - * schedule the softirq soon.
  16497. - */
  16498. - if (!in_interrupt())
  16499. - wakeup_softirqd();
  16500. -}
  16501. -
  16502. void raise_softirq(unsigned int nr)
  16503. {
  16504. unsigned long flags;
  16505. @@ -420,12 +816,6 @@
  16506. local_irq_restore(flags);
  16507. }
  16508. -void __raise_softirq_irqoff(unsigned int nr)
  16509. -{
  16510. - trace_softirq_raise(nr);
  16511. - or_softirq_pending(1UL << nr);
  16512. -}
  16513. -
  16514. void open_softirq(int nr, void (*action)(struct softirq_action *))
  16515. {
  16516. softirq_vec[nr].action = action;
  16517. @@ -442,15 +832,45 @@
  16518. static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
  16519. static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
16520. +static inline void
  16521. +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
  16522. +{
  16523. + if (tasklet_trylock(t)) {
  16524. +again:
  16525. + /* We may have been preempted before tasklet_trylock
  16526. + * and __tasklet_action may have already run.
16527. + * So double check the sched bit while the tasklet
  16528. + * is locked before adding it to the list.
  16529. + */
  16530. + if (test_bit(TASKLET_STATE_SCHED, &t->state)) {
  16531. + t->next = NULL;
  16532. + *head->tail = t;
  16533. + head->tail = &(t->next);
  16534. + raise_softirq_irqoff(nr);
  16535. + tasklet_unlock(t);
  16536. + } else {
16537. + /* This is subtle. If we hit the corner case above,
16538. + * it is possible that we get preempted right here,
  16539. + * and another task has successfully called
  16540. + * tasklet_schedule(), then this function, and
  16541. + * failed on the trylock. Thus we must be sure
  16542. + * before releasing the tasklet lock, that the
  16543. + * SCHED_BIT is clear. Otherwise the tasklet
  16544. + * may get its SCHED_BIT set, but not added to the
  16545. + * list
  16546. + */
  16547. + if (!tasklet_tryunlock(t))
  16548. + goto again;
  16549. + }
  16550. + }
  16551. +}
  16552. +
  16553. void __tasklet_schedule(struct tasklet_struct *t)
  16554. {
  16555. unsigned long flags;
  16556. local_irq_save(flags);
  16557. - t->next = NULL;
  16558. - *__this_cpu_read(tasklet_vec.tail) = t;
  16559. - __this_cpu_write(tasklet_vec.tail, &(t->next));
  16560. - raise_softirq_irqoff(TASKLET_SOFTIRQ);
  16561. + __tasklet_common_schedule(t, &__get_cpu_var(tasklet_vec), TASKLET_SOFTIRQ);
  16562. local_irq_restore(flags);
  16563. }
  16564. EXPORT_SYMBOL(__tasklet_schedule);
  16565. @@ -460,10 +880,7 @@
  16566. unsigned long flags;
  16567. local_irq_save(flags);
  16568. - t->next = NULL;
  16569. - *__this_cpu_read(tasklet_hi_vec.tail) = t;
  16570. - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
  16571. - raise_softirq_irqoff(HI_SOFTIRQ);
  16572. + __tasklet_common_schedule(t, &__get_cpu_var(tasklet_hi_vec), HI_SOFTIRQ);
  16573. local_irq_restore(flags);
  16574. }
  16575. EXPORT_SYMBOL(__tasklet_hi_schedule);
  16576. @@ -472,48 +889,116 @@
  16577. {
  16578. BUG_ON(!irqs_disabled());
  16579. - t->next = __this_cpu_read(tasklet_hi_vec.head);
  16580. - __this_cpu_write(tasklet_hi_vec.head, t);
  16581. - __raise_softirq_irqoff(HI_SOFTIRQ);
  16582. + __tasklet_hi_schedule(t);
  16583. }
  16584. EXPORT_SYMBOL(__tasklet_hi_schedule_first);
  16585. -static void tasklet_action(struct softirq_action *a)
  16586. +void tasklet_enable(struct tasklet_struct *t)
  16587. {
  16588. - struct tasklet_struct *list;
  16589. + if (!atomic_dec_and_test(&t->count))
  16590. + return;
  16591. + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
  16592. + tasklet_schedule(t);
  16593. +}
  16594. +EXPORT_SYMBOL(tasklet_enable);
  16595. - local_irq_disable();
  16596. - list = __this_cpu_read(tasklet_vec.head);
  16597. - __this_cpu_write(tasklet_vec.head, NULL);
  16598. - __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
  16599. - local_irq_enable();
  16600. +void tasklet_hi_enable(struct tasklet_struct *t)
  16601. +{
  16602. + if (!atomic_dec_and_test(&t->count))
  16603. + return;
  16604. + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
  16605. + tasklet_hi_schedule(t);
  16606. +}
  16607. +EXPORT_SYMBOL(tasklet_hi_enable);
  16608. +
  16609. +static void __tasklet_action(struct softirq_action *a,
  16610. + struct tasklet_struct *list)
  16611. +{
  16612. + int loops = 1000000;
  16613. while (list) {
  16614. struct tasklet_struct *t = list;
  16615. list = list->next;
  16616. - if (tasklet_trylock(t)) {
  16617. - if (!atomic_read(&t->count)) {
  16618. - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
  16619. - &t->state))
  16620. - BUG();
  16621. - t->func(t->data);
  16622. - tasklet_unlock(t);
  16623. - continue;
  16624. - }
  16625. - tasklet_unlock(t);
  16626. + /*
16627. + * Should always succeed - after a tasklet got on the
  16628. + * list (after getting the SCHED bit set from 0 to 1),
  16629. + * nothing but the tasklet softirq it got queued to can
  16630. + * lock it:
  16631. + */
  16632. + if (!tasklet_trylock(t)) {
  16633. + WARN_ON(1);
  16634. + continue;
  16635. }
  16636. - local_irq_disable();
  16637. t->next = NULL;
  16638. - *__this_cpu_read(tasklet_vec.tail) = t;
  16639. - __this_cpu_write(tasklet_vec.tail, &(t->next));
  16640. - __raise_softirq_irqoff(TASKLET_SOFTIRQ);
  16641. - local_irq_enable();
  16642. +
  16643. + /*
  16644. + * If we cannot handle the tasklet because it's disabled,
  16645. + * mark it as pending. tasklet_enable() will later
  16646. + * re-schedule the tasklet.
  16647. + */
  16648. + if (unlikely(atomic_read(&t->count))) {
  16649. +out_disabled:
  16650. + /* implicit unlock: */
  16651. + wmb();
  16652. + t->state = TASKLET_STATEF_PENDING;
  16653. + continue;
  16654. + }
  16655. +
  16656. + /*
  16657. + * After this point on the tasklet might be rescheduled
  16658. + * on another CPU, but it can only be added to another
  16659. + * CPU's tasklet list if we unlock the tasklet (which we
16660. + * don't do yet).
  16661. + */
  16662. + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
  16663. + WARN_ON(1);
  16664. +
  16665. +again:
  16666. + t->func(t->data);
  16667. +
  16668. + /*
  16669. + * Try to unlock the tasklet. We must use cmpxchg, because
  16670. + * another CPU might have scheduled or disabled the tasklet.
  16671. + * We only allow the STATE_RUN -> 0 transition here.
  16672. + */
  16673. + while (!tasklet_tryunlock(t)) {
  16674. + /*
  16675. + * If it got disabled meanwhile, bail out:
  16676. + */
  16677. + if (atomic_read(&t->count))
  16678. + goto out_disabled;
  16679. + /*
  16680. + * If it got scheduled meanwhile, re-execute
  16681. + * the tasklet function:
  16682. + */
  16683. + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
  16684. + goto again;
  16685. + if (!--loops) {
  16686. + printk("hm, tasklet state: %08lx\n", t->state);
  16687. + WARN_ON(1);
  16688. + tasklet_unlock(t);
  16689. + break;
  16690. + }
  16691. + }
  16692. }
  16693. }
  16694. +static void tasklet_action(struct softirq_action *a)
  16695. +{
  16696. + struct tasklet_struct *list;
  16697. +
  16698. + local_irq_disable();
  16699. + list = __get_cpu_var(tasklet_vec).head;
  16700. + __get_cpu_var(tasklet_vec).head = NULL;
  16701. + __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
  16702. + local_irq_enable();
  16703. +
  16704. + __tasklet_action(a, list);
  16705. +}
  16706. +
  16707. static void tasklet_hi_action(struct softirq_action *a)
  16708. {
  16709. struct tasklet_struct *list;
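The reworked __tasklet_action() above relies on the RUN/SCHED bit protocol: tasklet_tryunlock() only succeeds on a clean RUN -> 0 transition, so a concurrent tasklet_schedule() (which sets SCHED) forces the runner to execute the callback again instead of losing the request. A user-space sketch of that protocol with C11 atomics (the disable-count and PENDING handling are deliberately omitted):

#include <stdatomic.h>
#include <stdio.h>

#define TASKLET_STATE_SCHED	(1u << 0)
#define TASKLET_STATE_RUN	(1u << 1)

static _Atomic unsigned int state;

static int trylock(void)
{
	/* succeeds only if RUN was not already set */
	return !(atomic_fetch_or(&state, TASKLET_STATE_RUN) & TASKLET_STATE_RUN);
}

static int tryunlock(void)
{
	unsigned int expect = TASKLET_STATE_RUN;	/* RUN set, SCHED clear */

	/* only the clean RUN -> 0 transition may drop the lock */
	return atomic_compare_exchange_strong(&state, &expect, 0);
}

int main(void)
{
	atomic_fetch_or(&state, TASKLET_STATE_SCHED);	/* tasklet_schedule() */

	if (trylock()) {
		atomic_fetch_and(&state, ~TASKLET_STATE_SCHED);
		printf("run tasklet callback\n");

		/* a concurrent tasklet_schedule() slips in before we unlock */
		atomic_fetch_or(&state, TASKLET_STATE_SCHED);

		while (!tryunlock()) {
			if (atomic_fetch_and(&state, ~TASKLET_STATE_SCHED) &
			    TASKLET_STATE_SCHED)
				printf("run tasklet callback again\n");
		}
	}
	return 0;
}
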
  16710. @@ -524,30 +1009,7 @@
  16711. __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
  16712. local_irq_enable();
  16713. - while (list) {
  16714. - struct tasklet_struct *t = list;
  16715. -
  16716. - list = list->next;
  16717. -
  16718. - if (tasklet_trylock(t)) {
  16719. - if (!atomic_read(&t->count)) {
  16720. - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
  16721. - &t->state))
  16722. - BUG();
  16723. - t->func(t->data);
  16724. - tasklet_unlock(t);
  16725. - continue;
  16726. - }
  16727. - tasklet_unlock(t);
  16728. - }
  16729. -
  16730. - local_irq_disable();
  16731. - t->next = NULL;
  16732. - *__this_cpu_read(tasklet_hi_vec.tail) = t;
  16733. - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
  16734. - __raise_softirq_irqoff(HI_SOFTIRQ);
  16735. - local_irq_enable();
  16736. - }
  16737. + __tasklet_action(a, list);
  16738. }
  16739. void tasklet_init(struct tasklet_struct *t,
  16740. @@ -568,7 +1030,7 @@
  16741. while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
  16742. do {
  16743. - yield();
  16744. + msleep(1);
  16745. } while (test_bit(TASKLET_STATE_SCHED, &t->state));
  16746. }
  16747. tasklet_unlock_wait(t);
  16748. @@ -642,26 +1104,26 @@
  16749. open_softirq(HI_SOFTIRQ, tasklet_hi_action);
  16750. }
  16751. -static int ksoftirqd_should_run(unsigned int cpu)
  16752. -{
  16753. - return local_softirq_pending();
  16754. -}
  16755. -
  16756. -static void run_ksoftirqd(unsigned int cpu)
  16757. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  16758. +void tasklet_unlock_wait(struct tasklet_struct *t)
  16759. {
  16760. - local_irq_disable();
  16761. - if (local_softirq_pending()) {
  16762. + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
  16763. /*
  16764. - * We can safely run softirq on inline stack, as we are not deep
  16765. - * in the task stack here.
  16766. + * Hack for now to avoid this busy-loop:
  16767. */
  16768. - __do_softirq();
  16769. - rcu_note_context_switch(cpu);
  16770. - local_irq_enable();
  16771. - cond_resched();
  16772. - return;
  16773. +#ifdef CONFIG_PREEMPT_RT_FULL
  16774. + msleep(1);
  16775. +#else
  16776. + barrier();
  16777. +#endif
  16778. }
  16779. - local_irq_enable();
  16780. +}
  16781. +EXPORT_SYMBOL(tasklet_unlock_wait);
  16782. +#endif
  16783. +
  16784. +static int ksoftirqd_should_run(unsigned int cpu)
  16785. +{
  16786. + return ksoftirqd_softirq_pending();
  16787. }
  16788. #ifdef CONFIG_HOTPLUG_CPU
  16789. @@ -743,6 +1205,8 @@
  16790. static struct smp_hotplug_thread softirq_threads = {
  16791. .store = &ksoftirqd,
  16792. + .setup = ksoftirqd_set_sched_params,
  16793. + .cleanup = ksoftirqd_clr_sched_params,
  16794. .thread_should_run = ksoftirqd_should_run,
  16795. .thread_fn = run_ksoftirqd,
  16796. .thread_comm = "ksoftirqd/%u",
  16797. diff -Nur linux-3.18.10.orig/kernel/stop_machine.c linux-3.18.10/kernel/stop_machine.c
  16798. --- linux-3.18.10.orig/kernel/stop_machine.c 2015-03-24 02:05:12.000000000 +0100
  16799. +++ linux-3.18.10/kernel/stop_machine.c 2015-03-26 12:42:18.683588345 +0100
  16800. @@ -30,12 +30,12 @@
  16801. atomic_t nr_todo; /* nr left to execute */
  16802. bool executed; /* actually executed? */
  16803. int ret; /* collected return value */
  16804. - struct completion completion; /* fired if nr_todo reaches 0 */
  16805. + struct task_struct *waiter; /* woken when nr_todo reaches 0 */
  16806. };
  16807. /* the actual stopper, one per every possible cpu, enabled on online cpus */
  16808. struct cpu_stopper {
  16809. - spinlock_t lock;
  16810. + raw_spinlock_t lock;
  16811. bool enabled; /* is this stopper enabled? */
  16812. struct list_head works; /* list of pending works */
  16813. };
  16814. @@ -56,7 +56,7 @@
  16815. {
  16816. memset(done, 0, sizeof(*done));
  16817. atomic_set(&done->nr_todo, nr_todo);
  16818. - init_completion(&done->completion);
  16819. + done->waiter = current;
  16820. }
  16821. /* signal completion unless @done is NULL */
  16822. @@ -65,8 +65,10 @@
  16823. if (done) {
  16824. if (executed)
  16825. done->executed = true;
  16826. - if (atomic_dec_and_test(&done->nr_todo))
  16827. - complete(&done->completion);
  16828. + if (atomic_dec_and_test(&done->nr_todo)) {
  16829. + wake_up_process(done->waiter);
  16830. + done->waiter = NULL;
  16831. + }
  16832. }
  16833. }
  16834. @@ -78,7 +80,7 @@
  16835. unsigned long flags;
  16836. - spin_lock_irqsave(&stopper->lock, flags);
  16837. + raw_spin_lock_irqsave(&stopper->lock, flags);
  16838. if (stopper->enabled) {
  16839. list_add_tail(&work->list, &stopper->works);
  16840. @@ -86,7 +88,23 @@
  16841. } else
  16842. cpu_stop_signal_done(work->done, false);
  16843. - spin_unlock_irqrestore(&stopper->lock, flags);
  16844. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  16845. +}
  16846. +
  16847. +static void wait_for_stop_done(struct cpu_stop_done *done)
  16848. +{
  16849. + set_current_state(TASK_UNINTERRUPTIBLE);
  16850. + while (atomic_read(&done->nr_todo)) {
  16851. + schedule();
  16852. + set_current_state(TASK_UNINTERRUPTIBLE);
  16853. + }
  16854. + /*
  16855. + * We need to wait until cpu_stop_signal_done() has cleared
  16856. + * done->waiter.
  16857. + */
  16858. + while (done->waiter)
  16859. + cpu_relax();
  16860. + set_current_state(TASK_RUNNING);
  16861. }
  16862. /**
  16863. @@ -120,7 +138,7 @@
  16864. cpu_stop_init_done(&done, 1);
  16865. cpu_stop_queue_work(cpu, &work);
  16866. - wait_for_completion(&done.completion);
  16867. + wait_for_stop_done(&done);
  16868. return done.executed ? done.ret : -ENOENT;
  16869. }
  16870. @@ -248,7 +266,7 @@
  16871. struct irq_cpu_stop_queue_work_info call_args;
  16872. struct multi_stop_data msdata;
  16873. - preempt_disable();
  16874. + preempt_disable_nort();
  16875. msdata = (struct multi_stop_data){
  16876. .fn = fn,
  16877. .data = arg,
  16878. @@ -281,7 +299,7 @@
  16879. * This relies on the stopper workqueues to be FIFO.
  16880. */
  16881. if (!cpu_active(cpu1) || !cpu_active(cpu2)) {
  16882. - preempt_enable();
  16883. + preempt_enable_nort();
  16884. return -ENOENT;
  16885. }
  16886. @@ -295,9 +313,9 @@
  16887. &irq_cpu_stop_queue_work,
  16888. &call_args, 1);
  16889. lg_local_unlock(&stop_cpus_lock);
  16890. - preempt_enable();
  16891. + preempt_enable_nort();
  16892. - wait_for_completion(&done.completion);
  16893. + wait_for_stop_done(&done);
  16894. return done.executed ? done.ret : -ENOENT;
  16895. }
  16896. @@ -329,7 +347,7 @@
  16897. static void queue_stop_cpus_work(const struct cpumask *cpumask,
  16898. cpu_stop_fn_t fn, void *arg,
  16899. - struct cpu_stop_done *done)
  16900. + struct cpu_stop_done *done, bool inactive)
  16901. {
  16902. struct cpu_stop_work *work;
  16903. unsigned int cpu;
  16904. @@ -343,11 +361,13 @@
  16905. }
  16906. /*
  16907. - * Disable preemption while queueing to avoid getting
  16908. - * preempted by a stopper which might wait for other stoppers
  16909. - * to enter @fn which can lead to deadlock.
  16910. + * Make sure that all work is queued on all cpus before
  16911. + * any of the cpus can execute it.
  16912. */
  16913. - lg_global_lock(&stop_cpus_lock);
  16914. + if (!inactive)
  16915. + lg_global_lock(&stop_cpus_lock);
  16916. + else
  16917. + lg_global_trylock_relax(&stop_cpus_lock);
  16918. for_each_cpu(cpu, cpumask)
  16919. cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
  16920. lg_global_unlock(&stop_cpus_lock);
  16921. @@ -359,8 +379,8 @@
  16922. struct cpu_stop_done done;
  16923. cpu_stop_init_done(&done, cpumask_weight(cpumask));
  16924. - queue_stop_cpus_work(cpumask, fn, arg, &done);
  16925. - wait_for_completion(&done.completion);
  16926. + queue_stop_cpus_work(cpumask, fn, arg, &done, false);
  16927. + wait_for_stop_done(&done);
  16928. return done.executed ? done.ret : -ENOENT;
  16929. }
  16930. @@ -439,9 +459,9 @@
  16931. unsigned long flags;
  16932. int run;
  16933. - spin_lock_irqsave(&stopper->lock, flags);
  16934. + raw_spin_lock_irqsave(&stopper->lock, flags);
  16935. run = !list_empty(&stopper->works);
  16936. - spin_unlock_irqrestore(&stopper->lock, flags);
  16937. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  16938. return run;
  16939. }
  16940. @@ -453,13 +473,13 @@
  16941. repeat:
  16942. work = NULL;
  16943. - spin_lock_irq(&stopper->lock);
  16944. + raw_spin_lock_irq(&stopper->lock);
  16945. if (!list_empty(&stopper->works)) {
  16946. work = list_first_entry(&stopper->works,
  16947. struct cpu_stop_work, list);
  16948. list_del_init(&work->list);
  16949. }
  16950. - spin_unlock_irq(&stopper->lock);
  16951. + raw_spin_unlock_irq(&stopper->lock);
  16952. if (work) {
  16953. cpu_stop_fn_t fn = work->fn;
  16954. @@ -467,6 +487,16 @@
  16955. struct cpu_stop_done *done = work->done;
  16956. char ksym_buf[KSYM_NAME_LEN] __maybe_unused;
  16957. + /*
16958. + * Wait until the stopper has finished scheduling on all
16959. + * cpus.
  16960. + */
  16961. + lg_global_lock(&stop_cpus_lock);
  16962. + /*
  16963. + * Let other cpu threads continue as well
  16964. + */
  16965. + lg_global_unlock(&stop_cpus_lock);
  16966. +
  16967. /* cpu stop callbacks are not allowed to sleep */
  16968. preempt_disable();
  16969. @@ -481,7 +511,13 @@
  16970. kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
  16971. ksym_buf), arg);
  16972. + /*
  16973. + * Make sure that the wakeup and setting done->waiter
  16974. + * to NULL is atomic.
  16975. + */
  16976. + local_irq_disable();
  16977. cpu_stop_signal_done(done, true);
  16978. + local_irq_enable();
  16979. goto repeat;
  16980. }
  16981. }
  16982. @@ -500,20 +536,20 @@
  16983. unsigned long flags;
  16984. /* drain remaining works */
  16985. - spin_lock_irqsave(&stopper->lock, flags);
  16986. + raw_spin_lock_irqsave(&stopper->lock, flags);
  16987. list_for_each_entry(work, &stopper->works, list)
  16988. cpu_stop_signal_done(work->done, false);
  16989. stopper->enabled = false;
  16990. - spin_unlock_irqrestore(&stopper->lock, flags);
  16991. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  16992. }
  16993. static void cpu_stop_unpark(unsigned int cpu)
  16994. {
  16995. struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
  16996. - spin_lock_irq(&stopper->lock);
  16997. + raw_spin_lock_irq(&stopper->lock);
  16998. stopper->enabled = true;
  16999. - spin_unlock_irq(&stopper->lock);
  17000. + raw_spin_unlock_irq(&stopper->lock);
  17001. }
  17002. static struct smp_hotplug_thread cpu_stop_threads = {
  17003. @@ -535,10 +571,12 @@
  17004. for_each_possible_cpu(cpu) {
  17005. struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
  17006. - spin_lock_init(&stopper->lock);
  17007. + raw_spin_lock_init(&stopper->lock);
  17008. INIT_LIST_HEAD(&stopper->works);
  17009. }
  17010. + lg_lock_init(&stop_cpus_lock, "stop_cpus_lock");
  17011. +
  17012. BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
  17013. stop_machine_initialized = true;
  17014. return 0;
  17015. @@ -634,11 +672,11 @@
  17016. set_state(&msdata, MULTI_STOP_PREPARE);
  17017. cpu_stop_init_done(&done, num_active_cpus());
  17018. queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
  17019. - &done);
  17020. + &done, true);
  17021. ret = multi_cpu_stop(&msdata);
  17022. /* Busy wait for completion. */
  17023. - while (!completion_done(&done.completion))
  17024. + while (atomic_read(&done.nr_todo))
  17025. cpu_relax();
  17026. mutex_unlock(&stop_cpus_mutex);
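The stop_machine changes above replace the completion (whose wait path takes a sleeping lock, unusable here on RT) with a bare waiter: workers decrement nr_todo, the last one wakes the waiter and clears done->waiter, and the waiter additionally spins until that clear because the done structure lives on its stack. A user-space model of the handshake, with threads standing in for the per-CPU stoppers:

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

struct stop_done {
	atomic_int nr_todo;
	atomic_int waiter_armed;	/* models the done->waiter pointer */
};

static void signal_done(struct stop_done *done)
{
	if (atomic_fetch_sub(&done->nr_todo, 1) == 1) {
		printf("last worker: waking the waiter\n");
		atomic_store(&done->waiter_armed, 0);	/* done->waiter = NULL */
	}
}

static void *stopper_thread(void *arg)
{
	signal_done(arg);
	return NULL;
}

int main(void)
{
	struct stop_done done = { .nr_todo = 2, .waiter_armed = 1 };
	pthread_t t[2];

	for (int i = 0; i < 2; i++)
		pthread_create(&t[i], NULL, stopper_thread, &done);

	while (atomic_load(&done.nr_todo))	/* wait_for_stop_done() */
		sched_yield();
	while (atomic_load(&done.waiter_armed))	/* wait for the final clear */
		sched_yield();

	for (int i = 0; i < 2; i++)
		pthread_join(t[i], NULL);
	printf("all stoppers done, 'done' may now go out of scope\n");
	return 0;
}
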
  17027. diff -Nur linux-3.18.10.orig/kernel/time/hrtimer.c linux-3.18.10/kernel/time/hrtimer.c
  17028. --- linux-3.18.10.orig/kernel/time/hrtimer.c 2015-03-24 02:05:12.000000000 +0100
  17029. +++ linux-3.18.10/kernel/time/hrtimer.c 2015-03-26 12:42:18.683588345 +0100
  17030. @@ -48,11 +48,13 @@
  17031. #include <linux/sched/rt.h>
  17032. #include <linux/sched/deadline.h>
  17033. #include <linux/timer.h>
  17034. +#include <linux/kthread.h>
  17035. #include <linux/freezer.h>
  17036. #include <asm/uaccess.h>
  17037. #include <trace/events/timer.h>
  17038. +#include <trace/events/hist.h>
  17039. #include "timekeeping.h"
  17040. @@ -568,8 +570,7 @@
  17041. * When the callback is running, we do not reprogram the clock event
  17042. * device. The timer callback is either running on a different CPU or
  17043. * the callback is executed in the hrtimer_interrupt context. The
  17044. - * reprogramming is handled either by the softirq, which called the
  17045. - * callback or at the end of the hrtimer_interrupt.
  17046. + * reprogramming is handled at the end of the hrtimer_interrupt.
  17047. */
  17048. if (hrtimer_callback_running(timer))
  17049. return 0;
  17050. @@ -604,6 +605,9 @@
  17051. return res;
  17052. }
  17053. +static void __run_hrtimer(struct hrtimer *timer, ktime_t *now);
  17054. +static int hrtimer_rt_defer(struct hrtimer *timer);
  17055. +
  17056. /*
  17057. * Initialize the high resolution related parts of cpu_base
  17058. */
  17059. @@ -613,6 +617,21 @@
  17060. base->hres_active = 0;
  17061. }
  17062. +static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
  17063. + struct hrtimer_clock_base *base,
  17064. + int wakeup)
  17065. +{
  17066. + if (!hrtimer_reprogram(timer, base))
  17067. + return 0;
  17068. + if (!wakeup)
  17069. + return -ETIME;
  17070. +#ifdef CONFIG_PREEMPT_RT_BASE
  17071. + if (!hrtimer_rt_defer(timer))
  17072. + return -ETIME;
  17073. +#endif
  17074. + return 1;
  17075. +}
  17076. +
  17077. static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
  17078. {
  17079. ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
  17080. @@ -678,6 +697,44 @@
  17081. static DECLARE_WORK(hrtimer_work, clock_was_set_work);
  17082. +#ifdef CONFIG_PREEMPT_RT_FULL
  17083. +/*
17084. + * RT cannot call schedule_work from real interrupt context.
  17085. + * Need to make a thread to do the real work.
  17086. + */
  17087. +static struct task_struct *clock_set_delay_thread;
  17088. +static bool do_clock_set_delay;
  17089. +
  17090. +static int run_clock_set_delay(void *ignore)
  17091. +{
  17092. + while (!kthread_should_stop()) {
  17093. + set_current_state(TASK_INTERRUPTIBLE);
  17094. + if (do_clock_set_delay) {
  17095. + do_clock_set_delay = false;
  17096. + schedule_work(&hrtimer_work);
  17097. + }
  17098. + schedule();
  17099. + }
  17100. + __set_current_state(TASK_RUNNING);
  17101. + return 0;
  17102. +}
  17103. +
  17104. +void clock_was_set_delayed(void)
  17105. +{
  17106. + do_clock_set_delay = true;
  17107. + /* Make visible before waking up process */
  17108. + smp_wmb();
  17109. + wake_up_process(clock_set_delay_thread);
  17110. +}
  17111. +
  17112. +static __init int create_clock_set_delay_thread(void)
  17113. +{
  17114. + clock_set_delay_thread = kthread_run(run_clock_set_delay, NULL, "kclksetdelayd");
  17115. + BUG_ON(!clock_set_delay_thread);
  17116. + return 0;
  17117. +}
  17118. +early_initcall(create_clock_set_delay_thread);
  17119. +#else /* PREEMPT_RT_FULL */
  17120. /*
  17121. * Called from timekeeping and resume code to reprogramm the hrtimer
  17122. * interrupt device on all cpus.
  17123. @@ -686,6 +743,7 @@
  17124. {
  17125. schedule_work(&hrtimer_work);
  17126. }
  17127. +#endif
  17128. #else
  17129. @@ -694,6 +752,13 @@
  17130. static inline int hrtimer_switch_to_hres(void) { return 0; }
  17131. static inline void
  17132. hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
  17133. +static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
  17134. + struct hrtimer_clock_base *base,
  17135. + int wakeup)
  17136. +{
  17137. + return 0;
  17138. +}
  17139. +
  17140. static inline int hrtimer_reprogram(struct hrtimer *timer,
  17141. struct hrtimer_clock_base *base)
  17142. {
  17143. @@ -701,7 +766,6 @@
  17144. }
  17145. static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
  17146. static inline void retrigger_next_event(void *arg) { }
  17147. -
  17148. #endif /* CONFIG_HIGH_RES_TIMERS */
  17149. /*
  17150. @@ -819,6 +883,32 @@
  17151. }
  17152. EXPORT_SYMBOL_GPL(hrtimer_forward);
  17153. +#ifdef CONFIG_PREEMPT_RT_BASE
  17154. +# define wake_up_timer_waiters(b) wake_up(&(b)->wait)
  17155. +
  17156. +/**
  17157. + * hrtimer_wait_for_timer - Wait for a running timer
  17158. + *
  17159. + * @timer: timer to wait for
  17160. + *
17161. + * The function waits on the timer base's waitqueue in case the
17162. + * timer's callback function is currently executing. The
  17163. + * waitqueue is woken up after the timer callback function has
  17164. + * finished execution.
  17165. + */
  17166. +void hrtimer_wait_for_timer(const struct hrtimer *timer)
  17167. +{
  17168. + struct hrtimer_clock_base *base = timer->base;
  17169. +
  17170. + if (base && base->cpu_base && !timer->irqsafe)
  17171. + wait_event(base->cpu_base->wait,
  17172. + !(timer->state & HRTIMER_STATE_CALLBACK));
  17173. +}
  17174. +
  17175. +#else
  17176. +# define wake_up_timer_waiters(b) do { } while (0)
  17177. +#endif
  17178. +
  17179. /*
  17180. * enqueue_hrtimer - internal function to (re)start a timer
  17181. *
  17182. @@ -862,6 +952,11 @@
  17183. if (!(timer->state & HRTIMER_STATE_ENQUEUED))
  17184. goto out;
  17185. + if (unlikely(!list_empty(&timer->cb_entry))) {
  17186. + list_del_init(&timer->cb_entry);
  17187. + goto out;
  17188. + }
  17189. +
  17190. next_timer = timerqueue_getnext(&base->active);
  17191. timerqueue_del(&base->active, &timer->node);
  17192. if (&timer->node == next_timer) {
  17193. @@ -949,7 +1044,16 @@
  17194. new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
  17195. timer_stats_hrtimer_set_start_info(timer);
  17196. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  17197. + {
  17198. + ktime_t now = new_base->get_time();
  17199. + if (ktime_to_ns(tim) < ktime_to_ns(now))
  17200. + timer->praecox = now;
  17201. + else
  17202. + timer->praecox = ktime_set(0, 0);
  17203. + }
  17204. +#endif
  17205. leftmost = enqueue_hrtimer(timer, new_base);
  17206. if (!leftmost) {
  17207. @@ -963,15 +1067,26 @@
  17208. * on dynticks target.
  17209. */
  17210. wake_up_nohz_cpu(new_base->cpu_base->cpu);
  17211. - } else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases) &&
  17212. - hrtimer_reprogram(timer, new_base)) {
  17213. + } else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases)) {
  17214. +
  17215. + ret = hrtimer_enqueue_reprogram(timer, new_base, wakeup);
  17216. + if (ret < 0) {
  17217. + /*
  17218. + * In case we failed to reprogram the timer (mostly
17219. + * because our current timer has already elapsed),
  17220. + * remove it again and report a failure. This avoids
  17221. + * stale base->first entries.
  17222. + */
  17223. + debug_deactivate(timer);
  17224. + __remove_hrtimer(timer, new_base,
  17225. + timer->state & HRTIMER_STATE_CALLBACK, 0);
  17226. + } else if (ret > 0) {
  17227. /*
  17228. * Only allow reprogramming if the new base is on this CPU.
  17229. * (it might still be on another CPU if the timer was pending)
  17230. *
  17231. * XXX send_remote_softirq() ?
  17232. */
  17233. - if (wakeup) {
  17234. /*
  17235. * We need to drop cpu_base->lock to avoid a
  17236. * lock ordering issue vs. rq->lock.
  17237. @@ -979,9 +1094,7 @@
  17238. raw_spin_unlock(&new_base->cpu_base->lock);
  17239. raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  17240. local_irq_restore(flags);
  17241. - return ret;
  17242. - } else {
  17243. - __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  17244. + return 0;
  17245. }
  17246. }
  17247. @@ -1072,7 +1185,7 @@
  17248. if (ret >= 0)
  17249. return ret;
  17250. - cpu_relax();
  17251. + hrtimer_wait_for_timer(timer);
  17252. }
  17253. }
  17254. EXPORT_SYMBOL_GPL(hrtimer_cancel);
  17255. @@ -1151,6 +1264,7 @@
  17256. base = hrtimer_clockid_to_base(clock_id);
  17257. timer->base = &cpu_base->clock_base[base];
  17258. + INIT_LIST_HEAD(&timer->cb_entry);
  17259. timerqueue_init(&timer->node);
  17260. #ifdef CONFIG_TIMER_STATS
  17261. @@ -1234,6 +1348,126 @@
  17262. timer->state &= ~HRTIMER_STATE_CALLBACK;
  17263. }
  17264. +static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
  17265. +
  17266. +#ifdef CONFIG_PREEMPT_RT_BASE
  17267. +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
  17268. + struct hrtimer_clock_base *base)
  17269. +{
  17270. + /*
  17271. + * Note, we clear the callback flag before we requeue the
17272. + * timer, otherwise we trigger the callback_running() check
  17273. + * in hrtimer_reprogram().
  17274. + */
  17275. + timer->state &= ~HRTIMER_STATE_CALLBACK;
  17276. +
  17277. + if (restart != HRTIMER_NORESTART) {
  17278. + BUG_ON(hrtimer_active(timer));
  17279. + /*
  17280. + * Enqueue the timer, if it's the leftmost timer then
  17281. + * we need to reprogram it.
  17282. + */
  17283. + if (!enqueue_hrtimer(timer, base))
  17284. + return;
  17285. +
  17286. +#ifndef CONFIG_HIGH_RES_TIMERS
  17287. + }
  17288. +#else
  17289. + if (base->cpu_base->hres_active &&
  17290. + hrtimer_reprogram(timer, base))
  17291. + goto requeue;
  17292. +
  17293. + } else if (hrtimer_active(timer)) {
  17294. + /*
  17295. + * If the timer was rearmed on another CPU, reprogram
  17296. + * the event device.
  17297. + */
  17298. + if (&timer->node == base->active.next &&
  17299. + base->cpu_base->hres_active &&
  17300. + hrtimer_reprogram(timer, base))
  17301. + goto requeue;
  17302. + }
  17303. + return;
  17304. +
  17305. +requeue:
  17306. + /*
  17307. + * Timer is expired. Thus move it from tree to pending list
  17308. + * again.
  17309. + */
  17310. + __remove_hrtimer(timer, base, timer->state, 0);
  17311. + list_add_tail(&timer->cb_entry, &base->expired);
  17312. +#endif
  17313. +}
  17314. +
  17315. +/*
  17316. + * The changes in mainline which removed the callback modes from
  17317. + * hrtimer are not yet working with -rt. The non wakeup_process()
  17318. + * based callbacks which involve sleeping locks need to be treated
17319. + * separately.
  17320. + */
  17321. +static void hrtimer_rt_run_pending(void)
  17322. +{
  17323. + enum hrtimer_restart (*fn)(struct hrtimer *);
  17324. + struct hrtimer_cpu_base *cpu_base;
  17325. + struct hrtimer_clock_base *base;
  17326. + struct hrtimer *timer;
  17327. + int index, restart;
  17328. +
  17329. + local_irq_disable();
  17330. + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
  17331. +
  17332. + raw_spin_lock(&cpu_base->lock);
  17333. +
  17334. + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
  17335. + base = &cpu_base->clock_base[index];
  17336. +
  17337. + while (!list_empty(&base->expired)) {
  17338. + timer = list_first_entry(&base->expired,
  17339. + struct hrtimer, cb_entry);
  17340. +
  17341. + /*
  17342. + * Same as the above __run_hrtimer function
  17343. + * just we run with interrupts enabled.
  17344. + */
  17345. + debug_hrtimer_deactivate(timer);
  17346. + __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
  17347. + timer_stats_account_hrtimer(timer);
  17348. + fn = timer->function;
  17349. +
  17350. + raw_spin_unlock_irq(&cpu_base->lock);
  17351. + restart = fn(timer);
  17352. + raw_spin_lock_irq(&cpu_base->lock);
  17353. +
  17354. + hrtimer_rt_reprogram(restart, timer, base);
  17355. + }
  17356. + }
  17357. +
  17358. + raw_spin_unlock_irq(&cpu_base->lock);
  17359. +
  17360. + wake_up_timer_waiters(cpu_base);
  17361. +}
  17362. +
  17363. +static int hrtimer_rt_defer(struct hrtimer *timer)
  17364. +{
  17365. + if (timer->irqsafe)
  17366. + return 0;
  17367. +
  17368. + __remove_hrtimer(timer, timer->base, timer->state, 0);
  17369. + list_add_tail(&timer->cb_entry, &timer->base->expired);
  17370. + return 1;
  17371. +}
  17372. +
  17373. +#else
  17374. +
  17375. +static inline void hrtimer_rt_run_pending(void)
  17376. +{
  17377. + hrtimer_peek_ahead_timers();
  17378. +}
  17379. +
  17380. +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
  17381. +
  17382. +#endif
  17383. +
  17384. #ifdef CONFIG_HIGH_RES_TIMERS
  17385. /*
  17386. @@ -1244,7 +1478,7 @@
  17387. {
  17388. struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
  17389. ktime_t expires_next, now, entry_time, delta;
  17390. - int i, retries = 0;
  17391. + int i, retries = 0, raise = 0;
  17392. BUG_ON(!cpu_base->hres_active);
  17393. cpu_base->nr_events++;
  17394. @@ -1279,6 +1513,15 @@
  17395. timer = container_of(node, struct hrtimer, node);
  17396. + trace_hrtimer_interrupt(raw_smp_processor_id(),
  17397. + ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ?
  17398. + timer->praecox : hrtimer_get_expires(timer),
  17399. + basenow)),
  17400. + current,
  17401. + timer->function == hrtimer_wakeup ?
  17402. + container_of(timer, struct hrtimer_sleeper,
  17403. + timer)->task : NULL);
  17404. +
  17405. /*
  17406. * The immediate goal for using the softexpires is
  17407. * minimizing wakeups, not running timers at the
  17408. @@ -1304,7 +1547,10 @@
  17409. break;
  17410. }
  17411. - __run_hrtimer(timer, &basenow);
  17412. + if (!hrtimer_rt_defer(timer))
  17413. + __run_hrtimer(timer, &basenow);
  17414. + else
  17415. + raise = 1;
  17416. }
  17417. }
  17418. @@ -1319,7 +1565,7 @@
  17419. if (expires_next.tv64 == KTIME_MAX ||
  17420. !tick_program_event(expires_next, 0)) {
  17421. cpu_base->hang_detected = 0;
  17422. - return;
  17423. + goto out;
  17424. }
  17425. /*
  17426. @@ -1363,6 +1609,9 @@
  17427. tick_program_event(expires_next, 1);
  17428. printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
  17429. ktime_to_ns(delta));
  17430. +out:
  17431. + if (raise)
  17432. + raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  17433. }
  17434. /*
  17435. @@ -1398,18 +1647,18 @@
  17436. __hrtimer_peek_ahead_timers();
  17437. local_irq_restore(flags);
  17438. }
  17439. -
  17440. -static void run_hrtimer_softirq(struct softirq_action *h)
  17441. -{
  17442. - hrtimer_peek_ahead_timers();
  17443. -}
  17444. -
  17445. #else /* CONFIG_HIGH_RES_TIMERS */
  17446. static inline void __hrtimer_peek_ahead_timers(void) { }
  17447. #endif /* !CONFIG_HIGH_RES_TIMERS */
  17448. +
  17449. +static void run_hrtimer_softirq(struct softirq_action *h)
  17450. +{
  17451. + hrtimer_rt_run_pending();
  17452. +}
  17453. +
  17454. /*
  17455. * Called from timer softirq every jiffy, expire hrtimers:
  17456. *
  17457. @@ -1442,7 +1691,7 @@
  17458. struct timerqueue_node *node;
  17459. struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
  17460. struct hrtimer_clock_base *base;
  17461. - int index, gettime = 1;
  17462. + int index, gettime = 1, raise = 0;
  17463. if (hrtimer_hres_active())
  17464. return;
  17465. @@ -1467,10 +1716,16 @@
  17466. hrtimer_get_expires_tv64(timer))
  17467. break;
  17468. - __run_hrtimer(timer, &base->softirq_time);
  17469. + if (!hrtimer_rt_defer(timer))
  17470. + __run_hrtimer(timer, &base->softirq_time);
  17471. + else
  17472. + raise = 1;
  17473. }
  17474. raw_spin_unlock(&cpu_base->lock);
  17475. }
  17476. +
  17477. + if (raise)
  17478. + raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  17479. }
  17480. /*
  17481. @@ -1492,16 +1747,18 @@
  17482. void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
  17483. {
  17484. sl->timer.function = hrtimer_wakeup;
  17485. + sl->timer.irqsafe = 1;
  17486. sl->task = task;
  17487. }
  17488. EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
  17489. -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
  17490. +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode,
  17491. + unsigned long state)
  17492. {
  17493. hrtimer_init_sleeper(t, current);
  17494. do {
  17495. - set_current_state(TASK_INTERRUPTIBLE);
  17496. + set_current_state(state);
  17497. hrtimer_start_expires(&t->timer, mode);
  17498. if (!hrtimer_active(&t->timer))
  17499. t->task = NULL;
  17500. @@ -1545,7 +1802,8 @@
  17501. HRTIMER_MODE_ABS);
  17502. hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
  17503. - if (do_nanosleep(&t, HRTIMER_MODE_ABS))
  17504. + /* cpu_chill() does not care about restart state. */
  17505. + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE))
  17506. goto out;
  17507. rmtp = restart->nanosleep.rmtp;
  17508. @@ -1562,8 +1820,10 @@
  17509. return ret;
  17510. }
  17511. -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  17512. - const enum hrtimer_mode mode, const clockid_t clockid)
  17513. +static long
  17514. +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  17515. + const enum hrtimer_mode mode, const clockid_t clockid,
  17516. + unsigned long state)
  17517. {
  17518. struct restart_block *restart;
  17519. struct hrtimer_sleeper t;
  17520. @@ -1576,7 +1836,7 @@
  17521. hrtimer_init_on_stack(&t.timer, clockid, mode);
  17522. hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
  17523. - if (do_nanosleep(&t, mode))
  17524. + if (do_nanosleep(&t, mode, state))
  17525. goto out;
  17526. /* Absolute timers do not update the rmtp value and restart: */
  17527. @@ -1603,6 +1863,12 @@
  17528. return ret;
  17529. }
  17530. +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  17531. + const enum hrtimer_mode mode, const clockid_t clockid)
  17532. +{
  17533. + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE);
  17534. +}
  17535. +
  17536. SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
  17537. struct timespec __user *, rmtp)
  17538. {
  17539. @@ -1617,6 +1883,26 @@
  17540. return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
  17541. }
  17542. +#ifdef CONFIG_PREEMPT_RT_FULL
  17543. +/*
17544. + * Sleep for 1 ms in the hope that whoever holds what we want will let it go.
  17545. + */
  17546. +void cpu_chill(void)
  17547. +{
  17548. + struct timespec tu = {
  17549. + .tv_nsec = NSEC_PER_MSEC,
  17550. + };
  17551. + unsigned int freeze_flag = current->flags & PF_NOFREEZE;
  17552. +
  17553. + current->flags |= PF_NOFREEZE;
  17554. + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC,
  17555. + TASK_UNINTERRUPTIBLE);
  17556. + if (!freeze_flag)
  17557. + current->flags &= ~PF_NOFREEZE;
  17558. +}
  17559. +EXPORT_SYMBOL(cpu_chill);
  17560. +#endif
  17561. +
  17562. /*
  17563. * Functions related to boot-time initialization:
  17564. */
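cpu_chill() above gives RT code a replacement for cpu_relax() in trylock/retry loops: instead of burning the CPU while the lock holder may be preempted, the caller sleeps for 1 ms (non-freezable) and retries. A user-space analogue of that retry pattern (the pthread names are stand-ins, not kernel API):

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t resource = PTHREAD_MUTEX_INITIALIZER;

static void chill(void)			/* stand-in for cpu_chill() */
{
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 1000 * 1000 };

	nanosleep(&ts, NULL);
}

int main(void)
{
	/* A trylock/retry loop; the kernel equivalent would use cpu_relax()
	 * here on !RT and cpu_chill() on PREEMPT_RT_FULL. */
	while (pthread_mutex_trylock(&resource) != 0)
		chill();		/* let the (possibly preempted) owner run */
	printf("resource acquired\n");
	pthread_mutex_unlock(&resource);
	return 0;
}
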
  17565. @@ -1628,10 +1914,14 @@
  17566. for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
  17567. cpu_base->clock_base[i].cpu_base = cpu_base;
  17568. timerqueue_init_head(&cpu_base->clock_base[i].active);
  17569. + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
  17570. }
  17571. cpu_base->cpu = cpu;
  17572. hrtimer_init_hres(cpu_base);
  17573. +#ifdef CONFIG_PREEMPT_RT_BASE
  17574. + init_waitqueue_head(&cpu_base->wait);
  17575. +#endif
  17576. }
  17577. #ifdef CONFIG_HOTPLUG_CPU
  17578. @@ -1744,9 +2034,7 @@
  17579. hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
  17580. (void *)(long)smp_processor_id());
  17581. register_cpu_notifier(&hrtimers_nb);
  17582. -#ifdef CONFIG_HIGH_RES_TIMERS
  17583. open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
  17584. -#endif
  17585. }
  17586. /**
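Taken together, the hrtimer changes above defer every expired timer that is not marked irqsafe: hrtimer_interrupt() moves it to the per-base expired list and raises HRTIMER_SOFTIRQ, and hrtimer_rt_run_pending() later runs the callback with interrupts enabled. A compact user-space model of that split (list handling and locking reduced to the bare minimum):

#include <stdio.h>

struct timer {
	const char *name;
	int irqsafe;
	void (*fn)(struct timer *);
};

static struct timer *expired[8];
static int nr_expired;

static void hello(struct timer *t) { printf("%s fired\n", t->name); }

static void hrtimer_interrupt_model(struct timer *t)
{
	if (t->irqsafe)
		t->fn(t);			/* run immediately, "irqs off" */
	else
		expired[nr_expired++] = t;	/* models hrtimer_rt_defer() */
}

static void softirq_model(void)			/* models hrtimer_rt_run_pending() */
{
	for (int i = 0; i < nr_expired; i++)
		expired[i]->fn(expired[i]);
	nr_expired = 0;
}

int main(void)
{
	struct timer tick = { "irqsafe-timer", 1, hello };
	struct timer user = { "deferred-timer", 0, hello };

	hrtimer_interrupt_model(&tick);	/* prints immediately */
	hrtimer_interrupt_model(&user);	/* queued for later */
	softirq_model();		/* prints the deferred one */
	return 0;
}
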
  17587. diff -Nur linux-3.18.10.orig/kernel/time/itimer.c linux-3.18.10/kernel/time/itimer.c
  17588. --- linux-3.18.10.orig/kernel/time/itimer.c 2015-03-24 02:05:12.000000000 +0100
  17589. +++ linux-3.18.10/kernel/time/itimer.c 2015-03-26 12:42:18.683588345 +0100
  17590. @@ -213,6 +213,7 @@
  17591. /* We are sharing ->siglock with it_real_fn() */
  17592. if (hrtimer_try_to_cancel(timer) < 0) {
  17593. spin_unlock_irq(&tsk->sighand->siglock);
  17594. + hrtimer_wait_for_timer(&tsk->signal->real_timer);
  17595. goto again;
  17596. }
  17597. expires = timeval_to_ktime(value->it_value);
  17598. diff -Nur linux-3.18.10.orig/kernel/time/jiffies.c linux-3.18.10/kernel/time/jiffies.c
  17599. --- linux-3.18.10.orig/kernel/time/jiffies.c 2015-03-24 02:05:12.000000000 +0100
  17600. +++ linux-3.18.10/kernel/time/jiffies.c 2015-03-26 12:42:18.683588345 +0100
  17601. @@ -73,7 +73,8 @@
  17602. .shift = JIFFIES_SHIFT,
  17603. };
  17604. -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
  17605. +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
  17606. +__cacheline_aligned_in_smp seqcount_t jiffies_seq;
  17607. #if (BITS_PER_LONG < 64)
  17608. u64 get_jiffies_64(void)
  17609. @@ -82,9 +83,9 @@
  17610. u64 ret;
  17611. do {
  17612. - seq = read_seqbegin(&jiffies_lock);
  17613. + seq = read_seqcount_begin(&jiffies_seq);
  17614. ret = jiffies_64;
  17615. - } while (read_seqretry(&jiffies_lock, seq));
  17616. + } while (read_seqcount_retry(&jiffies_seq, seq));
  17617. return ret;
  17618. }
  17619. EXPORT_SYMBOL(get_jiffies_64);
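The jiffies hunk above splits the seqlock into a raw spinlock for writers plus a separate seqcount for lockless readers, so the read side never touches a sleeping lock on RT. A stand-alone model of the read/retry protocol (memory ordering is simplified here; the kernel primitives carry the real barriers):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uint jiffies_seq;
static _Atomic uint64_t jiffies_64;

static void tick(void)			/* write side, held under the raw lock */
{
	atomic_fetch_add(&jiffies_seq, 1);	/* odd: update in progress */
	atomic_fetch_add(&jiffies_64, 1);
	atomic_fetch_add(&jiffies_seq, 1);	/* even: stable again */
}

static uint64_t get_jiffies_64_model(void)
{
	unsigned int seq;
	uint64_t ret;

	do {
		while ((seq = atomic_load(&jiffies_seq)) & 1)
			;			/* writer active: retry */
		ret = atomic_load(&jiffies_64);
	} while (atomic_load(&jiffies_seq) != seq);
	return ret;
}

int main(void)
{
	tick();
	printf("jiffies: %llu\n", (unsigned long long)get_jiffies_64_model());
	return 0;
}
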
  17620. diff -Nur linux-3.18.10.orig/kernel/time/ntp.c linux-3.18.10/kernel/time/ntp.c
  17621. --- linux-3.18.10.orig/kernel/time/ntp.c 2015-03-24 02:05:12.000000000 +0100
  17622. +++ linux-3.18.10/kernel/time/ntp.c 2015-03-26 12:42:18.683588345 +0100
  17623. @@ -10,6 +10,7 @@
  17624. #include <linux/workqueue.h>
  17625. #include <linux/hrtimer.h>
  17626. #include <linux/jiffies.h>
  17627. +#include <linux/kthread.h>
  17628. #include <linux/math64.h>
  17629. #include <linux/timex.h>
  17630. #include <linux/time.h>
  17631. @@ -519,10 +520,52 @@
  17632. &sync_cmos_work, timespec_to_jiffies(&next));
  17633. }
  17634. +#ifdef CONFIG_PREEMPT_RT_FULL
  17635. +/*
17636. + * RT cannot call schedule_delayed_work from real interrupt context.
  17637. + * Need to make a thread to do the real work.
  17638. + */
  17639. +static struct task_struct *cmos_delay_thread;
  17640. +static bool do_cmos_delay;
  17641. +
  17642. +static int run_cmos_delay(void *ignore)
  17643. +{
  17644. + while (!kthread_should_stop()) {
  17645. + set_current_state(TASK_INTERRUPTIBLE);
  17646. + if (do_cmos_delay) {
  17647. + do_cmos_delay = false;
  17648. + queue_delayed_work(system_power_efficient_wq,
  17649. + &sync_cmos_work, 0);
  17650. + }
  17651. + schedule();
  17652. + }
  17653. + __set_current_state(TASK_RUNNING);
  17654. + return 0;
  17655. +}
  17656. +
  17657. +void ntp_notify_cmos_timer(void)
  17658. +{
  17659. + do_cmos_delay = true;
  17660. + /* Make visible before waking up process */
  17661. + smp_wmb();
  17662. + wake_up_process(cmos_delay_thread);
  17663. +}
  17664. +
  17665. +static __init int create_cmos_delay_thread(void)
  17666. +{
  17667. + cmos_delay_thread = kthread_run(run_cmos_delay, NULL, "kcmosdelayd");
  17668. + BUG_ON(!cmos_delay_thread);
  17669. + return 0;
  17670. +}
  17671. +early_initcall(create_cmos_delay_thread);
  17672. +
  17673. +#else
  17674. +
  17675. void ntp_notify_cmos_timer(void)
  17676. {
  17677. queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
  17678. }
  17679. +#endif /* CONFIG_PREEMPT_RT_FULL */
  17680. #else
  17681. void ntp_notify_cmos_timer(void) { }
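
The kcmosdelayd thread above exists because on RT the path calling ntp_notify_cmos_timer() runs in a context that must not take sleeping locks, so queueing the delayed work is handed off to a dedicated kthread: set a flag, make it visible, wake the thread. Here is a minimal userspace analogue of that hand-off, using a semaphore as the wakeup; all names are invented for the example.

#include <pthread.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static sem_t cmos_kick;
static atomic_bool do_cmos_delay;

static void sync_cmos_clock(void)
{
        printf("syncing CMOS clock (may sleep here)\n");
}

/* Worker thread: sleeps until kicked, then does the work that may sleep. */
static void *cmos_delay_thread(void *arg)
{
        for (;;) {
                sem_wait(&cmos_kick);
                if (atomic_exchange(&do_cmos_delay, false))
                        sync_cmos_clock();
        }
        return NULL;
}

/* Safe to call from a context that cannot sleep. */
static void notify_cmos_timer(void)
{
        atomic_store(&do_cmos_delay, true);
        sem_post(&cmos_kick);                   /* async-signal-safe */
}

int main(void)
{
        pthread_t t;

        sem_init(&cmos_kick, 0, 0);
        pthread_create(&t, NULL, cmos_delay_thread, NULL);

        notify_cmos_timer();            /* what the timer path would do */

        usleep(100 * 1000);             /* give the worker time to run */
        return 0;
}
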
  17682. diff -Nur linux-3.18.10.orig/kernel/time/posix-cpu-timers.c linux-3.18.10/kernel/time/posix-cpu-timers.c
  17683. --- linux-3.18.10.orig/kernel/time/posix-cpu-timers.c 2015-03-24 02:05:12.000000000 +0100
  17684. +++ linux-3.18.10/kernel/time/posix-cpu-timers.c 2015-03-26 12:42:18.683588345 +0100
  17685. @@ -3,6 +3,7 @@
  17686. */
  17687. #include <linux/sched.h>
  17688. +#include <linux/sched/rt.h>
  17689. #include <linux/posix-timers.h>
  17690. #include <linux/errno.h>
  17691. #include <linux/math64.h>
  17692. @@ -626,7 +627,7 @@
  17693. /*
  17694. * Disarm any old timer after extracting its expiry time.
  17695. */
  17696. - WARN_ON_ONCE(!irqs_disabled());
  17697. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  17698. ret = 0;
  17699. old_incr = timer->it.cpu.incr;
  17700. @@ -1047,7 +1048,7 @@
  17701. /*
  17702. * Now re-arm for the new expiry time.
  17703. */
  17704. - WARN_ON_ONCE(!irqs_disabled());
  17705. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  17706. arm_timer(timer);
  17707. unlock_task_sighand(p, &flags);
  17708. @@ -1113,10 +1114,11 @@
  17709. sig = tsk->signal;
  17710. if (sig->cputimer.running) {
  17711. struct task_cputime group_sample;
  17712. + unsigned long flags;
  17713. - raw_spin_lock(&sig->cputimer.lock);
  17714. + raw_spin_lock_irqsave(&sig->cputimer.lock, flags);
  17715. group_sample = sig->cputimer.cputime;
  17716. - raw_spin_unlock(&sig->cputimer.lock);
  17717. + raw_spin_unlock_irqrestore(&sig->cputimer.lock, flags);
  17718. if (task_cputime_expired(&group_sample, &sig->cputime_expires))
  17719. return 1;
  17720. @@ -1130,13 +1132,13 @@
  17721. * already updated our counts. We need to check if any timers fire now.
  17722. * Interrupts are disabled.
  17723. */
  17724. -void run_posix_cpu_timers(struct task_struct *tsk)
  17725. +static void __run_posix_cpu_timers(struct task_struct *tsk)
  17726. {
  17727. LIST_HEAD(firing);
  17728. struct k_itimer *timer, *next;
  17729. unsigned long flags;
  17730. - WARN_ON_ONCE(!irqs_disabled());
  17731. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  17732. /*
  17733. * The fast path checks that there are no expired thread or thread
  17734. @@ -1194,6 +1196,190 @@
  17735. }
  17736. }
  17737. +#ifdef CONFIG_PREEMPT_RT_BASE
  17738. +#include <linux/kthread.h>
  17739. +#include <linux/cpu.h>
  17740. +DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
  17741. +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
  17742. +
  17743. +static int posix_cpu_timers_thread(void *data)
  17744. +{
  17745. + int cpu = (long)data;
  17746. +
  17747. + BUG_ON(per_cpu(posix_timer_task,cpu) != current);
  17748. +
  17749. + while (!kthread_should_stop()) {
  17750. + struct task_struct *tsk = NULL;
  17751. + struct task_struct *next = NULL;
  17752. +
  17753. + if (cpu_is_offline(cpu))
  17754. + goto wait_to_die;
  17755. +
  17756. + /* grab task list */
  17757. + raw_local_irq_disable();
  17758. + tsk = per_cpu(posix_timer_tasklist, cpu);
  17759. + per_cpu(posix_timer_tasklist, cpu) = NULL;
  17760. + raw_local_irq_enable();
  17761. +
  17762. + /* it's possible the list is empty, just return */
  17763. + if (!tsk) {
  17764. + set_current_state(TASK_INTERRUPTIBLE);
  17765. + schedule();
  17766. + __set_current_state(TASK_RUNNING);
  17767. + continue;
  17768. + }
  17769. +
  17770. + /* Process task list */
  17771. + while (1) {
  17772. + /* save next */
  17773. + next = tsk->posix_timer_list;
  17774. +
  17775. + /* run the task timers, clear its ptr and
  17776. + * unreference it
  17777. + */
  17778. + __run_posix_cpu_timers(tsk);
  17779. + tsk->posix_timer_list = NULL;
  17780. + put_task_struct(tsk);
  17781. +
  17782. + /* check if this is the last on the list */
  17783. + if (next == tsk)
  17784. + break;
  17785. + tsk = next;
  17786. + }
  17787. + }
  17788. + return 0;
  17789. +
  17790. +wait_to_die:
  17791. + /* Wait for kthread_stop */
  17792. + set_current_state(TASK_INTERRUPTIBLE);
  17793. + while (!kthread_should_stop()) {
  17794. + schedule();
  17795. + set_current_state(TASK_INTERRUPTIBLE);
  17796. + }
  17797. + __set_current_state(TASK_RUNNING);
  17798. + return 0;
  17799. +}
  17800. +
  17801. +static inline int __fastpath_timer_check(struct task_struct *tsk)
  17802. +{
  17803. + /* tsk == current, ensure it is safe to use ->signal/sighand */
  17804. + if (unlikely(tsk->exit_state))
  17805. + return 0;
  17806. +
  17807. + if (!task_cputime_zero(&tsk->cputime_expires))
  17808. + return 1;
  17809. +
  17810. + if (!task_cputime_zero(&tsk->signal->cputime_expires))
  17811. + return 1;
  17812. +
  17813. + return 0;
  17814. +}
  17815. +
  17816. +void run_posix_cpu_timers(struct task_struct *tsk)
  17817. +{
  17818. + unsigned long cpu = smp_processor_id();
  17819. + struct task_struct *tasklist;
  17820. +
  17821. + BUG_ON(!irqs_disabled());
  17822. + if(!per_cpu(posix_timer_task, cpu))
  17823. + return;
  17824. + /* get per-cpu references */
  17825. + tasklist = per_cpu(posix_timer_tasklist, cpu);
  17826. +
  17827. + /* check to see if we're already queued */
  17828. + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
  17829. + get_task_struct(tsk);
  17830. + if (tasklist) {
  17831. + tsk->posix_timer_list = tasklist;
  17832. + } else {
  17833. + /*
  17834. + * The list is terminated by a self-pointing
  17835. + * task_struct
  17836. + */
  17837. + tsk->posix_timer_list = tsk;
  17838. + }
  17839. + per_cpu(posix_timer_tasklist, cpu) = tsk;
  17840. +
  17841. + wake_up_process(per_cpu(posix_timer_task, cpu));
  17842. + }
  17843. +}
  17844. +
  17845. +/*
  17846. + * posix_cpu_thread_call - callback that gets triggered when a CPU is added.
  17847. + * Here we can start up the necessary per-CPU posix timer thread for the new CPU.
  17848. + */
  17849. +static int posix_cpu_thread_call(struct notifier_block *nfb,
  17850. + unsigned long action, void *hcpu)
  17851. +{
  17852. + int cpu = (long)hcpu;
  17853. + struct task_struct *p;
  17854. + struct sched_param param;
  17855. +
  17856. + switch (action) {
  17857. + case CPU_UP_PREPARE:
  17858. + p = kthread_create(posix_cpu_timers_thread, hcpu,
  17859. + "posixcputmr/%d",cpu);
  17860. + if (IS_ERR(p))
  17861. + return NOTIFY_BAD;
  17862. + p->flags |= PF_NOFREEZE;
  17863. + kthread_bind(p, cpu);
  17864. + /* Must be high prio to avoid getting starved */
  17865. + param.sched_priority = MAX_RT_PRIO-1;
  17866. + sched_setscheduler(p, SCHED_FIFO, &param);
  17867. + per_cpu(posix_timer_task,cpu) = p;
  17868. + break;
  17869. + case CPU_ONLINE:
  17870. + /* Strictly unnecessary, as first user will wake it. */
  17871. + wake_up_process(per_cpu(posix_timer_task,cpu));
  17872. + break;
  17873. +#ifdef CONFIG_HOTPLUG_CPU
  17874. + case CPU_UP_CANCELED:
  17875. + /* Unbind it from offline cpu so it can run. Fall thru. */
  17876. + kthread_bind(per_cpu(posix_timer_task, cpu),
  17877. + cpumask_any(cpu_online_mask));
  17878. + kthread_stop(per_cpu(posix_timer_task,cpu));
  17879. + per_cpu(posix_timer_task,cpu) = NULL;
  17880. + break;
  17881. + case CPU_DEAD:
  17882. + kthread_stop(per_cpu(posix_timer_task,cpu));
  17883. + per_cpu(posix_timer_task,cpu) = NULL;
  17884. + break;
  17885. +#endif
  17886. + }
  17887. + return NOTIFY_OK;
  17888. +}
  17889. +
  17890. +/* Register at highest priority so that task migration (migrate_all_tasks)
  17891. + * happens before everything else.
  17892. + */
  17893. +static struct notifier_block posix_cpu_thread_notifier = {
  17894. + .notifier_call = posix_cpu_thread_call,
  17895. + .priority = 10
  17896. +};
  17897. +
  17898. +static int __init posix_cpu_thread_init(void)
  17899. +{
  17900. + void *hcpu = (void *)(long)smp_processor_id();
  17901. + /* Start one for boot CPU. */
  17902. + unsigned long cpu;
  17903. +
  17904. + /* init the per-cpu posix_timer_tasklists */
  17905. + for_each_possible_cpu(cpu)
  17906. + per_cpu(posix_timer_tasklist, cpu) = NULL;
  17907. +
  17908. + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu);
  17909. + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu);
  17910. + register_cpu_notifier(&posix_cpu_thread_notifier);
  17911. + return 0;
  17912. +}
  17913. +early_initcall(posix_cpu_thread_init);
  17914. +#else /* CONFIG_PREEMPT_RT_BASE */
  17915. +void run_posix_cpu_timers(struct task_struct *tsk)
  17916. +{
  17917. + __run_posix_cpu_timers(tsk);
  17918. +}
  17919. +#endif /* CONFIG_PREEMPT_RT_BASE */
  17920. +
  17921. /*
  17922. * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
  17923. * The tsk->sighand->siglock must be held by the caller.
  17924. diff -Nur linux-3.18.10.orig/kernel/time/posix-timers.c linux-3.18.10/kernel/time/posix-timers.c
  17925. --- linux-3.18.10.orig/kernel/time/posix-timers.c 2015-03-24 02:05:12.000000000 +0100
  17926. +++ linux-3.18.10/kernel/time/posix-timers.c 2015-03-26 12:42:18.683588345 +0100
  17927. @@ -499,6 +499,7 @@
  17928. static struct pid *good_sigevent(sigevent_t * event)
  17929. {
  17930. struct task_struct *rtn = current->group_leader;
  17931. + int sig = event->sigev_signo;
  17932. if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
  17933. (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
  17934. @@ -507,7 +508,8 @@
  17935. return NULL;
  17936. if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
  17937. - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
  17938. + (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) ||
  17939. + sig_kernel_coredump(sig)))
  17940. return NULL;
  17941. return task_pid(rtn);
  17942. @@ -819,6 +821,20 @@
  17943. return overrun;
  17944. }
  17945. +/*
  17946. + * Protected by RCU!
  17947. + */
  17948. +static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
  17949. +{
  17950. +#ifdef CONFIG_PREEMPT_RT_FULL
  17951. + if (kc->timer_set == common_timer_set)
  17952. + hrtimer_wait_for_timer(&timr->it.real.timer);
  17953. + else
  17954. + /* FIXME: Whacky hack for posix-cpu-timers */
  17955. + schedule_timeout(1);
  17956. +#endif
  17957. +}
  17958. +
  17959. /* Set a POSIX.1b interval timer. */
  17960. /* timr->it_lock is taken. */
  17961. static int
  17962. @@ -896,6 +912,7 @@
  17963. if (!timr)
  17964. return -EINVAL;
  17965. + rcu_read_lock();
  17966. kc = clockid_to_kclock(timr->it_clock);
  17967. if (WARN_ON_ONCE(!kc || !kc->timer_set))
  17968. error = -EINVAL;
  17969. @@ -904,9 +921,12 @@
  17970. unlock_timer(timr, flag);
  17971. if (error == TIMER_RETRY) {
  17972. + timer_wait_for_callback(kc, timr);
  17973. rtn = NULL; // We already got the old time...
  17974. + rcu_read_unlock();
  17975. goto retry;
  17976. }
  17977. + rcu_read_unlock();
  17978. if (old_setting && !error &&
  17979. copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
  17980. @@ -944,10 +964,15 @@
  17981. if (!timer)
  17982. return -EINVAL;
  17983. + rcu_read_lock();
  17984. if (timer_delete_hook(timer) == TIMER_RETRY) {
  17985. unlock_timer(timer, flags);
  17986. + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
  17987. + timer);
  17988. + rcu_read_unlock();
  17989. goto retry_delete;
  17990. }
  17991. + rcu_read_unlock();
  17992. spin_lock(&current->sighand->siglock);
  17993. list_del(&timer->list);
  17994. @@ -973,8 +998,18 @@
  17995. retry_delete:
  17996. spin_lock_irqsave(&timer->it_lock, flags);
  17997. + /* On RT we can race with a deletion */
  17998. + if (!timer->it_signal) {
  17999. + unlock_timer(timer, flags);
  18000. + return;
  18001. + }
  18002. +
  18003. if (timer_delete_hook(timer) == TIMER_RETRY) {
  18004. + rcu_read_lock();
  18005. unlock_timer(timer, flags);
  18006. + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
  18007. + timer);
  18008. + rcu_read_unlock();
  18009. goto retry_delete;
  18010. }
  18011. list_del(&timer->list);
  18012. diff -Nur linux-3.18.10.orig/kernel/time/tick-common.c linux-3.18.10/kernel/time/tick-common.c
  18013. --- linux-3.18.10.orig/kernel/time/tick-common.c 2015-03-24 02:05:12.000000000 +0100
  18014. +++ linux-3.18.10/kernel/time/tick-common.c 2015-03-26 12:42:18.683588345 +0100
  18015. @@ -78,13 +78,15 @@
  18016. static void tick_periodic(int cpu)
  18017. {
  18018. if (tick_do_timer_cpu == cpu) {
  18019. - write_seqlock(&jiffies_lock);
  18020. + raw_spin_lock(&jiffies_lock);
  18021. + write_seqcount_begin(&jiffies_seq);
  18022. /* Keep track of the next tick event */
  18023. tick_next_period = ktime_add(tick_next_period, tick_period);
  18024. do_timer(1);
  18025. - write_sequnlock(&jiffies_lock);
  18026. + write_seqcount_end(&jiffies_seq);
  18027. + raw_spin_unlock(&jiffies_lock);
  18028. update_wall_time();
  18029. }
  18030. @@ -146,9 +148,9 @@
  18031. ktime_t next;
  18032. do {
  18033. - seq = read_seqbegin(&jiffies_lock);
  18034. + seq = read_seqcount_begin(&jiffies_seq);
  18035. next = tick_next_period;
  18036. - } while (read_seqretry(&jiffies_lock, seq));
  18037. + } while (read_seqcount_retry(&jiffies_seq, seq));
  18038. clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
  18039. diff -Nur linux-3.18.10.orig/kernel/time/tick-internal.h linux-3.18.10/kernel/time/tick-internal.h
  18040. --- linux-3.18.10.orig/kernel/time/tick-internal.h 2015-03-24 02:05:12.000000000 +0100
  18041. +++ linux-3.18.10/kernel/time/tick-internal.h 2015-03-26 12:42:18.683588345 +0100
  18042. @@ -6,7 +6,8 @@
  18043. #include "timekeeping.h"
  18044. -extern seqlock_t jiffies_lock;
  18045. +extern raw_spinlock_t jiffies_lock;
  18046. +extern seqcount_t jiffies_seq;
  18047. #define CS_NAME_LEN 32
  18048. diff -Nur linux-3.18.10.orig/kernel/time/tick-sched.c linux-3.18.10/kernel/time/tick-sched.c
  18049. --- linux-3.18.10.orig/kernel/time/tick-sched.c 2015-03-24 02:05:12.000000000 +0100
  18050. +++ linux-3.18.10/kernel/time/tick-sched.c 2015-03-26 12:42:18.683588345 +0100
  18051. @@ -62,7 +62,8 @@
  18052. return;
  18053. /* Reevalute with jiffies_lock held */
  18054. - write_seqlock(&jiffies_lock);
  18055. + raw_spin_lock(&jiffies_lock);
  18056. + write_seqcount_begin(&jiffies_seq);
  18057. delta = ktime_sub(now, last_jiffies_update);
  18058. if (delta.tv64 >= tick_period.tv64) {
  18059. @@ -85,10 +86,12 @@
  18060. /* Keep the tick_next_period variable up to date */
  18061. tick_next_period = ktime_add(last_jiffies_update, tick_period);
  18062. } else {
  18063. - write_sequnlock(&jiffies_lock);
  18064. + write_seqcount_end(&jiffies_seq);
  18065. + raw_spin_unlock(&jiffies_lock);
  18066. return;
  18067. }
  18068. - write_sequnlock(&jiffies_lock);
  18069. + write_seqcount_end(&jiffies_seq);
  18070. + raw_spin_unlock(&jiffies_lock);
  18071. update_wall_time();
  18072. }
  18073. @@ -99,12 +102,14 @@
  18074. {
  18075. ktime_t period;
  18076. - write_seqlock(&jiffies_lock);
  18077. + raw_spin_lock(&jiffies_lock);
  18078. + write_seqcount_begin(&jiffies_seq);
  18079. /* Did we start the jiffies update yet ? */
  18080. if (last_jiffies_update.tv64 == 0)
  18081. last_jiffies_update = tick_next_period;
  18082. period = last_jiffies_update;
  18083. - write_sequnlock(&jiffies_lock);
  18084. + write_seqcount_end(&jiffies_seq);
  18085. + raw_spin_unlock(&jiffies_lock);
  18086. return period;
  18087. }
  18088. @@ -222,6 +227,7 @@
  18089. static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
  18090. .func = nohz_full_kick_work_func,
  18091. + .flags = IRQ_WORK_HARD_IRQ,
  18092. };
  18093. /*
  18094. @@ -580,10 +586,10 @@
  18095. /* Read jiffies and the time when jiffies were updated last */
  18096. do {
  18097. - seq = read_seqbegin(&jiffies_lock);
  18098. + seq = read_seqcount_begin(&jiffies_seq);
  18099. last_update = last_jiffies_update;
  18100. last_jiffies = jiffies;
  18101. - } while (read_seqretry(&jiffies_lock, seq));
  18102. + } while (read_seqcount_retry(&jiffies_seq, seq));
  18103. if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) ||
  18104. arch_needs_cpu() || irq_work_needs_cpu()) {
  18105. @@ -761,14 +767,7 @@
  18106. return false;
  18107. if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
  18108. - static int ratelimit;
  18109. -
  18110. - if (ratelimit < 10 &&
  18111. - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
  18112. - pr_warn("NOHZ: local_softirq_pending %02x\n",
  18113. - (unsigned int) local_softirq_pending());
  18114. - ratelimit++;
  18115. - }
  18116. + softirq_check_pending_idle();
  18117. return false;
  18118. }
  18119. @@ -1156,6 +1155,7 @@
  18120. * Emulate tick processing via per-CPU hrtimers:
  18121. */
  18122. hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  18123. + ts->sched_timer.irqsafe = 1;
  18124. ts->sched_timer.function = tick_sched_timer;
  18125. /* Get the next period (per cpu) */
  18126. diff -Nur linux-3.18.10.orig/kernel/time/timekeeping.c linux-3.18.10/kernel/time/timekeeping.c
  18127. --- linux-3.18.10.orig/kernel/time/timekeeping.c 2015-03-24 02:05:12.000000000 +0100
  18128. +++ linux-3.18.10/kernel/time/timekeeping.c 2015-03-26 12:42:18.683588345 +0100
  18129. @@ -1814,8 +1814,10 @@
  18130. */
  18131. void xtime_update(unsigned long ticks)
  18132. {
  18133. - write_seqlock(&jiffies_lock);
  18134. + raw_spin_lock(&jiffies_lock);
  18135. + write_seqcount_begin(&jiffies_seq);
  18136. do_timer(ticks);
  18137. - write_sequnlock(&jiffies_lock);
  18138. + write_seqcount_end(&jiffies_seq);
  18139. + raw_spin_unlock(&jiffies_lock);
  18140. update_wall_time();
  18141. }
  18142. diff -Nur linux-3.18.10.orig/kernel/time/timer.c linux-3.18.10/kernel/time/timer.c
  18143. --- linux-3.18.10.orig/kernel/time/timer.c 2015-03-24 02:05:12.000000000 +0100
  18144. +++ linux-3.18.10/kernel/time/timer.c 2015-03-26 12:42:18.683588345 +0100
  18145. @@ -78,6 +78,9 @@
  18146. struct tvec_base {
  18147. spinlock_t lock;
  18148. struct timer_list *running_timer;
  18149. +#ifdef CONFIG_PREEMPT_RT_FULL
  18150. + wait_queue_head_t wait_for_running_timer;
  18151. +#endif
  18152. unsigned long timer_jiffies;
  18153. unsigned long next_timer;
  18154. unsigned long active_timers;
  18155. @@ -758,6 +761,36 @@
  18156. }
  18157. }
  18158. +#ifndef CONFIG_PREEMPT_RT_FULL
  18159. +static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
  18160. + struct tvec_base *old,
  18161. + struct tvec_base *new)
  18162. +{
  18163. + /* See the comment in lock_timer_base() */
  18164. + timer_set_base(timer, NULL);
  18165. + spin_unlock(&old->lock);
  18166. + spin_lock(&new->lock);
  18167. + timer_set_base(timer, new);
  18168. + return new;
  18169. +}
  18170. +#else
  18171. +static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
  18172. + struct tvec_base *old,
  18173. + struct tvec_base *new)
  18174. +{
  18175. + /*
  18176. + * We cannot do the above because we might be preempted and
  18177. + * then the preempter would see NULL and loop forever.
  18178. + */
  18179. + if (spin_trylock(&new->lock)) {
  18180. + timer_set_base(timer, new);
  18181. + spin_unlock(&old->lock);
  18182. + return new;
  18183. + }
  18184. + return old;
  18185. +}
  18186. +#endif
  18187. +
  18188. static inline int
  18189. __mod_timer(struct timer_list *timer, unsigned long expires,
  18190. bool pending_only, int pinned)
  18191. @@ -788,14 +821,8 @@
  18192. * handler yet has not finished. This also guarantees that
  18193. * the timer is serialized wrt itself.
  18194. */
  18195. - if (likely(base->running_timer != timer)) {
  18196. - /* See the comment in lock_timer_base() */
  18197. - timer_set_base(timer, NULL);
  18198. - spin_unlock(&base->lock);
  18199. - base = new_base;
  18200. - spin_lock(&base->lock);
  18201. - timer_set_base(timer, base);
  18202. - }
  18203. + if (likely(base->running_timer != timer))
  18204. + base = switch_timer_base(timer, base, new_base);
  18205. }
  18206. timer->expires = expires;
  18207. @@ -969,6 +996,29 @@
  18208. }
  18209. EXPORT_SYMBOL_GPL(add_timer_on);
  18210. +#ifdef CONFIG_PREEMPT_RT_FULL
  18211. +/*
  18212. + * Wait for a running timer
  18213. + */
  18214. +static void wait_for_running_timer(struct timer_list *timer)
  18215. +{
  18216. + struct tvec_base *base = timer->base;
  18217. +
  18218. + if (base->running_timer == timer)
  18219. + wait_event(base->wait_for_running_timer,
  18220. + base->running_timer != timer);
  18221. +}
  18222. +
  18223. +# define wakeup_timer_waiters(b) wake_up(&(b)->wait_for_running_timer)
  18224. +#else
  18225. +static inline void wait_for_running_timer(struct timer_list *timer)
  18226. +{
  18227. + cpu_relax();
  18228. +}
  18229. +
  18230. +# define wakeup_timer_waiters(b) do { } while (0)
  18231. +#endif
  18232. +
  18233. /**
  18234. * del_timer - deactive a timer.
  18235. * @timer: the timer to be deactivated
  18236. @@ -1026,7 +1076,7 @@
  18237. }
  18238. EXPORT_SYMBOL(try_to_del_timer_sync);
  18239. -#ifdef CONFIG_SMP
  18240. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  18241. /**
  18242. * del_timer_sync - deactivate a timer and wait for the handler to finish.
  18243. * @timer: the timer to be deactivated
  18244. @@ -1086,7 +1136,7 @@
  18245. int ret = try_to_del_timer_sync(timer);
  18246. if (ret >= 0)
  18247. return ret;
  18248. - cpu_relax();
  18249. + wait_for_running_timer(timer);
  18250. }
  18251. }
  18252. EXPORT_SYMBOL(del_timer_sync);
  18253. @@ -1207,15 +1257,17 @@
  18254. if (irqsafe) {
  18255. spin_unlock(&base->lock);
  18256. call_timer_fn(timer, fn, data);
  18257. + base->running_timer = NULL;
  18258. spin_lock(&base->lock);
  18259. } else {
  18260. spin_unlock_irq(&base->lock);
  18261. call_timer_fn(timer, fn, data);
  18262. + base->running_timer = NULL;
  18263. spin_lock_irq(&base->lock);
  18264. }
  18265. }
  18266. }
  18267. - base->running_timer = NULL;
  18268. + wakeup_timer_waiters(base);
  18269. spin_unlock_irq(&base->lock);
  18270. }
  18271. @@ -1355,17 +1407,31 @@
  18272. if (cpu_is_offline(smp_processor_id()))
  18273. return expires;
  18274. +#ifdef CONFIG_PREEMPT_RT_FULL
  18275. + /*
  18276. + * On PREEMPT_RT we cannot sleep here. If the trylock does not
  18277. + * succeed then we return the worst-case 'expires in 1 tick'
  18278. + * value. We use the rt functions here directly to avoid a
  18279. + * migrate_disable() call.
  18280. + */
  18281. + if (!spin_do_trylock(&base->lock))
  18282. + return now + 1;
  18283. +#else
  18284. spin_lock(&base->lock);
  18285. +#endif
  18286. if (base->active_timers) {
  18287. if (time_before_eq(base->next_timer, base->timer_jiffies))
  18288. base->next_timer = __next_timer_interrupt(base);
  18289. expires = base->next_timer;
  18290. }
  18291. +#ifdef CONFIG_PREEMPT_RT_FULL
  18292. + rt_spin_unlock_after_trylock_in_irq(&base->lock);
  18293. +#else
  18294. spin_unlock(&base->lock);
  18295. +#endif
  18296. if (time_before_eq(expires, now))
  18297. return now;
  18298. -
  18299. return cmp_next_hrtimer_event(now, expires);
  18300. }
  18301. #endif
  18302. @@ -1381,13 +1447,13 @@
  18303. /* Note: this timer irq context must be accounted for as well. */
  18304. account_process_tick(p, user_tick);
  18305. + scheduler_tick();
  18306. run_local_timers();
  18307. rcu_check_callbacks(cpu, user_tick);
  18308. #ifdef CONFIG_IRQ_WORK
  18309. if (in_irq())
  18310. irq_work_tick();
  18311. #endif
  18312. - scheduler_tick();
  18313. run_posix_cpu_timers(p);
  18314. }
  18315. @@ -1400,6 +1466,10 @@
  18316. hrtimer_run_pending();
  18317. +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
  18318. + irq_work_tick();
  18319. +#endif
  18320. +
  18321. if (time_after_eq(jiffies, base->timer_jiffies))
  18322. __run_timers(base);
  18323. }
  18324. @@ -1574,6 +1644,9 @@
  18325. base = per_cpu(tvec_bases, cpu);
  18326. }
  18327. +#ifdef CONFIG_PREEMPT_RT_FULL
  18328. + init_waitqueue_head(&base->wait_for_running_timer);
  18329. +#endif
  18330. for (j = 0; j < TVN_SIZE; j++) {
  18331. INIT_LIST_HEAD(base->tv5.vec + j);
  18332. @@ -1613,7 +1686,7 @@
  18333. BUG_ON(cpu_online(cpu));
  18334. old_base = per_cpu(tvec_bases, cpu);
  18335. - new_base = get_cpu_var(tvec_bases);
  18336. + new_base = get_local_var(tvec_bases);
  18337. /*
  18338. * The caller is globally serialized and nobody else
  18339. * takes two locks at once, deadlock is not possible.
  18340. @@ -1634,7 +1707,7 @@
  18341. spin_unlock(&old_base->lock);
  18342. spin_unlock_irq(&new_base->lock);
  18343. - put_cpu_var(tvec_bases);
  18344. + put_local_var(tvec_bases);
  18345. }
  18346. #endif /* CONFIG_HOTPLUG_CPU */
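
The RT switch_timer_base() above replaces the mainline trick of publishing a NULL timer base during migration: with sleeping spinlocks the migrating task can be preempted while the base is NULL, and a waiter looping in lock_timer_base() would then spin on NULL forever. Instead, the timer is moved only if the new base's lock can be taken without blocking, and otherwise stays on the old base. A userspace sketch of that trylock-or-stay decision, with invented types:

#include <pthread.h>
#include <stdio.h>

struct timer_base {
        pthread_mutex_t lock;
        const char *name;
};

struct my_timer {
        struct timer_base *base;
};

/*
 * Caller holds old->lock.  Try to move the timer to the new base; if the
 * new lock is contended, keep the timer on the old base instead of blocking
 * (or instead of temporarily publishing a NULL base, which RT cannot do).
 */
static struct timer_base *switch_timer_base(struct my_timer *timer,
                                            struct timer_base *old,
                                            struct timer_base *new)
{
        if (pthread_mutex_trylock(&new->lock) == 0) {
                timer->base = new;
                pthread_mutex_unlock(&old->lock);
                return new;
        }
        return old;
}

int main(void)
{
        struct timer_base cpu0 = { .lock = PTHREAD_MUTEX_INITIALIZER, .name = "cpu0" };
        struct timer_base cpu1 = { .lock = PTHREAD_MUTEX_INITIALIZER, .name = "cpu1" };
        struct my_timer timer = { .base = &cpu0 };
        struct timer_base *base;

        pthread_mutex_lock(&cpu0.lock);
        base = switch_timer_base(&timer, &cpu0, &cpu1);
        printf("timer now lives on %s\n", base->name);
        pthread_mutex_unlock(&base->lock);
        return 0;
}
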
  18347. diff -Nur linux-3.18.10.orig/kernel/trace/Kconfig linux-3.18.10/kernel/trace/Kconfig
  18348. --- linux-3.18.10.orig/kernel/trace/Kconfig 2015-03-24 02:05:12.000000000 +0100
  18349. +++ linux-3.18.10/kernel/trace/Kconfig 2015-03-26 12:42:18.683588345 +0100
  18350. @@ -187,6 +187,24 @@
  18351. enabled. This option and the preempt-off timing option can be
  18352. used together or separately.)
  18353. +config INTERRUPT_OFF_HIST
  18354. + bool "Interrupts-off Latency Histogram"
  18355. + depends on IRQSOFF_TRACER
  18356. + help
  18357. + This option generates continuously updated histograms (one per cpu)
  18358. + of the duration of time periods with interrupts disabled. The
  18359. + histograms are disabled by default. To enable them, write a non-zero
  18360. + number to
  18361. +
  18362. + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
  18363. +
  18364. + If PREEMPT_OFF_HIST is also selected, additional histograms (one
  18365. + per cpu) are generated that accumulate the duration of time periods
  18366. + when both interrupts and preemption are disabled. The histogram data
  18367. + will be located in the debug file system at
  18368. +
  18369. + /sys/kernel/debug/tracing/latency_hist/irqsoff
  18370. +
  18371. config PREEMPT_TRACER
  18372. bool "Preemption-off Latency Tracer"
  18373. default n
  18374. @@ -211,6 +229,24 @@
  18375. enabled. This option and the irqs-off timing option can be
  18376. used together or separately.)
  18377. +config PREEMPT_OFF_HIST
  18378. + bool "Preemption-off Latency Histogram"
  18379. + depends on PREEMPT_TRACER
  18380. + help
  18381. + This option generates continuously updated histograms (one per cpu)
  18382. + of the duration of time periods with preemption disabled. The
  18383. + histograms are disabled by default. To enable them, write a non-zero
  18384. + number to
  18385. +
  18386. + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
  18387. +
  18388. + If INTERRUPT_OFF_HIST is also selected, additional histograms (one
  18389. + per cpu) are generated that accumulate the duration of time periods
  18390. + when both interrupts and preemption are disabled. The histogram data
  18391. + will be located in the debug file system at
  18392. +
  18393. + /sys/kernel/debug/tracing/latency_hist/preemptoff
  18394. +
  18395. config SCHED_TRACER
  18396. bool "Scheduling Latency Tracer"
  18397. select GENERIC_TRACER
  18398. @@ -221,6 +257,74 @@
  18399. This tracer tracks the latency of the highest priority task
  18400. to be scheduled in, starting from the point it has woken up.
  18401. +config WAKEUP_LATENCY_HIST
  18402. + bool "Scheduling Latency Histogram"
  18403. + depends on SCHED_TRACER
  18404. + help
  18405. + This option generates continuously updated histograms (one per cpu)
  18406. + of the scheduling latency of the highest priority task.
  18407. + The histograms are disabled by default. To enable them, write a
  18408. + non-zero number to
  18409. +
  18410. + /sys/kernel/debug/tracing/latency_hist/enable/wakeup
  18411. +
  18412. + Two different algorithms are used, one to determine the latency of
  18413. + processes that exclusively use the highest priority of the system and
  18414. + another one to determine the latency of processes that share the
  18415. + highest system priority with other processes. The former is used to
  18416. + improve hardware and system software, the latter to optimize the
  18417. + priority design of a given system. The histogram data will be
  18418. + located in the debug file system at
  18419. +
  18420. + /sys/kernel/debug/tracing/latency_hist/wakeup
  18421. +
  18422. + and
  18423. +
  18424. + /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio
  18425. +
  18426. + If both Scheduling Latency Histogram and Missed Timer Offsets
  18427. + Histogram are selected, additional histogram data will be collected
  18428. + that contain, in addition to the wakeup latency, the timer latency, in
  18429. + case the wakeup was triggered by an expired timer. These histograms
  18430. + are available in the
  18431. +
  18432. + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
  18433. +
  18434. + directory. They reflect the apparent interrupt and scheduling latency
  18435. + and are best suited to determine the worst-case latency of a given
  18436. + system. To enable these histograms, write a non-zero number to
  18437. +
  18438. + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
  18439. +
  18440. +config MISSED_TIMER_OFFSETS_HIST
  18441. + depends on HIGH_RES_TIMERS
  18442. + select GENERIC_TRACER
  18443. + bool "Missed Timer Offsets Histogram"
  18444. + help
  18445. + Generate a histogram of missed timer offsets in microseconds. The
  18446. + histograms are disabled by default. To enable them, write a non-zero
  18447. + number to
  18448. +
  18449. + /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets
  18450. +
  18451. + The histogram data will be located in the debug file system at
  18452. +
  18453. + /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets
  18454. +
  18455. + If both Scheduling Latency Histogram and Missed Timer Offsets
  18456. + Histogram are selected, additional histogram data will be collected
  18457. + that contain, in addition to the wakeup latency, the timer latency, in
  18458. + case the wakeup was triggered by an expired timer. These histograms
  18459. + are available in the
  18460. +
  18461. + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
  18462. +
  18463. + directory. They reflect the apparent interrupt and scheduling latency
  18464. + and are best suited to determine the worst-case latency of a given
  18465. + system. To enable these histograms, write a non-zero number to
  18466. +
  18467. + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
  18468. +
  18469. config ENABLE_DEFAULT_TRACERS
  18470. bool "Trace process context switches and events"
  18471. depends on !GENERIC_TRACER
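
All of the help texts added above share one usage pattern: write a non-zero value to a file under /sys/kernel/debug/tracing/latency_hist/enable/ and read the per-CPU histograms back from the matching directory. A small userspace reader along those lines is sketched below; it assumes debugfs is mounted at /sys/kernel/debug, that CONFIG_WAKEUP_LATENCY_HIST is enabled, and that the per-CPU files are named CPU0, CPU1, ... as created later in latency_hist.c - treat the exact file names as assumptions rather than guarantees.

#include <stdio.h>

#define HIST_DIR "/sys/kernel/debug/tracing/latency_hist"

int main(void)
{
        char line[256];
        FILE *f;

        /* Enable the wakeup latency histograms. */
        f = fopen(HIST_DIR "/enable/wakeup", "w");
        if (!f) {
                perror("enable/wakeup");
                return 1;
        }
        fputs("1\n", f);
        fclose(f);

        /* Dump the CPU0 histogram; summary lines start with '#',
         * then one "<usecs> <samples>" row per latency bucket. */
        f = fopen(HIST_DIR "/wakeup/CPU0", "r");
        if (!f) {
                perror("wakeup/CPU0");
                return 1;
        }
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);
        fclose(f);

        return 0;
}
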
  18472. diff -Nur linux-3.18.10.orig/kernel/trace/latency_hist.c linux-3.18.10/kernel/trace/latency_hist.c
  18473. --- linux-3.18.10.orig/kernel/trace/latency_hist.c 1970-01-01 01:00:00.000000000 +0100
  18474. +++ linux-3.18.10/kernel/trace/latency_hist.c 2015-03-26 12:42:18.683588345 +0100
  18475. @@ -0,0 +1,1178 @@
  18476. +/*
  18477. + * kernel/trace/latency_hist.c
  18478. + *
  18479. + * Add support for histograms of preemption-off latency and
  18480. + * interrupt-off latency and wakeup latency; it depends on
  18481. + * Real-Time Preemption Support.
  18482. + *
  18483. + * Copyright (C) 2005 MontaVista Software, Inc.
  18484. + * Yi Yang <yyang@ch.mvista.com>
  18485. + *
  18486. + * Converted to work with the new latency tracer.
  18487. + * Copyright (C) 2008 Red Hat, Inc.
  18488. + * Steven Rostedt <srostedt@redhat.com>
  18489. + *
  18490. + */
  18491. +#include <linux/module.h>
  18492. +#include <linux/debugfs.h>
  18493. +#include <linux/seq_file.h>
  18494. +#include <linux/percpu.h>
  18495. +#include <linux/kallsyms.h>
  18496. +#include <linux/uaccess.h>
  18497. +#include <linux/sched.h>
  18498. +#include <linux/sched/rt.h>
  18499. +#include <linux/slab.h>
  18500. +#include <linux/atomic.h>
  18501. +#include <asm/div64.h>
  18502. +
  18503. +#include "trace.h"
  18504. +#include <trace/events/sched.h>
  18505. +
  18506. +#define NSECS_PER_USECS 1000L
  18507. +
  18508. +#define CREATE_TRACE_POINTS
  18509. +#include <trace/events/hist.h>
  18510. +
  18511. +enum {
  18512. + IRQSOFF_LATENCY = 0,
  18513. + PREEMPTOFF_LATENCY,
  18514. + PREEMPTIRQSOFF_LATENCY,
  18515. + WAKEUP_LATENCY,
  18516. + WAKEUP_LATENCY_SHAREDPRIO,
  18517. + MISSED_TIMER_OFFSETS,
  18518. + TIMERANDWAKEUP_LATENCY,
  18519. + MAX_LATENCY_TYPE,
  18520. +};
  18521. +
  18522. +#define MAX_ENTRY_NUM 10240
  18523. +
  18524. +struct hist_data {
  18525. + atomic_t hist_mode; /* 0 log, 1 don't log */
  18526. + long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */
  18527. + long min_lat;
  18528. + long max_lat;
  18529. + unsigned long long below_hist_bound_samples;
  18530. + unsigned long long above_hist_bound_samples;
  18531. + long long accumulate_lat;
  18532. + unsigned long long total_samples;
  18533. + unsigned long long hist_array[MAX_ENTRY_NUM];
  18534. +};
  18535. +
  18536. +struct enable_data {
  18537. + int latency_type;
  18538. + int enabled;
  18539. +};
  18540. +
  18541. +static char *latency_hist_dir_root = "latency_hist";
  18542. +
  18543. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  18544. +static DEFINE_PER_CPU(struct hist_data, irqsoff_hist);
  18545. +static char *irqsoff_hist_dir = "irqsoff";
  18546. +static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start);
  18547. +static DEFINE_PER_CPU(int, hist_irqsoff_counting);
  18548. +#endif
  18549. +
  18550. +#ifdef CONFIG_PREEMPT_OFF_HIST
  18551. +static DEFINE_PER_CPU(struct hist_data, preemptoff_hist);
  18552. +static char *preemptoff_hist_dir = "preemptoff";
  18553. +static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start);
  18554. +static DEFINE_PER_CPU(int, hist_preemptoff_counting);
  18555. +#endif
  18556. +
  18557. +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
  18558. +static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist);
  18559. +static char *preemptirqsoff_hist_dir = "preemptirqsoff";
  18560. +static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start);
  18561. +static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting);
  18562. +#endif
  18563. +
  18564. +#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST)
  18565. +static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start);
  18566. +static struct enable_data preemptirqsoff_enabled_data = {
  18567. + .latency_type = PREEMPTIRQSOFF_LATENCY,
  18568. + .enabled = 0,
  18569. +};
  18570. +#endif
  18571. +
  18572. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  18573. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  18574. +struct maxlatproc_data {
  18575. + char comm[FIELD_SIZEOF(struct task_struct, comm)];
  18576. + char current_comm[FIELD_SIZEOF(struct task_struct, comm)];
  18577. + int pid;
  18578. + int current_pid;
  18579. + int prio;
  18580. + int current_prio;
  18581. + long latency;
  18582. + long timeroffset;
  18583. + cycle_t timestamp;
  18584. +};
  18585. +#endif
  18586. +
  18587. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  18588. +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist);
  18589. +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio);
  18590. +static char *wakeup_latency_hist_dir = "wakeup";
  18591. +static char *wakeup_latency_hist_dir_sharedprio = "sharedprio";
  18592. +static notrace void probe_wakeup_latency_hist_start(void *v,
  18593. + struct task_struct *p, int success);
  18594. +static notrace void probe_wakeup_latency_hist_stop(void *v,
  18595. + struct task_struct *prev, struct task_struct *next);
  18596. +static notrace void probe_sched_migrate_task(void *,
  18597. + struct task_struct *task, int cpu);
  18598. +static struct enable_data wakeup_latency_enabled_data = {
  18599. + .latency_type = WAKEUP_LATENCY,
  18600. + .enabled = 0,
  18601. +};
  18602. +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc);
  18603. +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio);
  18604. +static DEFINE_PER_CPU(struct task_struct *, wakeup_task);
  18605. +static DEFINE_PER_CPU(int, wakeup_sharedprio);
  18606. +static unsigned long wakeup_pid;
  18607. +#endif
  18608. +
  18609. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  18610. +static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets);
  18611. +static char *missed_timer_offsets_dir = "missed_timer_offsets";
  18612. +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
  18613. + long long offset, struct task_struct *curr, struct task_struct *task);
  18614. +static struct enable_data missed_timer_offsets_enabled_data = {
  18615. + .latency_type = MISSED_TIMER_OFFSETS,
  18616. + .enabled = 0,
  18617. +};
  18618. +static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc);
  18619. +static unsigned long missed_timer_offsets_pid;
  18620. +#endif
  18621. +
  18622. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  18623. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  18624. +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
  18625. +static char *timerandwakeup_latency_hist_dir = "timerandwakeup";
  18626. +static struct enable_data timerandwakeup_enabled_data = {
  18627. + .latency_type = TIMERANDWAKEUP_LATENCY,
  18628. + .enabled = 0,
  18629. +};
  18630. +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
  18631. +#endif
  18632. +
  18633. +void notrace latency_hist(int latency_type, int cpu, long latency,
  18634. + long timeroffset, cycle_t stop,
  18635. + struct task_struct *p)
  18636. +{
  18637. + struct hist_data *my_hist;
  18638. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  18639. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  18640. + struct maxlatproc_data *mp = NULL;
  18641. +#endif
  18642. +
  18643. + if (!cpu_possible(cpu) || latency_type < 0 ||
  18644. + latency_type >= MAX_LATENCY_TYPE)
  18645. + return;
  18646. +
  18647. + switch (latency_type) {
  18648. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  18649. + case IRQSOFF_LATENCY:
  18650. + my_hist = &per_cpu(irqsoff_hist, cpu);
  18651. + break;
  18652. +#endif
  18653. +#ifdef CONFIG_PREEMPT_OFF_HIST
  18654. + case PREEMPTOFF_LATENCY:
  18655. + my_hist = &per_cpu(preemptoff_hist, cpu);
  18656. + break;
  18657. +#endif
  18658. +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
  18659. + case PREEMPTIRQSOFF_LATENCY:
  18660. + my_hist = &per_cpu(preemptirqsoff_hist, cpu);
  18661. + break;
  18662. +#endif
  18663. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  18664. + case WAKEUP_LATENCY:
  18665. + my_hist = &per_cpu(wakeup_latency_hist, cpu);
  18666. + mp = &per_cpu(wakeup_maxlatproc, cpu);
  18667. + break;
  18668. + case WAKEUP_LATENCY_SHAREDPRIO:
  18669. + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
  18670. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
  18671. + break;
  18672. +#endif
  18673. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  18674. + case MISSED_TIMER_OFFSETS:
  18675. + my_hist = &per_cpu(missed_timer_offsets, cpu);
  18676. + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
  18677. + break;
  18678. +#endif
  18679. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  18680. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  18681. + case TIMERANDWAKEUP_LATENCY:
  18682. + my_hist = &per_cpu(timerandwakeup_latency_hist, cpu);
  18683. + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
  18684. + break;
  18685. +#endif
  18686. +
  18687. + default:
  18688. + return;
  18689. + }
  18690. +
  18691. + latency += my_hist->offset;
  18692. +
  18693. + if (atomic_read(&my_hist->hist_mode) == 0)
  18694. + return;
  18695. +
  18696. + if (latency < 0 || latency >= MAX_ENTRY_NUM) {
  18697. + if (latency < 0)
  18698. + my_hist->below_hist_bound_samples++;
  18699. + else
  18700. + my_hist->above_hist_bound_samples++;
  18701. + } else
  18702. + my_hist->hist_array[latency]++;
  18703. +
  18704. + if (unlikely(latency > my_hist->max_lat ||
  18705. + my_hist->min_lat == LONG_MAX)) {
  18706. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  18707. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  18708. + if (latency_type == WAKEUP_LATENCY ||
  18709. + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
  18710. + latency_type == MISSED_TIMER_OFFSETS ||
  18711. + latency_type == TIMERANDWAKEUP_LATENCY) {
  18712. + strncpy(mp->comm, p->comm, sizeof(mp->comm));
  18713. + strncpy(mp->current_comm, current->comm,
  18714. + sizeof(mp->current_comm));
  18715. + mp->pid = task_pid_nr(p);
  18716. + mp->current_pid = task_pid_nr(current);
  18717. + mp->prio = p->prio;
  18718. + mp->current_prio = current->prio;
  18719. + mp->latency = latency;
  18720. + mp->timeroffset = timeroffset;
  18721. + mp->timestamp = stop;
  18722. + }
  18723. +#endif
  18724. + my_hist->max_lat = latency;
  18725. + }
  18726. + if (unlikely(latency < my_hist->min_lat))
  18727. + my_hist->min_lat = latency;
  18728. + my_hist->total_samples++;
  18729. + my_hist->accumulate_lat += latency;
  18730. +}
  18731. +
  18732. +static void *l_start(struct seq_file *m, loff_t *pos)
  18733. +{
  18734. + loff_t *index_ptr = NULL;
  18735. + loff_t index = *pos;
  18736. + struct hist_data *my_hist = m->private;
  18737. +
  18738. + if (index == 0) {
  18739. + char minstr[32], avgstr[32], maxstr[32];
  18740. +
  18741. + atomic_dec(&my_hist->hist_mode);
  18742. +
  18743. + if (likely(my_hist->total_samples)) {
  18744. + long avg = (long) div64_s64(my_hist->accumulate_lat,
  18745. + my_hist->total_samples);
  18746. + snprintf(minstr, sizeof(minstr), "%ld",
  18747. + my_hist->min_lat - my_hist->offset);
  18748. + snprintf(avgstr, sizeof(avgstr), "%ld",
  18749. + avg - my_hist->offset);
  18750. + snprintf(maxstr, sizeof(maxstr), "%ld",
  18751. + my_hist->max_lat - my_hist->offset);
  18752. + } else {
  18753. + strcpy(minstr, "<undef>");
  18754. + strcpy(avgstr, minstr);
  18755. + strcpy(maxstr, minstr);
  18756. + }
  18757. +
  18758. + seq_printf(m, "#Minimum latency: %s microseconds\n"
  18759. + "#Average latency: %s microseconds\n"
  18760. + "#Maximum latency: %s microseconds\n"
  18761. + "#Total samples: %llu\n"
  18762. + "#There are %llu samples lower than %ld"
  18763. + " microseconds.\n"
  18764. + "#There are %llu samples greater or equal"
  18765. + " than %ld microseconds.\n"
  18766. + "#usecs\t%16s\n",
  18767. + minstr, avgstr, maxstr,
  18768. + my_hist->total_samples,
  18769. + my_hist->below_hist_bound_samples,
  18770. + -my_hist->offset,
  18771. + my_hist->above_hist_bound_samples,
  18772. + MAX_ENTRY_NUM - my_hist->offset,
  18773. + "samples");
  18774. + }
  18775. + if (index < MAX_ENTRY_NUM) {
  18776. + index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
  18777. + if (index_ptr)
  18778. + *index_ptr = index;
  18779. + }
  18780. +
  18781. + return index_ptr;
  18782. +}
  18783. +
  18784. +static void *l_next(struct seq_file *m, void *p, loff_t *pos)
  18785. +{
  18786. + loff_t *index_ptr = p;
  18787. + struct hist_data *my_hist = m->private;
  18788. +
  18789. + if (++*pos >= MAX_ENTRY_NUM) {
  18790. + atomic_inc(&my_hist->hist_mode);
  18791. + return NULL;
  18792. + }
  18793. + *index_ptr = *pos;
  18794. + return index_ptr;
  18795. +}
  18796. +
  18797. +static void l_stop(struct seq_file *m, void *p)
  18798. +{
  18799. + kfree(p);
  18800. +}
  18801. +
  18802. +static int l_show(struct seq_file *m, void *p)
  18803. +{
  18804. + int index = *(loff_t *) p;
  18805. + struct hist_data *my_hist = m->private;
  18806. +
  18807. + seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset,
  18808. + my_hist->hist_array[index]);
  18809. + return 0;
  18810. +}
  18811. +
  18812. +static const struct seq_operations latency_hist_seq_op = {
  18813. + .start = l_start,
  18814. + .next = l_next,
  18815. + .stop = l_stop,
  18816. + .show = l_show
  18817. +};
  18818. +
  18819. +static int latency_hist_open(struct inode *inode, struct file *file)
  18820. +{
  18821. + int ret;
  18822. +
  18823. + ret = seq_open(file, &latency_hist_seq_op);
  18824. + if (!ret) {
  18825. + struct seq_file *seq = file->private_data;
  18826. + seq->private = inode->i_private;
  18827. + }
  18828. + return ret;
  18829. +}
  18830. +
  18831. +static const struct file_operations latency_hist_fops = {
  18832. + .open = latency_hist_open,
  18833. + .read = seq_read,
  18834. + .llseek = seq_lseek,
  18835. + .release = seq_release,
  18836. +};
  18837. +
  18838. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  18839. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  18840. +static void clear_maxlatprocdata(struct maxlatproc_data *mp)
  18841. +{
  18842. + mp->comm[0] = mp->current_comm[0] = '\0';
  18843. + mp->prio = mp->current_prio = mp->pid = mp->current_pid =
  18844. + mp->latency = mp->timeroffset = -1;
  18845. + mp->timestamp = 0;
  18846. +}
  18847. +#endif
  18848. +
  18849. +static void hist_reset(struct hist_data *hist)
  18850. +{
  18851. + atomic_dec(&hist->hist_mode);
  18852. +
  18853. + memset(hist->hist_array, 0, sizeof(hist->hist_array));
  18854. + hist->below_hist_bound_samples = 0ULL;
  18855. + hist->above_hist_bound_samples = 0ULL;
  18856. + hist->min_lat = LONG_MAX;
  18857. + hist->max_lat = LONG_MIN;
  18858. + hist->total_samples = 0ULL;
  18859. + hist->accumulate_lat = 0LL;
  18860. +
  18861. + atomic_inc(&hist->hist_mode);
  18862. +}
  18863. +
  18864. +static ssize_t
  18865. +latency_hist_reset(struct file *file, const char __user *a,
  18866. + size_t size, loff_t *off)
  18867. +{
  18868. + int cpu;
  18869. + struct hist_data *hist = NULL;
  18870. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  18871. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  18872. + struct maxlatproc_data *mp = NULL;
  18873. +#endif
  18874. + off_t latency_type = (off_t) file->private_data;
  18875. +
  18876. + for_each_online_cpu(cpu) {
  18877. +
  18878. + switch (latency_type) {
  18879. +#ifdef CONFIG_PREEMPT_OFF_HIST
  18880. + case PREEMPTOFF_LATENCY:
  18881. + hist = &per_cpu(preemptoff_hist, cpu);
  18882. + break;
  18883. +#endif
  18884. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  18885. + case IRQSOFF_LATENCY:
  18886. + hist = &per_cpu(irqsoff_hist, cpu);
  18887. + break;
  18888. +#endif
  18889. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  18890. + case PREEMPTIRQSOFF_LATENCY:
  18891. + hist = &per_cpu(preemptirqsoff_hist, cpu);
  18892. + break;
  18893. +#endif
  18894. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  18895. + case WAKEUP_LATENCY:
  18896. + hist = &per_cpu(wakeup_latency_hist, cpu);
  18897. + mp = &per_cpu(wakeup_maxlatproc, cpu);
  18898. + break;
  18899. + case WAKEUP_LATENCY_SHAREDPRIO:
  18900. + hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
  18901. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
  18902. + break;
  18903. +#endif
  18904. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  18905. + case MISSED_TIMER_OFFSETS:
  18906. + hist = &per_cpu(missed_timer_offsets, cpu);
  18907. + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
  18908. + break;
  18909. +#endif
  18910. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  18911. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  18912. + case TIMERANDWAKEUP_LATENCY:
  18913. + hist = &per_cpu(timerandwakeup_latency_hist, cpu);
  18914. + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
  18915. + break;
  18916. +#endif
  18917. + }
  18918. +
  18919. + hist_reset(hist);
  18920. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  18921. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  18922. + if (latency_type == WAKEUP_LATENCY ||
  18923. + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
  18924. + latency_type == MISSED_TIMER_OFFSETS ||
  18925. + latency_type == TIMERANDWAKEUP_LATENCY)
  18926. + clear_maxlatprocdata(mp);
  18927. +#endif
  18928. + }
  18929. +
  18930. + return size;
  18931. +}
  18932. +
  18933. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  18934. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  18935. +static ssize_t
  18936. +show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  18937. +{
  18938. + char buf[64];
  18939. + int r;
  18940. + unsigned long *this_pid = file->private_data;
  18941. +
  18942. + r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid);
  18943. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  18944. +}
  18945. +
  18946. +static ssize_t do_pid(struct file *file, const char __user *ubuf,
  18947. + size_t cnt, loff_t *ppos)
  18948. +{
  18949. + char buf[64];
  18950. + unsigned long pid;
  18951. + unsigned long *this_pid = file->private_data;
  18952. +
  18953. + if (cnt >= sizeof(buf))
  18954. + return -EINVAL;
  18955. +
  18956. + if (copy_from_user(&buf, ubuf, cnt))
  18957. + return -EFAULT;
  18958. +
  18959. + buf[cnt] = '\0';
  18960. +
  18961. + if (kstrtoul(buf, 10, &pid))
  18962. + return -EINVAL;
  18963. +
  18964. + *this_pid = pid;
  18965. +
  18966. + return cnt;
  18967. +}
  18968. +#endif
  18969. +
  18970. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  18971. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  18972. +static ssize_t
  18973. +show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  18974. +{
  18975. + int r;
  18976. + struct maxlatproc_data *mp = file->private_data;
  18977. + int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
  18978. + unsigned long long t;
  18979. + unsigned long usecs, secs;
  18980. + char *buf;
  18981. +
  18982. + if (mp->pid == -1 || mp->current_pid == -1) {
  18983. + buf = "(none)\n";
  18984. + return simple_read_from_buffer(ubuf, cnt, ppos, buf,
  18985. + strlen(buf));
  18986. + }
  18987. +
  18988. + buf = kmalloc(strmaxlen, GFP_KERNEL);
  18989. + if (buf == NULL)
  18990. + return -ENOMEM;
  18991. +
  18992. + t = ns2usecs(mp->timestamp);
  18993. + usecs = do_div(t, USEC_PER_SEC);
  18994. + secs = (unsigned long) t;
  18995. + r = snprintf(buf, strmaxlen,
  18996. + "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
  18997. + MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
  18998. + mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
  18999. + secs, usecs);
  19000. + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  19001. + kfree(buf);
  19002. + return r;
  19003. +}
  19004. +#endif
  19005. +
  19006. +static ssize_t
  19007. +show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  19008. +{
  19009. + char buf[64];
  19010. + struct enable_data *ed = file->private_data;
  19011. + int r;
  19012. +
  19013. + r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled);
  19014. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  19015. +}
  19016. +
  19017. +static ssize_t
  19018. +do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos)
  19019. +{
  19020. + char buf[64];
  19021. + long enable;
  19022. + struct enable_data *ed = file->private_data;
  19023. +
  19024. + if (cnt >= sizeof(buf))
  19025. + return -EINVAL;
  19026. +
  19027. + if (copy_from_user(&buf, ubuf, cnt))
  19028. + return -EFAULT;
  19029. +
  19030. + buf[cnt] = 0;
  19031. +
  19032. + if (kstrtoul(buf, 10, &enable))
  19033. + return -EINVAL;
  19034. +
  19035. + if ((enable && ed->enabled) || (!enable && !ed->enabled))
  19036. + return cnt;
  19037. +
  19038. + if (enable) {
  19039. + int ret;
  19040. +
  19041. + switch (ed->latency_type) {
  19042. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  19043. + case PREEMPTIRQSOFF_LATENCY:
  19044. + ret = register_trace_preemptirqsoff_hist(
  19045. + probe_preemptirqsoff_hist, NULL);
  19046. + if (ret) {
  19047. + pr_info("wakeup trace: Couldn't assign "
  19048. + "probe_preemptirqsoff_hist "
  19049. + "to trace_preemptirqsoff_hist\n");
  19050. + return ret;
  19051. + }
  19052. + break;
  19053. +#endif
  19054. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  19055. + case WAKEUP_LATENCY:
  19056. + ret = register_trace_sched_wakeup(
  19057. + probe_wakeup_latency_hist_start, NULL);
  19058. + if (ret) {
  19059. + pr_info("wakeup trace: Couldn't assign "
  19060. + "probe_wakeup_latency_hist_start "
  19061. + "to trace_sched_wakeup\n");
  19062. + return ret;
  19063. + }
  19064. + ret = register_trace_sched_wakeup_new(
  19065. + probe_wakeup_latency_hist_start, NULL);
  19066. + if (ret) {
  19067. + pr_info("wakeup trace: Couldn't assign "
  19068. + "probe_wakeup_latency_hist_start "
  19069. + "to trace_sched_wakeup_new\n");
  19070. + unregister_trace_sched_wakeup(
  19071. + probe_wakeup_latency_hist_start, NULL);
  19072. + return ret;
  19073. + }
  19074. + ret = register_trace_sched_switch(
  19075. + probe_wakeup_latency_hist_stop, NULL);
  19076. + if (ret) {
  19077. + pr_info("wakeup trace: Couldn't assign "
  19078. + "probe_wakeup_latency_hist_stop "
  19079. + "to trace_sched_switch\n");
  19080. + unregister_trace_sched_wakeup(
  19081. + probe_wakeup_latency_hist_start, NULL);
  19082. + unregister_trace_sched_wakeup_new(
  19083. + probe_wakeup_latency_hist_start, NULL);
  19084. + return ret;
  19085. + }
  19086. + ret = register_trace_sched_migrate_task(
  19087. + probe_sched_migrate_task, NULL);
  19088. + if (ret) {
  19089. + pr_info("wakeup trace: Couldn't assign "
  19090. + "probe_sched_migrate_task "
  19091. + "to trace_sched_migrate_task\n");
  19092. + unregister_trace_sched_wakeup(
  19093. + probe_wakeup_latency_hist_start, NULL);
  19094. + unregister_trace_sched_wakeup_new(
  19095. + probe_wakeup_latency_hist_start, NULL);
  19096. + unregister_trace_sched_switch(
  19097. + probe_wakeup_latency_hist_stop, NULL);
  19098. + return ret;
  19099. + }
  19100. + break;
  19101. +#endif
  19102. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19103. + case MISSED_TIMER_OFFSETS:
  19104. + ret = register_trace_hrtimer_interrupt(
  19105. + probe_hrtimer_interrupt, NULL);
  19106. + if (ret) {
  19107. + pr_info("wakeup trace: Couldn't assign "
  19108. + "probe_hrtimer_interrupt "
  19109. + "to trace_hrtimer_interrupt\n");
  19110. + return ret;
  19111. + }
  19112. + break;
  19113. +#endif
  19114. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  19115. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19116. + case TIMERANDWAKEUP_LATENCY:
  19117. + if (!wakeup_latency_enabled_data.enabled ||
  19118. + !missed_timer_offsets_enabled_data.enabled)
  19119. + return -EINVAL;
  19120. + break;
  19121. +#endif
  19122. + default:
  19123. + break;
  19124. + }
  19125. + } else {
  19126. + switch (ed->latency_type) {
  19127. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  19128. + case PREEMPTIRQSOFF_LATENCY:
  19129. + {
  19130. + int cpu;
  19131. +
  19132. + unregister_trace_preemptirqsoff_hist(
  19133. + probe_preemptirqsoff_hist, NULL);
  19134. + for_each_online_cpu(cpu) {
  19135. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  19136. + per_cpu(hist_irqsoff_counting,
  19137. + cpu) = 0;
  19138. +#endif
  19139. +#ifdef CONFIG_PREEMPT_OFF_HIST
  19140. + per_cpu(hist_preemptoff_counting,
  19141. + cpu) = 0;
  19142. +#endif
  19143. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  19144. + per_cpu(hist_preemptirqsoff_counting,
  19145. + cpu) = 0;
  19146. +#endif
  19147. + }
  19148. + }
  19149. + break;
  19150. +#endif
  19151. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  19152. + case WAKEUP_LATENCY:
  19153. + {
  19154. + int cpu;
  19155. +
  19156. + unregister_trace_sched_wakeup(
  19157. + probe_wakeup_latency_hist_start, NULL);
  19158. + unregister_trace_sched_wakeup_new(
  19159. + probe_wakeup_latency_hist_start, NULL);
  19160. + unregister_trace_sched_switch(
  19161. + probe_wakeup_latency_hist_stop, NULL);
  19162. + unregister_trace_sched_migrate_task(
  19163. + probe_sched_migrate_task, NULL);
  19164. +
  19165. + for_each_online_cpu(cpu) {
  19166. + per_cpu(wakeup_task, cpu) = NULL;
  19167. + per_cpu(wakeup_sharedprio, cpu) = 0;
  19168. + }
  19169. + }
  19170. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19171. + timerandwakeup_enabled_data.enabled = 0;
  19172. +#endif
  19173. + break;
  19174. +#endif
  19175. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19176. + case MISSED_TIMER_OFFSETS:
  19177. + unregister_trace_hrtimer_interrupt(
  19178. + probe_hrtimer_interrupt, NULL);
  19179. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  19180. + timerandwakeup_enabled_data.enabled = 0;
  19181. +#endif
  19182. + break;
  19183. +#endif
  19184. + default:
  19185. + break;
  19186. + }
  19187. + }
  19188. + ed->enabled = enable;
  19189. + return cnt;
  19190. +}
  19191. +
  19192. +static const struct file_operations latency_hist_reset_fops = {
  19193. + .open = tracing_open_generic,
  19194. + .write = latency_hist_reset,
  19195. +};
  19196. +
  19197. +static const struct file_operations enable_fops = {
  19198. + .open = tracing_open_generic,
  19199. + .read = show_enable,
  19200. + .write = do_enable,
  19201. +};
  19202. +
  19203. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  19204. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19205. +static const struct file_operations pid_fops = {
  19206. + .open = tracing_open_generic,
  19207. + .read = show_pid,
  19208. + .write = do_pid,
  19209. +};
  19210. +
  19211. +static const struct file_operations maxlatproc_fops = {
  19212. + .open = tracing_open_generic,
  19213. + .read = show_maxlatproc,
  19214. +};
  19215. +#endif
  19216. +
  19217. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  19218. +static notrace void probe_preemptirqsoff_hist(void *v, int reason,
  19219. + int starthist)
  19220. +{
  19221. + int cpu = raw_smp_processor_id();
  19222. + int time_set = 0;
  19223. +
  19224. + if (starthist) {
  19225. + cycle_t uninitialized_var(start);
  19226. +
  19227. + if (!preempt_count() && !irqs_disabled())
  19228. + return;
  19229. +
  19230. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  19231. + if ((reason == IRQS_OFF || reason == TRACE_START) &&
  19232. + !per_cpu(hist_irqsoff_counting, cpu)) {
  19233. + per_cpu(hist_irqsoff_counting, cpu) = 1;
  19234. + start = ftrace_now(cpu);
  19235. + time_set++;
  19236. + per_cpu(hist_irqsoff_start, cpu) = start;
  19237. + }
  19238. +#endif
  19239. +
  19240. +#ifdef CONFIG_PREEMPT_OFF_HIST
  19241. + if ((reason == PREEMPT_OFF || reason == TRACE_START) &&
  19242. + !per_cpu(hist_preemptoff_counting, cpu)) {
  19243. + per_cpu(hist_preemptoff_counting, cpu) = 1;
  19244. + if (!(time_set++))
  19245. + start = ftrace_now(cpu);
  19246. + per_cpu(hist_preemptoff_start, cpu) = start;
  19247. + }
  19248. +#endif
  19249. +
  19250. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  19251. + if (per_cpu(hist_irqsoff_counting, cpu) &&
  19252. + per_cpu(hist_preemptoff_counting, cpu) &&
  19253. + !per_cpu(hist_preemptirqsoff_counting, cpu)) {
  19254. + per_cpu(hist_preemptirqsoff_counting, cpu) = 1;
  19255. + if (!time_set)
  19256. + start = ftrace_now(cpu);
  19257. + per_cpu(hist_preemptirqsoff_start, cpu) = start;
  19258. + }
  19259. +#endif
  19260. + } else {
  19261. + cycle_t uninitialized_var(stop);
  19262. +
  19263. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  19264. + if ((reason == IRQS_ON || reason == TRACE_STOP) &&
  19265. + per_cpu(hist_irqsoff_counting, cpu)) {
  19266. + cycle_t start = per_cpu(hist_irqsoff_start, cpu);
  19267. +
  19268. + stop = ftrace_now(cpu);
  19269. + time_set++;
  19270. + if (start) {
  19271. + long latency = ((long) (stop - start)) /
  19272. + NSECS_PER_USECS;
  19273. +
  19274. + latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
  19275. + stop, NULL);
  19276. + }
  19277. + per_cpu(hist_irqsoff_counting, cpu) = 0;
  19278. + }
  19279. +#endif
  19280. +
  19281. +#ifdef CONFIG_PREEMPT_OFF_HIST
  19282. + if ((reason == PREEMPT_ON || reason == TRACE_STOP) &&
  19283. + per_cpu(hist_preemptoff_counting, cpu)) {
  19284. + cycle_t start = per_cpu(hist_preemptoff_start, cpu);
  19285. +
  19286. + if (!(time_set++))
  19287. + stop = ftrace_now(cpu);
  19288. + if (start) {
  19289. + long latency = ((long) (stop - start)) /
  19290. + NSECS_PER_USECS;
  19291. +
  19292. + latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
  19293. + 0, stop, NULL);
  19294. + }
  19295. + per_cpu(hist_preemptoff_counting, cpu) = 0;
  19296. + }
  19297. +#endif
  19298. +
  19299. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  19300. + if ((!per_cpu(hist_irqsoff_counting, cpu) ||
  19301. + !per_cpu(hist_preemptoff_counting, cpu)) &&
  19302. + per_cpu(hist_preemptirqsoff_counting, cpu)) {
  19303. + cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu);
  19304. +
  19305. + if (!time_set)
  19306. + stop = ftrace_now(cpu);
  19307. + if (start) {
  19308. + long latency = ((long) (stop - start)) /
  19309. + NSECS_PER_USECS;
  19310. +
  19311. + latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
  19312. + latency, 0, stop, NULL);
  19313. + }
  19314. + per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
  19315. + }
  19316. +#endif
  19317. + }
  19318. +}
  19319. +#endif
  19320. +
  19321. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  19322. +static DEFINE_RAW_SPINLOCK(wakeup_lock);
  19323. +static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
  19324. + int cpu)
  19325. +{
  19326. + int old_cpu = task_cpu(task);
  19327. +
  19328. + if (cpu != old_cpu) {
  19329. + unsigned long flags;
  19330. + struct task_struct *cpu_wakeup_task;
  19331. +
  19332. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  19333. +
  19334. + cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
  19335. + if (task == cpu_wakeup_task) {
  19336. + put_task_struct(cpu_wakeup_task);
  19337. + per_cpu(wakeup_task, old_cpu) = NULL;
  19338. + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
  19339. + get_task_struct(cpu_wakeup_task);
  19340. + }
  19341. +
  19342. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  19343. + }
  19344. +}
  19345. +
  19346. +static notrace void probe_wakeup_latency_hist_start(void *v,
  19347. + struct task_struct *p, int success)
  19348. +{
  19349. + unsigned long flags;
  19350. + struct task_struct *curr = current;
  19351. + int cpu = task_cpu(p);
  19352. + struct task_struct *cpu_wakeup_task;
  19353. +
  19354. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  19355. +
  19356. + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
  19357. +
  19358. + if (wakeup_pid) {
  19359. + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
  19360. + p->prio == curr->prio)
  19361. + per_cpu(wakeup_sharedprio, cpu) = 1;
  19362. + if (likely(wakeup_pid != task_pid_nr(p)))
  19363. + goto out;
  19364. + } else {
  19365. + if (likely(!rt_task(p)) ||
  19366. + (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) ||
  19367. + p->prio > curr->prio)
  19368. + goto out;
  19369. + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
  19370. + p->prio == curr->prio)
  19371. + per_cpu(wakeup_sharedprio, cpu) = 1;
  19372. + }
  19373. +
  19374. + if (cpu_wakeup_task)
  19375. + put_task_struct(cpu_wakeup_task);
  19376. + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p;
  19377. + get_task_struct(cpu_wakeup_task);
  19378. + cpu_wakeup_task->preempt_timestamp_hist =
  19379. + ftrace_now(raw_smp_processor_id());
  19380. +out:
  19381. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  19382. +}
  19383. +
  19384. +static notrace void probe_wakeup_latency_hist_stop(void *v,
  19385. + struct task_struct *prev, struct task_struct *next)
  19386. +{
  19387. + unsigned long flags;
  19388. + int cpu = task_cpu(next);
  19389. + long latency;
  19390. + cycle_t stop;
  19391. + struct task_struct *cpu_wakeup_task;
  19392. +
  19393. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  19394. +
  19395. + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
  19396. +
  19397. + if (cpu_wakeup_task == NULL)
  19398. + goto out;
  19399. +
  19400. + /* Already running? */
  19401. + if (unlikely(current == cpu_wakeup_task))
  19402. + goto out_reset;
  19403. +
  19404. + if (next != cpu_wakeup_task) {
  19405. + if (next->prio < cpu_wakeup_task->prio)
  19406. + goto out_reset;
  19407. +
  19408. + if (next->prio == cpu_wakeup_task->prio)
  19409. + per_cpu(wakeup_sharedprio, cpu) = 1;
  19410. +
  19411. + goto out;
  19412. + }
  19413. +
  19414. + if (current->prio == cpu_wakeup_task->prio)
  19415. + per_cpu(wakeup_sharedprio, cpu) = 1;
  19416. +
  19417. + /*
  19418. + * The task we are waiting for is about to be switched to.
  19419. + * Calculate latency and store it in histogram.
  19420. + */
  19421. + stop = ftrace_now(raw_smp_processor_id());
  19422. +
  19423. + latency = ((long) (stop - next->preempt_timestamp_hist)) /
  19424. + NSECS_PER_USECS;
  19425. +
  19426. + if (per_cpu(wakeup_sharedprio, cpu)) {
  19427. + latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
  19428. + next);
  19429. + per_cpu(wakeup_sharedprio, cpu) = 0;
  19430. + } else {
  19431. + latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
  19432. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19433. + if (timerandwakeup_enabled_data.enabled) {
  19434. + latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
  19435. + next->timer_offset + latency, next->timer_offset,
  19436. + stop, next);
  19437. + }
  19438. +#endif
  19439. + }
  19440. +
  19441. +out_reset:
  19442. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19443. + next->timer_offset = 0;
  19444. +#endif
  19445. + put_task_struct(cpu_wakeup_task);
  19446. + per_cpu(wakeup_task, cpu) = NULL;
  19447. +out:
  19448. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  19449. +}
  19450. +#endif
  19451. +
  19452. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19453. +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
  19454. + long long latency_ns, struct task_struct *curr,
  19455. + struct task_struct *task)
  19456. +{
  19457. + if (latency_ns <= 0 && task != NULL && rt_task(task) &&
  19458. + (task->prio < curr->prio ||
  19459. + (task->prio == curr->prio &&
  19460. + !cpumask_test_cpu(cpu, &task->cpus_allowed)))) {
  19461. + long latency;
  19462. + cycle_t now;
  19463. +
  19464. + if (missed_timer_offsets_pid) {
  19465. + if (likely(missed_timer_offsets_pid !=
  19466. + task_pid_nr(task)))
  19467. + return;
  19468. + }
  19469. +
  19470. + now = ftrace_now(cpu);
  19471. + latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
  19472. + latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
  19473. + task);
  19474. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  19475. + task->timer_offset = latency;
  19476. +#endif
  19477. + }
  19478. +}
  19479. +#endif
  19480. +
  19481. +static __init int latency_hist_init(void)
  19482. +{
  19483. + struct dentry *latency_hist_root = NULL;
  19484. + struct dentry *dentry;
  19485. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  19486. + struct dentry *dentry_sharedprio;
  19487. +#endif
  19488. + struct dentry *entry;
  19489. + struct dentry *enable_root;
  19490. + int i = 0;
  19491. + struct hist_data *my_hist;
  19492. + char name[64];
  19493. + char *cpufmt = "CPU%d";
  19494. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  19495. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19496. + char *cpufmt_maxlatproc = "max_latency-CPU%d";
  19497. + struct maxlatproc_data *mp = NULL;
  19498. +#endif
  19499. +
  19500. + dentry = tracing_init_dentry();
  19501. + latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry);
  19502. + enable_root = debugfs_create_dir("enable", latency_hist_root);
  19503. +
  19504. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  19505. + dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root);
  19506. + for_each_possible_cpu(i) {
  19507. + sprintf(name, cpufmt, i);
  19508. + entry = debugfs_create_file(name, 0444, dentry,
  19509. + &per_cpu(irqsoff_hist, i), &latency_hist_fops);
  19510. + my_hist = &per_cpu(irqsoff_hist, i);
  19511. + atomic_set(&my_hist->hist_mode, 1);
  19512. + my_hist->min_lat = LONG_MAX;
  19513. + }
  19514. + entry = debugfs_create_file("reset", 0644, dentry,
  19515. + (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops);
  19516. +#endif
  19517. +
  19518. +#ifdef CONFIG_PREEMPT_OFF_HIST
  19519. + dentry = debugfs_create_dir(preemptoff_hist_dir,
  19520. + latency_hist_root);
  19521. + for_each_possible_cpu(i) {
  19522. + sprintf(name, cpufmt, i);
  19523. + entry = debugfs_create_file(name, 0444, dentry,
  19524. + &per_cpu(preemptoff_hist, i), &latency_hist_fops);
  19525. + my_hist = &per_cpu(preemptoff_hist, i);
  19526. + atomic_set(&my_hist->hist_mode, 1);
  19527. + my_hist->min_lat = LONG_MAX;
  19528. + }
  19529. + entry = debugfs_create_file("reset", 0644, dentry,
  19530. + (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops);
  19531. +#endif
  19532. +
  19533. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  19534. + dentry = debugfs_create_dir(preemptirqsoff_hist_dir,
  19535. + latency_hist_root);
  19536. + for_each_possible_cpu(i) {
  19537. + sprintf(name, cpufmt, i);
  19538. + entry = debugfs_create_file(name, 0444, dentry,
  19539. + &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops);
  19540. + my_hist = &per_cpu(preemptirqsoff_hist, i);
  19541. + atomic_set(&my_hist->hist_mode, 1);
  19542. + my_hist->min_lat = LONG_MAX;
  19543. + }
  19544. + entry = debugfs_create_file("reset", 0644, dentry,
  19545. + (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops);
  19546. +#endif
  19547. +
  19548. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  19549. + entry = debugfs_create_file("preemptirqsoff", 0644,
  19550. + enable_root, (void *)&preemptirqsoff_enabled_data,
  19551. + &enable_fops);
  19552. +#endif
  19553. +
  19554. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  19555. + dentry = debugfs_create_dir(wakeup_latency_hist_dir,
  19556. + latency_hist_root);
  19557. + dentry_sharedprio = debugfs_create_dir(
  19558. + wakeup_latency_hist_dir_sharedprio, dentry);
  19559. + for_each_possible_cpu(i) {
  19560. + sprintf(name, cpufmt, i);
  19561. +
  19562. + entry = debugfs_create_file(name, 0444, dentry,
  19563. + &per_cpu(wakeup_latency_hist, i),
  19564. + &latency_hist_fops);
  19565. + my_hist = &per_cpu(wakeup_latency_hist, i);
  19566. + atomic_set(&my_hist->hist_mode, 1);
  19567. + my_hist->min_lat = LONG_MAX;
  19568. +
  19569. + entry = debugfs_create_file(name, 0444, dentry_sharedprio,
  19570. + &per_cpu(wakeup_latency_hist_sharedprio, i),
  19571. + &latency_hist_fops);
  19572. + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i);
  19573. + atomic_set(&my_hist->hist_mode, 1);
  19574. + my_hist->min_lat = LONG_MAX;
  19575. +
  19576. + sprintf(name, cpufmt_maxlatproc, i);
  19577. +
  19578. + mp = &per_cpu(wakeup_maxlatproc, i);
  19579. + entry = debugfs_create_file(name, 0444, dentry, mp,
  19580. + &maxlatproc_fops);
  19581. + clear_maxlatprocdata(mp);
  19582. +
  19583. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, i);
  19584. + entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp,
  19585. + &maxlatproc_fops);
  19586. + clear_maxlatprocdata(mp);
  19587. + }
  19588. + entry = debugfs_create_file("pid", 0644, dentry,
  19589. + (void *)&wakeup_pid, &pid_fops);
  19590. + entry = debugfs_create_file("reset", 0644, dentry,
  19591. + (void *)WAKEUP_LATENCY, &latency_hist_reset_fops);
  19592. + entry = debugfs_create_file("reset", 0644, dentry_sharedprio,
  19593. + (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops);
  19594. + entry = debugfs_create_file("wakeup", 0644,
  19595. + enable_root, (void *)&wakeup_latency_enabled_data,
  19596. + &enable_fops);
  19597. +#endif
  19598. +
  19599. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19600. + dentry = debugfs_create_dir(missed_timer_offsets_dir,
  19601. + latency_hist_root);
  19602. + for_each_possible_cpu(i) {
  19603. + sprintf(name, cpufmt, i);
  19604. + entry = debugfs_create_file(name, 0444, dentry,
  19605. + &per_cpu(missed_timer_offsets, i), &latency_hist_fops);
  19606. + my_hist = &per_cpu(missed_timer_offsets, i);
  19607. + atomic_set(&my_hist->hist_mode, 1);
  19608. + my_hist->min_lat = LONG_MAX;
  19609. +
  19610. + sprintf(name, cpufmt_maxlatproc, i);
  19611. + mp = &per_cpu(missed_timer_offsets_maxlatproc, i);
  19612. + entry = debugfs_create_file(name, 0444, dentry, mp,
  19613. + &maxlatproc_fops);
  19614. + clear_maxlatprocdata(mp);
  19615. + }
  19616. + entry = debugfs_create_file("pid", 0644, dentry,
  19617. + (void *)&missed_timer_offsets_pid, &pid_fops);
  19618. + entry = debugfs_create_file("reset", 0644, dentry,
  19619. + (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops);
  19620. + entry = debugfs_create_file("missed_timer_offsets", 0644,
  19621. + enable_root, (void *)&missed_timer_offsets_enabled_data,
  19622. + &enable_fops);
  19623. +#endif
  19624. +
  19625. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  19626. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19627. + dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
  19628. + latency_hist_root);
  19629. + for_each_possible_cpu(i) {
  19630. + sprintf(name, cpufmt, i);
  19631. + entry = debugfs_create_file(name, 0444, dentry,
  19632. + &per_cpu(timerandwakeup_latency_hist, i),
  19633. + &latency_hist_fops);
  19634. + my_hist = &per_cpu(timerandwakeup_latency_hist, i);
  19635. + atomic_set(&my_hist->hist_mode, 1);
  19636. + my_hist->min_lat = LONG_MAX;
  19637. +
  19638. + sprintf(name, cpufmt_maxlatproc, i);
  19639. + mp = &per_cpu(timerandwakeup_maxlatproc, i);
  19640. + entry = debugfs_create_file(name, 0444, dentry, mp,
  19641. + &maxlatproc_fops);
  19642. + clear_maxlatprocdata(mp);
  19643. + }
  19644. + entry = debugfs_create_file("reset", 0644, dentry,
  19645. + (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops);
  19646. + entry = debugfs_create_file("timerandwakeup", 0644,
  19647. + enable_root, (void *)&timerandwakeup_enabled_data,
  19648. + &enable_fops);
  19649. +#endif
  19650. + return 0;
  19651. +}
  19652. +
  19653. +device_initcall(latency_hist_init);
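
The file above only creates the debugfs hierarchy; nothing in the hunk shows how it is consumed. As an illustration only (not part of the diff), the sketch below enables the wakeup histogram, resets it, and dumps one per-CPU file. It assumes debugfs is mounted at /sys/kernel/debug, that latency_hist_init() hangs the directory off the tracing dentry, and that latency_hist_dir_root and wakeup_latency_hist_dir, defined earlier in latency_hist.c, expand to "latency_hist" and "wakeup"; the "enable", "reset" and CPU%d entries are the ones created above.

/* Illustrative user-space consumer of the wakeup latency histogram.
 * Paths are assumptions based on the debugfs setup in latency_hist_init().
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define HIST_ROOT "/sys/kernel/debug/tracing/latency_hist"

static void write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		exit(1);
	}
	fputs(val, f);
	fclose(f);
}

int main(void)
{
	char buf[256];
	FILE *f;

	/* Arm the wakeup probes (handled by do_enable() above). */
	write_str(HIST_ROOT "/enable/wakeup", "1");
	/* Clear stale samples (latency_hist_reset()). */
	write_str(HIST_ROOT "/wakeup/reset", "1");

	sleep(10);	/* let the workload of interest run */

	/* Dump the per-CPU histogram served by latency_hist_fops. */
	f = fopen(HIST_ROOT "/wakeup/CPU0", "r");
	if (!f) {
		perror("CPU0");
		exit(1);
	}
	while (fgets(buf, sizeof(buf), f))
		fputs(buf, stdout);
	fclose(f);
	return 0;
}
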
  19654. diff -Nur linux-3.18.10.orig/kernel/trace/Makefile linux-3.18.10/kernel/trace/Makefile
  19655. --- linux-3.18.10.orig/kernel/trace/Makefile 2015-03-24 02:05:12.000000000 +0100
  19656. +++ linux-3.18.10/kernel/trace/Makefile 2015-03-26 12:42:18.683588345 +0100
  19657. @@ -36,6 +36,10 @@
  19658. obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
  19659. obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
  19660. obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
  19661. +obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o
  19662. +obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o
  19663. +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o
  19664. +obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o
  19665. obj-$(CONFIG_NOP_TRACER) += trace_nop.o
  19666. obj-$(CONFIG_STACK_TRACER) += trace_stack.o
  19667. obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
  19668. diff -Nur linux-3.18.10.orig/kernel/trace/trace.c linux-3.18.10/kernel/trace/trace.c
  19669. --- linux-3.18.10.orig/kernel/trace/trace.c 2015-03-24 02:05:12.000000000 +0100
  19670. +++ linux-3.18.10/kernel/trace/trace.c 2015-03-26 12:42:18.687588350 +0100
  19671. @@ -1579,6 +1579,7 @@
  19672. struct task_struct *tsk = current;
  19673. entry->preempt_count = pc & 0xff;
  19674. + entry->preempt_lazy_count = preempt_lazy_count();
  19675. entry->pid = (tsk) ? tsk->pid : 0;
  19676. entry->flags =
  19677. #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
  19678. @@ -1588,8 +1589,11 @@
  19679. #endif
  19680. ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
  19681. ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
  19682. - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
  19683. + (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) |
  19684. + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) |
  19685. (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
  19686. +
  19687. + entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
  19688. }
  19689. EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
  19690. @@ -2509,14 +2513,17 @@
  19691. static void print_lat_help_header(struct seq_file *m)
  19692. {
  19693. - seq_puts(m, "# _------=> CPU# \n");
  19694. - seq_puts(m, "# / _-----=> irqs-off \n");
  19695. - seq_puts(m, "# | / _----=> need-resched \n");
  19696. - seq_puts(m, "# || / _---=> hardirq/softirq \n");
  19697. - seq_puts(m, "# ||| / _--=> preempt-depth \n");
  19698. - seq_puts(m, "# |||| / delay \n");
  19699. - seq_puts(m, "# cmd pid ||||| time | caller \n");
  19700. - seq_puts(m, "# \\ / ||||| \\ | / \n");
  19701. + seq_puts(m, "# _--------=> CPU# \n");
  19702. + seq_puts(m, "# / _-------=> irqs-off \n");
  19703. + seq_puts(m, "# | / _------=> need-resched \n");
  19704. + seq_puts(m, "# || / _-----=> need-resched_lazy \n");
  19705. + seq_puts(m, "# ||| / _----=> hardirq/softirq \n");
  19706. + seq_puts(m, "# |||| / _---=> preempt-depth \n");
  19707. + seq_puts(m, "# ||||| / _--=> preempt-lazy-depth\n");
  19708. + seq_puts(m, "# |||||| / _-=> migrate-disable \n");
  19709. + seq_puts(m, "# ||||||| / delay \n");
  19710. + seq_puts(m, "# cmd pid |||||||| time | caller \n");
  19711. + seq_puts(m, "# \\ / |||||||| \\ | / \n");
  19712. }
  19713. static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
  19714. @@ -2540,13 +2547,16 @@
  19715. static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
  19716. {
  19717. print_event_info(buf, m);
  19718. - seq_puts(m, "# _-----=> irqs-off\n");
  19719. - seq_puts(m, "# / _----=> need-resched\n");
  19720. - seq_puts(m, "# | / _---=> hardirq/softirq\n");
  19721. - seq_puts(m, "# || / _--=> preempt-depth\n");
  19722. - seq_puts(m, "# ||| / delay\n");
  19723. - seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n");
  19724. - seq_puts(m, "# | | | |||| | |\n");
  19725. + seq_puts(m, "# _-------=> irqs-off \n");
  19726. + seq_puts(m, "# / _------=> need-resched \n");
  19727. + seq_puts(m, "# |/ _-----=> need-resched_lazy \n");
  19728. + seq_puts(m, "# ||/ _----=> hardirq/softirq \n");
  19729. + seq_puts(m, "# |||/ _---=> preempt-depth \n");
  19730. + seq_puts(m, "# ||||/ _--=> preempt-lazy-depth\n");
  19731. + seq_puts(m, "# ||||| / _-=> migrate-disable \n");
  19732. + seq_puts(m, "# |||||| / delay\n");
  19733. + seq_puts(m, "# TASK-PID CPU# |||||| TIMESTAMP FUNCTION\n");
  19734. + seq_puts(m, "# | | | |||||| | |\n");
  19735. }
  19736. void
  19737. diff -Nur linux-3.18.10.orig/kernel/trace/trace_events.c linux-3.18.10/kernel/trace/trace_events.c
  19738. --- linux-3.18.10.orig/kernel/trace/trace_events.c 2015-03-24 02:05:12.000000000 +0100
  19739. +++ linux-3.18.10/kernel/trace/trace_events.c 2015-03-26 12:42:18.687588350 +0100
  19740. @@ -162,6 +162,8 @@
  19741. __common_field(unsigned char, flags);
  19742. __common_field(unsigned char, preempt_count);
  19743. __common_field(int, pid);
  19744. + __common_field(unsigned short, migrate_disable);
  19745. + __common_field(unsigned short, padding);
  19746. return ret;
  19747. }
  19748. diff -Nur linux-3.18.10.orig/kernel/trace/trace.h linux-3.18.10/kernel/trace/trace.h
  19749. --- linux-3.18.10.orig/kernel/trace/trace.h 2015-03-24 02:05:12.000000000 +0100
  19750. +++ linux-3.18.10/kernel/trace/trace.h 2015-03-26 12:42:18.687588350 +0100
  19751. @@ -119,6 +119,7 @@
  19752. * NEED_RESCHED - reschedule is requested
  19753. * HARDIRQ - inside an interrupt handler
  19754. * SOFTIRQ - inside a softirq handler
  19755. + * NEED_RESCHED_LAZY - lazy reschedule is requested
  19756. */
  19757. enum trace_flag_type {
  19758. TRACE_FLAG_IRQS_OFF = 0x01,
  19759. @@ -127,6 +128,7 @@
  19760. TRACE_FLAG_HARDIRQ = 0x08,
  19761. TRACE_FLAG_SOFTIRQ = 0x10,
  19762. TRACE_FLAG_PREEMPT_RESCHED = 0x20,
  19763. + TRACE_FLAG_NEED_RESCHED_LAZY = 0x40,
  19764. };
  19765. #define TRACE_BUF_SIZE 1024
  19766. diff -Nur linux-3.18.10.orig/kernel/trace/trace_irqsoff.c linux-3.18.10/kernel/trace/trace_irqsoff.c
  19767. --- linux-3.18.10.orig/kernel/trace/trace_irqsoff.c 2015-03-24 02:05:12.000000000 +0100
  19768. +++ linux-3.18.10/kernel/trace/trace_irqsoff.c 2015-03-26 12:42:18.687588350 +0100
  19769. @@ -17,6 +17,7 @@
  19770. #include <linux/fs.h>
  19771. #include "trace.h"
  19772. +#include <trace/events/hist.h>
  19773. static struct trace_array *irqsoff_trace __read_mostly;
  19774. static int tracer_enabled __read_mostly;
  19775. @@ -435,11 +436,13 @@
  19776. {
  19777. if (preempt_trace() || irq_trace())
  19778. start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  19779. + trace_preemptirqsoff_hist(TRACE_START, 1);
  19780. }
  19781. EXPORT_SYMBOL_GPL(start_critical_timings);
  19782. void stop_critical_timings(void)
  19783. {
  19784. + trace_preemptirqsoff_hist(TRACE_STOP, 0);
  19785. if (preempt_trace() || irq_trace())
  19786. stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  19787. }
  19788. @@ -449,6 +452,7 @@
  19789. #ifdef CONFIG_PROVE_LOCKING
  19790. void time_hardirqs_on(unsigned long a0, unsigned long a1)
  19791. {
  19792. + trace_preemptirqsoff_hist(IRQS_ON, 0);
  19793. if (!preempt_trace() && irq_trace())
  19794. stop_critical_timing(a0, a1);
  19795. }
  19796. @@ -457,6 +461,7 @@
  19797. {
  19798. if (!preempt_trace() && irq_trace())
  19799. start_critical_timing(a0, a1);
  19800. + trace_preemptirqsoff_hist(IRQS_OFF, 1);
  19801. }
  19802. #else /* !CONFIG_PROVE_LOCKING */
  19803. @@ -482,6 +487,7 @@
  19804. */
  19805. void trace_hardirqs_on(void)
  19806. {
  19807. + trace_preemptirqsoff_hist(IRQS_ON, 0);
  19808. if (!preempt_trace() && irq_trace())
  19809. stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  19810. }
  19811. @@ -491,11 +497,13 @@
  19812. {
  19813. if (!preempt_trace() && irq_trace())
  19814. start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  19815. + trace_preemptirqsoff_hist(IRQS_OFF, 1);
  19816. }
  19817. EXPORT_SYMBOL(trace_hardirqs_off);
  19818. __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
  19819. {
  19820. + trace_preemptirqsoff_hist(IRQS_ON, 0);
  19821. if (!preempt_trace() && irq_trace())
  19822. stop_critical_timing(CALLER_ADDR0, caller_addr);
  19823. }
  19824. @@ -505,6 +513,7 @@
  19825. {
  19826. if (!preempt_trace() && irq_trace())
  19827. start_critical_timing(CALLER_ADDR0, caller_addr);
  19828. + trace_preemptirqsoff_hist(IRQS_OFF, 1);
  19829. }
  19830. EXPORT_SYMBOL(trace_hardirqs_off_caller);
  19831. @@ -514,12 +523,14 @@
  19832. #ifdef CONFIG_PREEMPT_TRACER
  19833. void trace_preempt_on(unsigned long a0, unsigned long a1)
  19834. {
  19835. + trace_preemptirqsoff_hist(PREEMPT_ON, 0);
  19836. if (preempt_trace() && !irq_trace())
  19837. stop_critical_timing(a0, a1);
  19838. }
  19839. void trace_preempt_off(unsigned long a0, unsigned long a1)
  19840. {
  19841. + trace_preemptirqsoff_hist(PREEMPT_OFF, 1);
  19842. if (preempt_trace() && !irq_trace())
  19843. start_critical_timing(a0, a1);
  19844. }
  19845. diff -Nur linux-3.18.10.orig/kernel/trace/trace_output.c linux-3.18.10/kernel/trace/trace_output.c
  19846. --- linux-3.18.10.orig/kernel/trace/trace_output.c 2015-03-24 02:05:12.000000000 +0100
  19847. +++ linux-3.18.10/kernel/trace/trace_output.c 2015-03-26 12:42:18.687588350 +0100
  19848. @@ -410,6 +410,7 @@
  19849. {
  19850. char hardsoft_irq;
  19851. char need_resched;
  19852. + char need_resched_lazy;
  19853. char irqs_off;
  19854. int hardirq;
  19855. int softirq;
  19856. @@ -438,6 +439,8 @@
  19857. need_resched = '.';
  19858. break;
  19859. }
  19860. + need_resched_lazy =
  19861. + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
  19862. hardsoft_irq =
  19863. (hardirq && softirq) ? 'H' :
  19864. @@ -445,8 +448,9 @@
  19865. softirq ? 's' :
  19866. '.';
  19867. - if (!trace_seq_printf(s, "%c%c%c",
  19868. - irqs_off, need_resched, hardsoft_irq))
  19869. + if (!trace_seq_printf(s, "%c%c%c%c",
  19870. + irqs_off, need_resched, need_resched_lazy,
  19871. + hardsoft_irq))
  19872. return 0;
  19873. if (entry->preempt_count)
  19874. @@ -454,6 +458,16 @@
  19875. else
  19876. ret = trace_seq_putc(s, '.');
  19877. + if (entry->preempt_lazy_count)
  19878. + ret = trace_seq_printf(s, "%x", entry->preempt_lazy_count);
  19879. + else
  19880. + ret = trace_seq_putc(s, '.');
  19881. +
  19882. + if (entry->migrate_disable)
  19883. + ret = trace_seq_printf(s, "%x", entry->migrate_disable);
  19884. + else
  19885. + ret = trace_seq_putc(s, '.');
  19886. +
  19887. return ret;
  19888. }
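
Taken together, the trace.c, trace.h and trace_output.c hunks extend the latency-format prefix from five to seven columns. The stand-alone sketch below reproduces the decoding as ordinary user-space C so the column order is easy to see; it is an illustration, not kernel code. TRACE_FLAG_NEED_RESCHED = 0x04 is taken from the unmodified enum context, the irqs-off column is simplified to 'd'/'.', and the three counters are rendered as a single hex digit instead of the kernel's full "%x".

/* Illustration of the extended latency-format prefix decoding. */
#include <stdio.h>

enum {
	FLAG_IRQS_OFF		= 0x01,
	FLAG_NEED_RESCHED	= 0x04,
	FLAG_HARDIRQ		= 0x08,
	FLAG_SOFTIRQ		= 0x10,
	FLAG_PREEMPT_RESCHED	= 0x20,
	FLAG_NEED_RESCHED_LAZY	= 0x40,	/* added by this patch */
};

static void lat_prefix(char buf[8], unsigned flags, unsigned preempt_count,
		       unsigned preempt_lazy_count, unsigned migrate_disable)
{
	static const char hex[] = "0123456789abcdef";

	buf[0] = (flags & FLAG_IRQS_OFF) ? 'd' : '.';

	if ((flags & (FLAG_NEED_RESCHED | FLAG_PREEMPT_RESCHED)) ==
	    (FLAG_NEED_RESCHED | FLAG_PREEMPT_RESCHED))
		buf[1] = 'N';
	else if (flags & FLAG_NEED_RESCHED)
		buf[1] = 'n';
	else if (flags & FLAG_PREEMPT_RESCHED)
		buf[1] = 'p';
	else
		buf[1] = '.';

	buf[2] = (flags & FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';	/* new column */
	buf[3] = (flags & FLAG_HARDIRQ) ? ((flags & FLAG_SOFTIRQ) ? 'H' : 'h') :
		 (flags & FLAG_SOFTIRQ) ? 's' : '.';
	buf[4] = preempt_count ? hex[preempt_count & 0xf] : '.';
	buf[5] = preempt_lazy_count ? hex[preempt_lazy_count & 0xf] : '.';	/* new */
	buf[6] = migrate_disable ? hex[migrate_disable & 0xf] : '.';		/* new */
	buf[7] = '\0';
}

int main(void)
{
	char buf[8];

	/* irqs off, lazy resched pending, preempt depth 1, migrate_disable 2 */
	lat_prefix(buf, FLAG_IRQS_OFF | FLAG_NEED_RESCHED_LAZY, 1, 0, 2);
	printf("%s\n", buf);	/* prints "d.L.1.2" */
	return 0;
}
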
  19889. diff -Nur linux-3.18.10.orig/kernel/user.c linux-3.18.10/kernel/user.c
  19890. --- linux-3.18.10.orig/kernel/user.c 2015-03-24 02:05:12.000000000 +0100
  19891. +++ linux-3.18.10/kernel/user.c 2015-03-26 12:42:18.687588350 +0100
  19892. @@ -158,11 +158,11 @@
  19893. if (!up)
  19894. return;
  19895. - local_irq_save(flags);
  19896. + local_irq_save_nort(flags);
  19897. if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
  19898. free_user(up, flags);
  19899. else
  19900. - local_irq_restore(flags);
  19901. + local_irq_restore_nort(flags);
  19902. }
  19903. struct user_struct *alloc_uid(kuid_t uid)
  19904. diff -Nur linux-3.18.10.orig/kernel/watchdog.c linux-3.18.10/kernel/watchdog.c
  19905. --- linux-3.18.10.orig/kernel/watchdog.c 2015-03-24 02:05:12.000000000 +0100
  19906. +++ linux-3.18.10/kernel/watchdog.c 2015-03-26 12:42:18.687588350 +0100
  19907. @@ -248,6 +248,8 @@
  19908. #ifdef CONFIG_HARDLOCKUP_DETECTOR
  19909. +static DEFINE_RAW_SPINLOCK(watchdog_output_lock);
  19910. +
  19911. static struct perf_event_attr wd_hw_attr = {
  19912. .type = PERF_TYPE_HARDWARE,
  19913. .config = PERF_COUNT_HW_CPU_CYCLES,
  19914. @@ -281,13 +283,21 @@
  19915. /* only print hardlockups once */
  19916. if (__this_cpu_read(hard_watchdog_warn) == true)
  19917. return;
  19918. + /*
  19919. + * If early-printk is enabled then make sure we do not
  19920. + * lock up in printk() and kill console logging:
  19921. + */
  19922. + printk_kill();
  19923. - if (hardlockup_panic)
  19924. + if (hardlockup_panic) {
  19925. panic("Watchdog detected hard LOCKUP on cpu %d",
  19926. this_cpu);
  19927. - else
  19928. + } else {
  19929. + raw_spin_lock(&watchdog_output_lock);
  19930. WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
  19931. this_cpu);
  19932. + raw_spin_unlock(&watchdog_output_lock);
  19933. + }
  19934. __this_cpu_write(hard_watchdog_warn, true);
  19935. return;
  19936. @@ -430,6 +440,7 @@
  19937. /* kick off the timer for the hardlockup detector */
  19938. hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  19939. hrtimer->function = watchdog_timer_fn;
  19940. + hrtimer->irqsafe = 1;
  19941. /* Enable the perf event */
  19942. watchdog_nmi_enable(cpu);
  19943. diff -Nur linux-3.18.10.orig/kernel/workqueue.c linux-3.18.10/kernel/workqueue.c
  19944. --- linux-3.18.10.orig/kernel/workqueue.c 2015-03-24 02:05:12.000000000 +0100
  19945. +++ linux-3.18.10/kernel/workqueue.c 2015-03-26 12:42:18.687588350 +0100
  19946. @@ -48,6 +48,8 @@
  19947. #include <linux/nodemask.h>
  19948. #include <linux/moduleparam.h>
  19949. #include <linux/uaccess.h>
  19950. +#include <linux/locallock.h>
  19951. +#include <linux/delay.h>
  19952. #include "workqueue_internal.h"
  19953. @@ -121,15 +123,20 @@
  19954. * cpu or grabbing pool->lock is enough for read access. If
  19955. * POOL_DISASSOCIATED is set, it's identical to L.
  19956. *
  19957. + * On RT we need the extra protection via rt_lock_idle_list() for
  19958. + * the list manipulations against read access from
  19959. + * wq_worker_sleeping(). All other places are nicely serialized via
  19960. + * pool->lock.
  19961. + *
  19962. * A: pool->attach_mutex protected.
  19963. *
  19964. * PL: wq_pool_mutex protected.
  19965. *
  19966. - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
  19967. + * PR: wq_pool_mutex protected for writes. RCU protected for reads.
  19968. *
  19969. * WQ: wq->mutex protected.
  19970. *
  19971. - * WR: wq->mutex protected for writes. Sched-RCU protected for reads.
  19972. + * WR: wq->mutex protected for writes. RCU protected for reads.
  19973. *
  19974. * MD: wq_mayday_lock protected.
  19975. */
  19976. @@ -177,7 +184,7 @@
  19977. atomic_t nr_running ____cacheline_aligned_in_smp;
  19978. /*
  19979. - * Destruction of pool is sched-RCU protected to allow dereferences
  19980. + * Destruction of pool is RCU protected to allow dereferences
  19981. * from get_work_pool().
  19982. */
  19983. struct rcu_head rcu;
  19984. @@ -206,7 +213,7 @@
  19985. /*
  19986. * Release of unbound pwq is punted to system_wq. See put_pwq()
  19987. * and pwq_unbound_release_workfn() for details. pool_workqueue
  19988. - * itself is also sched-RCU protected so that the first pwq can be
  19989. + * itself is also RCU protected so that the first pwq can be
  19990. * determined without grabbing wq->mutex.
  19991. */
  19992. struct work_struct unbound_release_work;
  19993. @@ -321,6 +328,8 @@
  19994. struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
  19995. EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
  19996. +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock);
  19997. +
  19998. static int worker_thread(void *__worker);
  19999. static void copy_workqueue_attrs(struct workqueue_attrs *to,
  20000. const struct workqueue_attrs *from);
  20001. @@ -329,14 +338,14 @@
  20002. #include <trace/events/workqueue.h>
  20003. #define assert_rcu_or_pool_mutex() \
  20004. - rcu_lockdep_assert(rcu_read_lock_sched_held() || \
  20005. + rcu_lockdep_assert(rcu_read_lock_held() || \
  20006. lockdep_is_held(&wq_pool_mutex), \
  20007. - "sched RCU or wq_pool_mutex should be held")
  20008. + "RCU or wq_pool_mutex should be held")
  20009. #define assert_rcu_or_wq_mutex(wq) \
  20010. - rcu_lockdep_assert(rcu_read_lock_sched_held() || \
  20011. + rcu_lockdep_assert(rcu_read_lock_held() || \
  20012. lockdep_is_held(&wq->mutex), \
  20013. - "sched RCU or wq->mutex should be held")
  20014. + "RCU or wq->mutex should be held")
  20015. #define for_each_cpu_worker_pool(pool, cpu) \
  20016. for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
  20017. @@ -348,7 +357,7 @@
  20018. * @pool: iteration cursor
  20019. * @pi: integer used for iteration
  20020. *
  20021. - * This must be called either with wq_pool_mutex held or sched RCU read
  20022. + * This must be called either with wq_pool_mutex held or RCU read
  20023. * locked. If the pool needs to be used beyond the locking in effect, the
  20024. * caller is responsible for guaranteeing that the pool stays online.
  20025. *
  20026. @@ -380,7 +389,7 @@
  20027. * @pwq: iteration cursor
  20028. * @wq: the target workqueue
  20029. *
  20030. - * This must be called either with wq->mutex held or sched RCU read locked.
  20031. + * This must be called either with wq->mutex held or RCU read locked.
  20032. * If the pwq needs to be used beyond the locking in effect, the caller is
  20033. * responsible for guaranteeing that the pwq stays online.
  20034. *
  20035. @@ -392,6 +401,31 @@
  20036. if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
  20037. else
  20038. +#ifdef CONFIG_PREEMPT_RT_BASE
  20039. +static inline void rt_lock_idle_list(struct worker_pool *pool)
  20040. +{
  20041. + preempt_disable();
  20042. +}
  20043. +static inline void rt_unlock_idle_list(struct worker_pool *pool)
  20044. +{
  20045. + preempt_enable();
  20046. +}
  20047. +static inline void sched_lock_idle_list(struct worker_pool *pool) { }
  20048. +static inline void sched_unlock_idle_list(struct worker_pool *pool) { }
  20049. +#else
  20050. +static inline void rt_lock_idle_list(struct worker_pool *pool) { }
  20051. +static inline void rt_unlock_idle_list(struct worker_pool *pool) { }
  20052. +static inline void sched_lock_idle_list(struct worker_pool *pool)
  20053. +{
  20054. + spin_lock_irq(&pool->lock);
  20055. +}
  20056. +static inline void sched_unlock_idle_list(struct worker_pool *pool)
  20057. +{
  20058. + spin_unlock_irq(&pool->lock);
  20059. +}
  20060. +#endif
  20061. +
  20062. +
  20063. #ifdef CONFIG_DEBUG_OBJECTS_WORK
  20064. static struct debug_obj_descr work_debug_descr;
  20065. @@ -542,7 +576,7 @@
  20066. * @wq: the target workqueue
  20067. * @node: the node ID
  20068. *
  20069. - * This must be called either with pwq_lock held or sched RCU read locked.
  20070. + * This must be called either with pwq_lock held or RCU read locked.
  20071. * If the pwq needs to be used beyond the locking in effect, the caller is
  20072. * responsible for guaranteeing that the pwq stays online.
  20073. *
  20074. @@ -646,8 +680,8 @@
  20075. * @work: the work item of interest
  20076. *
  20077. * Pools are created and destroyed under wq_pool_mutex, and allows read
  20078. - * access under sched-RCU read lock. As such, this function should be
  20079. - * called under wq_pool_mutex or with preemption disabled.
  20080. + * access under RCU read lock. As such, this function should be
  20081. + * called under wq_pool_mutex or inside of a rcu_read_lock() region.
  20082. *
  20083. * All fields of the returned pool are accessible as long as the above
  20084. * mentioned locking is in effect. If the returned pool needs to be used
  20085. @@ -784,51 +818,44 @@
  20086. */
  20087. static void wake_up_worker(struct worker_pool *pool)
  20088. {
  20089. - struct worker *worker = first_idle_worker(pool);
  20090. + struct worker *worker;
  20091. +
  20092. + rt_lock_idle_list(pool);
  20093. +
  20094. + worker = first_idle_worker(pool);
  20095. if (likely(worker))
  20096. wake_up_process(worker->task);
  20097. +
  20098. + rt_unlock_idle_list(pool);
  20099. }
  20100. /**
  20101. - * wq_worker_waking_up - a worker is waking up
  20102. - * @task: task waking up
  20103. - * @cpu: CPU @task is waking up to
  20104. - *
  20105. - * This function is called during try_to_wake_up() when a worker is
  20106. - * being awoken.
  20107. + * wq_worker_running - a worker is running again
  20108. + * @task: task returning from sleep
  20109. *
  20110. - * CONTEXT:
  20111. - * spin_lock_irq(rq->lock)
  20112. + * This function is called when a worker returns from schedule()
  20113. */
  20114. -void wq_worker_waking_up(struct task_struct *task, int cpu)
  20115. +void wq_worker_running(struct task_struct *task)
  20116. {
  20117. struct worker *worker = kthread_data(task);
  20118. - if (!(worker->flags & WORKER_NOT_RUNNING)) {
  20119. - WARN_ON_ONCE(worker->pool->cpu != cpu);
  20120. + if (!worker->sleeping)
  20121. + return;
  20122. + if (!(worker->flags & WORKER_NOT_RUNNING))
  20123. atomic_inc(&worker->pool->nr_running);
  20124. - }
  20125. + worker->sleeping = 0;
  20126. }
  20127. /**
  20128. * wq_worker_sleeping - a worker is going to sleep
  20129. * @task: task going to sleep
  20130. - * @cpu: CPU in question, must be the current CPU number
  20131. - *
  20132. - * This function is called during schedule() when a busy worker is
  20133. - * going to sleep. Worker on the same cpu can be woken up by
  20134. - * returning pointer to its task.
  20135. - *
  20136. - * CONTEXT:
  20137. - * spin_lock_irq(rq->lock)
  20138. - *
  20139. - * Return:
  20140. - * Worker task on @cpu to wake up, %NULL if none.
  20141. + * This function is called from schedule() when a busy worker is
  20142. + * going to sleep.
  20143. */
  20144. -struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
  20145. +void wq_worker_sleeping(struct task_struct *task)
  20146. {
  20147. - struct worker *worker = kthread_data(task), *to_wakeup = NULL;
  20148. + struct worker *worker = kthread_data(task);
  20149. struct worker_pool *pool;
  20150. /*
  20151. @@ -837,29 +864,26 @@
  20152. * checking NOT_RUNNING.
  20153. */
  20154. if (worker->flags & WORKER_NOT_RUNNING)
  20155. - return NULL;
  20156. + return;
  20157. pool = worker->pool;
  20158. - /* this can only happen on the local cpu */
  20159. - if (WARN_ON_ONCE(cpu != raw_smp_processor_id() || pool->cpu != cpu))
  20160. - return NULL;
  20161. + if (WARN_ON_ONCE(worker->sleeping))
  20162. + return;
  20163. +
  20164. + worker->sleeping = 1;
  20165. /*
  20166. * The counterpart of the following dec_and_test, implied mb,
  20167. * worklist not empty test sequence is in insert_work().
  20168. * Please read comment there.
  20169. - *
  20170. - * NOT_RUNNING is clear. This means that we're bound to and
  20171. - * running on the local cpu w/ rq lock held and preemption
  20172. - * disabled, which in turn means that none else could be
  20173. - * manipulating idle_list, so dereferencing idle_list without pool
  20174. - * lock is safe.
  20175. */
  20176. if (atomic_dec_and_test(&pool->nr_running) &&
  20177. - !list_empty(&pool->worklist))
  20178. - to_wakeup = first_idle_worker(pool);
  20179. - return to_wakeup ? to_wakeup->task : NULL;
  20180. + !list_empty(&pool->worklist)) {
  20181. + sched_lock_idle_list(pool);
  20182. + wake_up_worker(pool);
  20183. + sched_unlock_idle_list(pool);
  20184. + }
  20185. }
  20186. /**
  20187. @@ -1053,12 +1077,12 @@
  20188. {
  20189. if (pwq) {
  20190. /*
  20191. - * As both pwqs and pools are sched-RCU protected, the
  20192. + * As both pwqs and pools are RCU protected, the
  20193. * following lock operations are safe.
  20194. */
  20195. - spin_lock_irq(&pwq->pool->lock);
  20196. + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock);
  20197. put_pwq(pwq);
  20198. - spin_unlock_irq(&pwq->pool->lock);
  20199. + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock);
  20200. }
  20201. }
  20202. @@ -1160,7 +1184,7 @@
  20203. struct worker_pool *pool;
  20204. struct pool_workqueue *pwq;
  20205. - local_irq_save(*flags);
  20206. + local_lock_irqsave(pendingb_lock, *flags);
  20207. /* try to steal the timer if it exists */
  20208. if (is_dwork) {
  20209. @@ -1179,6 +1203,7 @@
  20210. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
  20211. return 0;
  20212. + rcu_read_lock();
  20213. /*
  20214. * The queueing is in progress, or it is already queued. Try to
  20215. * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
  20216. @@ -1217,14 +1242,16 @@
  20217. set_work_pool_and_keep_pending(work, pool->id);
  20218. spin_unlock(&pool->lock);
  20219. + rcu_read_unlock();
  20220. return 1;
  20221. }
  20222. spin_unlock(&pool->lock);
  20223. fail:
  20224. - local_irq_restore(*flags);
  20225. + rcu_read_unlock();
  20226. + local_unlock_irqrestore(pendingb_lock, *flags);
  20227. if (work_is_canceling(work))
  20228. return -ENOENT;
  20229. - cpu_relax();
  20230. + cpu_chill();
  20231. return -EAGAIN;
  20232. }
  20233. @@ -1293,7 +1320,7 @@
  20234. * queued or lose PENDING. Grabbing PENDING and queueing should
  20235. * happen with IRQ disabled.
  20236. */
  20237. - WARN_ON_ONCE(!irqs_disabled());
  20238. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  20239. debug_work_activate(work);
  20240. @@ -1301,6 +1328,8 @@
  20241. if (unlikely(wq->flags & __WQ_DRAINING) &&
  20242. WARN_ON_ONCE(!is_chained_work(wq)))
  20243. return;
  20244. +
  20245. + rcu_read_lock();
  20246. retry:
  20247. if (req_cpu == WORK_CPU_UNBOUND)
  20248. cpu = raw_smp_processor_id();
  20249. @@ -1357,10 +1386,8 @@
  20250. /* pwq determined, queue */
  20251. trace_workqueue_queue_work(req_cpu, pwq, work);
  20252. - if (WARN_ON(!list_empty(&work->entry))) {
  20253. - spin_unlock(&pwq->pool->lock);
  20254. - return;
  20255. - }
  20256. + if (WARN_ON(!list_empty(&work->entry)))
  20257. + goto out;
  20258. pwq->nr_in_flight[pwq->work_color]++;
  20259. work_flags = work_color_to_flags(pwq->work_color);
  20260. @@ -1376,7 +1403,9 @@
  20261. insert_work(pwq, work, worklist, work_flags);
  20262. +out:
  20263. spin_unlock(&pwq->pool->lock);
  20264. + rcu_read_unlock();
  20265. }
  20266. /**
  20267. @@ -1396,14 +1425,14 @@
  20268. bool ret = false;
  20269. unsigned long flags;
  20270. - local_irq_save(flags);
  20271. + local_lock_irqsave(pendingb_lock,flags);
  20272. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
  20273. __queue_work(cpu, wq, work);
  20274. ret = true;
  20275. }
  20276. - local_irq_restore(flags);
  20277. + local_unlock_irqrestore(pendingb_lock, flags);
  20278. return ret;
  20279. }
  20280. EXPORT_SYMBOL(queue_work_on);
  20281. @@ -1470,14 +1499,14 @@
  20282. unsigned long flags;
  20283. /* read the comment in __queue_work() */
  20284. - local_irq_save(flags);
  20285. + local_lock_irqsave(pendingb_lock, flags);
  20286. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
  20287. __queue_delayed_work(cpu, wq, dwork, delay);
  20288. ret = true;
  20289. }
  20290. - local_irq_restore(flags);
  20291. + local_unlock_irqrestore(pendingb_lock, flags);
  20292. return ret;
  20293. }
  20294. EXPORT_SYMBOL(queue_delayed_work_on);
  20295. @@ -1512,7 +1541,7 @@
  20296. if (likely(ret >= 0)) {
  20297. __queue_delayed_work(cpu, wq, dwork, delay);
  20298. - local_irq_restore(flags);
  20299. + local_unlock_irqrestore(pendingb_lock, flags);
  20300. }
  20301. /* -ENOENT from try_to_grab_pending() becomes %true */
  20302. @@ -1545,7 +1574,9 @@
  20303. worker->last_active = jiffies;
  20304. /* idle_list is LIFO */
  20305. + rt_lock_idle_list(pool);
  20306. list_add(&worker->entry, &pool->idle_list);
  20307. + rt_unlock_idle_list(pool);
  20308. if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
  20309. mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
  20310. @@ -1578,7 +1609,9 @@
  20311. return;
  20312. worker_clr_flags(worker, WORKER_IDLE);
  20313. pool->nr_idle--;
  20314. + rt_lock_idle_list(pool);
  20315. list_del_init(&worker->entry);
  20316. + rt_unlock_idle_list(pool);
  20317. }
  20318. static struct worker *alloc_worker(int node)
  20319. @@ -1746,7 +1779,9 @@
  20320. pool->nr_workers--;
  20321. pool->nr_idle--;
  20322. + rt_lock_idle_list(pool);
  20323. list_del_init(&worker->entry);
  20324. + rt_unlock_idle_list(pool);
  20325. worker->flags |= WORKER_DIE;
  20326. wake_up_process(worker->task);
  20327. }
  20328. @@ -2641,14 +2676,14 @@
  20329. might_sleep();
  20330. - local_irq_disable();
  20331. + rcu_read_lock();
  20332. pool = get_work_pool(work);
  20333. if (!pool) {
  20334. - local_irq_enable();
  20335. + rcu_read_unlock();
  20336. return false;
  20337. }
  20338. - spin_lock(&pool->lock);
  20339. + spin_lock_irq(&pool->lock);
  20340. /* see the comment in try_to_grab_pending() with the same code */
  20341. pwq = get_work_pwq(work);
  20342. if (pwq) {
  20343. @@ -2675,10 +2710,11 @@
  20344. else
  20345. lock_map_acquire_read(&pwq->wq->lockdep_map);
  20346. lock_map_release(&pwq->wq->lockdep_map);
  20347. -
  20348. + rcu_read_unlock();
  20349. return true;
  20350. already_gone:
  20351. spin_unlock_irq(&pool->lock);
  20352. + rcu_read_unlock();
  20353. return false;
  20354. }
  20355. @@ -2727,7 +2763,7 @@
  20356. /* tell other tasks trying to grab @work to back off */
  20357. mark_work_canceling(work);
  20358. - local_irq_restore(flags);
  20359. + local_unlock_irqrestore(pendingb_lock, flags);
  20360. flush_work(work);
  20361. clear_work_data(work);
  20362. @@ -2772,10 +2808,10 @@
  20363. */
  20364. bool flush_delayed_work(struct delayed_work *dwork)
  20365. {
  20366. - local_irq_disable();
  20367. + local_lock_irq(pendingb_lock);
  20368. if (del_timer_sync(&dwork->timer))
  20369. __queue_work(dwork->cpu, dwork->wq, &dwork->work);
  20370. - local_irq_enable();
  20371. + local_unlock_irq(pendingb_lock);
  20372. return flush_work(&dwork->work);
  20373. }
  20374. EXPORT_SYMBOL(flush_delayed_work);
  20375. @@ -2810,7 +2846,7 @@
  20376. set_work_pool_and_clear_pending(&dwork->work,
  20377. get_work_pool_id(&dwork->work));
  20378. - local_irq_restore(flags);
  20379. + local_unlock_irqrestore(pendingb_lock, flags);
  20380. return ret;
  20381. }
  20382. EXPORT_SYMBOL(cancel_delayed_work);
  20383. @@ -2996,7 +3032,8 @@
  20384. const char *delim = "";
  20385. int node, written = 0;
  20386. - rcu_read_lock_sched();
  20387. + get_online_cpus();
  20388. + rcu_read_lock();
  20389. for_each_node(node) {
  20390. written += scnprintf(buf + written, PAGE_SIZE - written,
  20391. "%s%d:%d", delim, node,
  20392. @@ -3004,7 +3041,8 @@
  20393. delim = " ";
  20394. }
  20395. written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
  20396. - rcu_read_unlock_sched();
  20397. + rcu_read_unlock();
  20398. + put_online_cpus();
  20399. return written;
  20400. }
  20401. @@ -3372,7 +3410,7 @@
  20402. * put_unbound_pool - put a worker_pool
  20403. * @pool: worker_pool to put
  20404. *
  20405. - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU
  20406. + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
  20407. * safe manner. get_unbound_pool() calls this function on its failure path
  20408. * and this function should be able to release pools which went through,
  20409. * successfully or not, init_worker_pool().
  20410. @@ -3426,8 +3464,8 @@
  20411. del_timer_sync(&pool->idle_timer);
  20412. del_timer_sync(&pool->mayday_timer);
  20413. - /* sched-RCU protected to allow dereferences from get_work_pool() */
  20414. - call_rcu_sched(&pool->rcu, rcu_free_pool);
  20415. + /* RCU protected to allow dereferences from get_work_pool() */
  20416. + call_rcu(&pool->rcu, rcu_free_pool);
  20417. }
  20418. /**
  20419. @@ -3532,7 +3570,7 @@
  20420. put_unbound_pool(pool);
  20421. mutex_unlock(&wq_pool_mutex);
  20422. - call_rcu_sched(&pwq->rcu, rcu_free_pwq);
  20423. + call_rcu(&pwq->rcu, rcu_free_pwq);
  20424. /*
  20425. * If we're the last pwq going away, @wq is already dead and no one
  20426. @@ -4244,7 +4282,8 @@
  20427. struct pool_workqueue *pwq;
  20428. bool ret;
  20429. - rcu_read_lock_sched();
  20430. + rcu_read_lock();
  20431. + preempt_disable();
  20432. if (cpu == WORK_CPU_UNBOUND)
  20433. cpu = smp_processor_id();
  20434. @@ -4255,7 +4294,8 @@
  20435. pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
  20436. ret = !list_empty(&pwq->delayed_works);
  20437. - rcu_read_unlock_sched();
  20438. + preempt_enable();
  20439. + rcu_read_unlock();
  20440. return ret;
  20441. }
  20442. @@ -4281,16 +4321,15 @@
  20443. if (work_pending(work))
  20444. ret |= WORK_BUSY_PENDING;
  20445. - local_irq_save(flags);
  20446. + rcu_read_lock();
  20447. pool = get_work_pool(work);
  20448. if (pool) {
  20449. - spin_lock(&pool->lock);
  20450. + spin_lock_irqsave(&pool->lock, flags);
  20451. if (find_worker_executing_work(pool, work))
  20452. ret |= WORK_BUSY_RUNNING;
  20453. - spin_unlock(&pool->lock);
  20454. + spin_unlock_irqrestore(&pool->lock, flags);
  20455. }
  20456. - local_irq_restore(flags);
  20457. -
  20458. + rcu_read_unlock();
  20459. return ret;
  20460. }
  20461. EXPORT_SYMBOL_GPL(work_busy);
  20462. @@ -4719,16 +4758,16 @@
  20463. * nr_active is monotonically decreasing. It's safe
  20464. * to peek without lock.
  20465. */
  20466. - rcu_read_lock_sched();
  20467. + rcu_read_lock();
  20468. for_each_pwq(pwq, wq) {
  20469. WARN_ON_ONCE(pwq->nr_active < 0);
  20470. if (pwq->nr_active) {
  20471. busy = true;
  20472. - rcu_read_unlock_sched();
  20473. + rcu_read_unlock();
  20474. goto out_unlock;
  20475. }
  20476. }
  20477. - rcu_read_unlock_sched();
  20478. + rcu_read_unlock();
  20479. }
  20480. out_unlock:
  20481. mutex_unlock(&wq_pool_mutex);
  20482. diff -Nur linux-3.18.10.orig/kernel/workqueue_internal.h linux-3.18.10/kernel/workqueue_internal.h
  20483. --- linux-3.18.10.orig/kernel/workqueue_internal.h 2015-03-24 02:05:12.000000000 +0100
  20484. +++ linux-3.18.10/kernel/workqueue_internal.h 2015-03-26 12:42:18.687588350 +0100
  20485. @@ -43,6 +43,7 @@
  20486. unsigned long last_active; /* L: last active timestamp */
  20487. unsigned int flags; /* X: flags */
  20488. int id; /* I: worker id */
  20489. + int sleeping; /* None */
  20490. /*
  20491. * Opaque string set with work_set_desc(). Printed out with task
  20492. @@ -68,7 +69,7 @@
  20493. * Scheduler hooks for concurrency managed workqueue. Only to be used from
  20494. * sched/core.c and workqueue.c.
  20495. */
  20496. -void wq_worker_waking_up(struct task_struct *task, int cpu);
  20497. -struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu);
  20498. +void wq_worker_running(struct task_struct *task);
  20499. +void wq_worker_sleeping(struct task_struct *task);
  20500. #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
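
The renamed hooks above are driven from schedule() outside the run-queue lock: wq_worker_sleeping() now runs in preemptible context before __schedule(), and wq_worker_running() after the task resumes, which is why the workqueue side can use pool->lock (a sleeping lock on RT) and the per-worker sleeping flag instead of the old rq-lock-protected wakeup return value. The sketch below shows the call sites this rework expects; it is only a shape sketch under that assumption, since the actual kernel/sched/core.c change is made elsewhere in this patch and handles further details (block plug flushing, saved_state).

/* Sketch of the expected scheduler-side wiring for the renamed hooks. */
#include <linux/sched.h>

#include "../workqueue_internal.h"	/* wq_worker_sleeping(), wq_worker_running() */

static inline void sched_submit_work(struct task_struct *tsk)
{
	/*
	 * Preemptible context, run-queue lock not taken yet: the worker
	 * bookkeeping in wq_worker_sleeping() may take pool->lock here.
	 */
	if (tsk->flags & PF_WQ_WORKER)
		wq_worker_sleeping(tsk);
}

static inline void sched_update_worker(struct task_struct *tsk)
{
	/* Back from __schedule(): account the worker as running again. */
	if (tsk->flags & PF_WQ_WORKER)
		wq_worker_running(tsk);
}

asmlinkage __visible void __sched schedule(void)
{
	struct task_struct *tsk = current;

	sched_submit_work(tsk);
	__schedule();
	sched_update_worker(tsk);
}
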
  20501. diff -Nur linux-3.18.10.orig/lib/debugobjects.c linux-3.18.10/lib/debugobjects.c
  20502. --- linux-3.18.10.orig/lib/debugobjects.c 2015-03-24 02:05:12.000000000 +0100
  20503. +++ linux-3.18.10/lib/debugobjects.c 2015-03-26 12:42:18.687588350 +0100
  20504. @@ -309,7 +309,10 @@
  20505. struct debug_obj *obj;
  20506. unsigned long flags;
  20507. - fill_pool();
  20508. +#ifdef CONFIG_PREEMPT_RT_FULL
  20509. + if (preempt_count() == 0 && !irqs_disabled())
  20510. +#endif
  20511. + fill_pool();
  20512. db = get_bucket((unsigned long) addr);
  20513. diff -Nur linux-3.18.10.orig/lib/idr.c linux-3.18.10/lib/idr.c
  20514. --- linux-3.18.10.orig/lib/idr.c 2015-03-24 02:05:12.000000000 +0100
  20515. +++ linux-3.18.10/lib/idr.c 2015-03-26 12:42:18.687588350 +0100
  20516. @@ -31,6 +31,7 @@
  20517. #include <linux/spinlock.h>
  20518. #include <linux/percpu.h>
  20519. #include <linux/hardirq.h>
  20520. +#include <linux/locallock.h>
  20521. #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1)
  20522. #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT)
  20523. @@ -367,6 +368,35 @@
  20524. idr_mark_full(pa, id);
  20525. }
  20526. +#ifdef CONFIG_PREEMPT_RT_FULL
  20527. +static DEFINE_LOCAL_IRQ_LOCK(idr_lock);
  20528. +
  20529. +static inline void idr_preload_lock(void)
  20530. +{
  20531. + local_lock(idr_lock);
  20532. +}
  20533. +
  20534. +static inline void idr_preload_unlock(void)
  20535. +{
  20536. + local_unlock(idr_lock);
  20537. +}
  20538. +
  20539. +void idr_preload_end(void)
  20540. +{
  20541. + idr_preload_unlock();
  20542. +}
  20543. +EXPORT_SYMBOL(idr_preload_end);
  20544. +#else
  20545. +static inline void idr_preload_lock(void)
  20546. +{
  20547. + preempt_disable();
  20548. +}
  20549. +
  20550. +static inline void idr_preload_unlock(void)
  20551. +{
  20552. + preempt_enable();
  20553. +}
  20554. +#endif
  20555. /**
  20556. * idr_preload - preload for idr_alloc()
  20557. @@ -402,7 +432,7 @@
  20558. WARN_ON_ONCE(in_interrupt());
  20559. might_sleep_if(gfp_mask & __GFP_WAIT);
  20560. - preempt_disable();
  20561. + idr_preload_lock();
  20562. /*
  20563. * idr_alloc() is likely to succeed w/o full idr_layer buffer and
  20564. @@ -414,9 +444,9 @@
  20565. while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) {
  20566. struct idr_layer *new;
  20567. - preempt_enable();
  20568. + idr_preload_unlock();
  20569. new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
  20570. - preempt_disable();
  20571. + idr_preload_lock();
  20572. if (!new)
  20573. break;
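
On mainline, idr_preload() pins the preallocated layers to the CPU by disabling preemption until idr_preload_end(); with PREEMPT_RT_FULL that would forbid the sleeping spinlocks callers usually hold around idr_alloc(), so the hunk above switches to a per-CPU local lock and turns idr_preload_end() into a real, exported function. The sketch below shows the usual caller pattern this change has to keep working; my_idr, my_lock and register_object() are placeholder names, not from the patch.

/* Typical idr_preload()/idr_alloc() caller pattern (illustration only). */
#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>

static DEFINE_IDR(my_idr);
static DEFINE_SPINLOCK(my_lock);

int register_object(void *obj)
{
	int id;

	idr_preload(GFP_KERNEL);	/* may sleep, preallocates idr layers */
	spin_lock(&my_lock);

	/*
	 * GFP_NOWAIT: must not sleep under the lock; the allocation falls
	 * back to the per-CPU layers preloaded above.
	 */
	id = idr_alloc(&my_idr, obj, 0, 0, GFP_NOWAIT);

	spin_unlock(&my_lock);
	idr_preload_end();		/* on RT: the exported function above */

	return id;	/* >= 0 on success, negative errno on failure */
}
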
  20574. diff -Nur linux-3.18.10.orig/lib/Kconfig linux-3.18.10/lib/Kconfig
  20575. --- linux-3.18.10.orig/lib/Kconfig 2015-03-24 02:05:12.000000000 +0100
  20576. +++ linux-3.18.10/lib/Kconfig 2015-03-26 12:42:18.687588350 +0100
  20577. @@ -383,6 +383,7 @@
  20578. config CPUMASK_OFFSTACK
  20579. bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
  20580. + depends on !PREEMPT_RT_FULL
  20581. help
  20582. Use dynamic allocation for cpumask_var_t, instead of putting
  20583. them on the stack. This is a bit more expensive, but avoids
  20584. diff -Nur linux-3.18.10.orig/lib/Kconfig.debug linux-3.18.10/lib/Kconfig.debug
  20585. --- linux-3.18.10.orig/lib/Kconfig.debug 2015-03-24 02:05:12.000000000 +0100
  20586. +++ linux-3.18.10/lib/Kconfig.debug 2015-03-26 12:42:18.687588350 +0100
  20587. @@ -639,7 +639,7 @@
  20588. config DEBUG_SHIRQ
  20589. bool "Debug shared IRQ handlers"
  20590. - depends on DEBUG_KERNEL
  20591. + depends on DEBUG_KERNEL && !PREEMPT_RT_BASE
  20592. help
  20593. Enable this to generate a spurious interrupt as soon as a shared
  20594. interrupt handler is registered, and just before one is deregistered.
  20595. diff -Nur linux-3.18.10.orig/lib/locking-selftest.c linux-3.18.10/lib/locking-selftest.c
  20596. --- linux-3.18.10.orig/lib/locking-selftest.c 2015-03-24 02:05:12.000000000 +0100
  20597. +++ linux-3.18.10/lib/locking-selftest.c 2015-03-26 12:42:18.687588350 +0100
  20598. @@ -590,6 +590,8 @@
  20599. #include "locking-selftest-spin-hardirq.h"
  20600. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin)
  20601. +#ifndef CONFIG_PREEMPT_RT_FULL
  20602. +
  20603. #include "locking-selftest-rlock-hardirq.h"
  20604. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
  20605. @@ -605,9 +607,12 @@
  20606. #include "locking-selftest-wlock-softirq.h"
  20607. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
  20608. +#endif
  20609. +
  20610. #undef E1
  20611. #undef E2
  20612. +#ifndef CONFIG_PREEMPT_RT_FULL
  20613. /*
  20614. * Enabling hardirqs with a softirq-safe lock held:
  20615. */
  20616. @@ -640,6 +645,8 @@
  20617. #undef E1
  20618. #undef E2
  20619. +#endif
  20620. +
  20621. /*
  20622. * Enabling irqs with an irq-safe lock held:
  20623. */
  20624. @@ -663,6 +670,8 @@
  20625. #include "locking-selftest-spin-hardirq.h"
  20626. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin)
  20627. +#ifndef CONFIG_PREEMPT_RT_FULL
  20628. +
  20629. #include "locking-selftest-rlock-hardirq.h"
  20630. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
  20631. @@ -678,6 +687,8 @@
  20632. #include "locking-selftest-wlock-softirq.h"
  20633. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
  20634. +#endif
  20635. +
  20636. #undef E1
  20637. #undef E2
  20638. @@ -709,6 +720,8 @@
  20639. #include "locking-selftest-spin-hardirq.h"
  20640. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin)
  20641. +#ifndef CONFIG_PREEMPT_RT_FULL
  20642. +
  20643. #include "locking-selftest-rlock-hardirq.h"
  20644. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
  20645. @@ -724,6 +737,8 @@
  20646. #include "locking-selftest-wlock-softirq.h"
  20647. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
  20648. +#endif
  20649. +
  20650. #undef E1
  20651. #undef E2
  20652. #undef E3
  20653. @@ -757,6 +772,8 @@
  20654. #include "locking-selftest-spin-hardirq.h"
  20655. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin)
  20656. +#ifndef CONFIG_PREEMPT_RT_FULL
  20657. +
  20658. #include "locking-selftest-rlock-hardirq.h"
  20659. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
  20660. @@ -772,10 +789,14 @@
  20661. #include "locking-selftest-wlock-softirq.h"
  20662. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
  20663. +#endif
  20664. +
  20665. #undef E1
  20666. #undef E2
  20667. #undef E3
  20668. +#ifndef CONFIG_PREEMPT_RT_FULL
  20669. +
  20670. /*
  20671. * read-lock / write-lock irq inversion.
  20672. *
  20673. @@ -838,6 +859,10 @@
  20674. #undef E2
  20675. #undef E3
  20676. +#endif
  20677. +
  20678. +#ifndef CONFIG_PREEMPT_RT_FULL
  20679. +
  20680. /*
  20681. * read-lock / write-lock recursion that is actually safe.
  20682. */
  20683. @@ -876,6 +901,8 @@
  20684. #undef E2
  20685. #undef E3
  20686. +#endif
  20687. +
  20688. /*
  20689. * read-lock / write-lock recursion that is unsafe.
  20690. */
  20691. @@ -1858,6 +1885,7 @@
  20692. printk(" --------------------------------------------------------------------------\n");
  20693. +#ifndef CONFIG_PREEMPT_RT_FULL
  20694. /*
  20695. * irq-context testcases:
  20696. */
  20697. @@ -1870,6 +1898,28 @@
  20698. DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
  20699. // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
  20700. +#else
  20701. + /* On -rt, we only do hardirq context test for raw spinlock */
  20702. + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12);
  20703. + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21);
  20704. +
  20705. + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12);
  20706. + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21);
  20707. +
  20708. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123);
  20709. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132);
  20710. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213);
  20711. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231);
  20712. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312);
  20713. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321);
  20714. +
  20715. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123);
  20716. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132);
  20717. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213);
  20718. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231);
  20719. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312);
  20720. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321);
  20721. +#endif
  20722. ww_tests();
  20723. diff -Nur linux-3.18.10.orig/lib/percpu_ida.c linux-3.18.10/lib/percpu_ida.c
  20724. --- linux-3.18.10.orig/lib/percpu_ida.c 2015-03-24 02:05:12.000000000 +0100
  20725. +++ linux-3.18.10/lib/percpu_ida.c 2015-03-26 12:42:18.687588350 +0100
  20726. @@ -29,6 +29,9 @@
  20727. #include <linux/string.h>
  20728. #include <linux/spinlock.h>
  20729. #include <linux/percpu_ida.h>
  20730. +#include <linux/locallock.h>
  20731. +
  20732. +static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock);
  20733. struct percpu_ida_cpu {
  20734. /*
  20735. @@ -151,13 +154,13 @@
  20736. unsigned long flags;
  20737. int tag;
  20738. - local_irq_save(flags);
  20739. + local_lock_irqsave(irq_off_lock, flags);
  20740. tags = this_cpu_ptr(pool->tag_cpu);
  20741. /* Fastpath */
  20742. tag = alloc_local_tag(tags);
  20743. if (likely(tag >= 0)) {
  20744. - local_irq_restore(flags);
  20745. + local_unlock_irqrestore(irq_off_lock, flags);
  20746. return tag;
  20747. }
  20748. @@ -176,6 +179,7 @@
  20749. if (!tags->nr_free)
  20750. alloc_global_tags(pool, tags);
  20751. +
  20752. if (!tags->nr_free)
  20753. steal_tags(pool, tags);
  20754. @@ -187,7 +191,7 @@
  20755. }
  20756. spin_unlock(&pool->lock);
  20757. - local_irq_restore(flags);
  20758. + local_unlock_irqrestore(irq_off_lock, flags);
  20759. if (tag >= 0 || state == TASK_RUNNING)
  20760. break;
  20761. @@ -199,7 +203,7 @@
  20762. schedule();
  20763. - local_irq_save(flags);
  20764. + local_lock_irqsave(irq_off_lock, flags);
  20765. tags = this_cpu_ptr(pool->tag_cpu);
  20766. }
  20767. if (state != TASK_RUNNING)
  20768. @@ -224,7 +228,7 @@
  20769. BUG_ON(tag >= pool->nr_tags);
  20770. - local_irq_save(flags);
  20771. + local_lock_irqsave(irq_off_lock, flags);
  20772. tags = this_cpu_ptr(pool->tag_cpu);
  20773. spin_lock(&tags->lock);
  20774. @@ -256,7 +260,7 @@
  20775. spin_unlock(&pool->lock);
  20776. }
  20777. - local_irq_restore(flags);
  20778. + local_unlock_irqrestore(irq_off_lock, flags);
  20779. }
  20780. EXPORT_SYMBOL_GPL(percpu_ida_free);
  20781. @@ -348,7 +352,7 @@
  20782. struct percpu_ida_cpu *remote;
  20783. unsigned cpu, i, err = 0;
  20784. - local_irq_save(flags);
  20785. + local_lock_irqsave(irq_off_lock, flags);
  20786. for_each_possible_cpu(cpu) {
  20787. remote = per_cpu_ptr(pool->tag_cpu, cpu);
  20788. spin_lock(&remote->lock);
  20789. @@ -370,7 +374,7 @@
  20790. }
  20791. spin_unlock(&pool->lock);
  20792. out:
  20793. - local_irq_restore(flags);
  20794. + local_unlock_irqrestore(irq_off_lock, flags);
  20795. return err;
  20796. }
  20797. EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
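
percpu_ida shows the standard locallock conversion: each local_irq_save()/local_irq_restore() pair around the per-CPU tag cache becomes local_lock_irqsave()/local_unlock_irqrestore() on the named irq_off_lock, so the section stays IRQ-off on mainline but becomes a per-CPU sleeping lock on RT. The same shape reduced to a toy per-CPU counter; demo names, locallock.h is the header added earlier in this patch.

#include <linux/percpu.h>
#include <linux/locallock.h>

static DEFINE_LOCAL_IRQ_LOCK(demo_lock);
static DEFINE_PER_CPU(unsigned long, demo_events);

/* Toy fast path: on !RT this is exactly local_irq_save(); on RT it pins
 * the task to its CPU and serializes only against other demo_lock users. */
static void demo_count_event(void)
{
        unsigned long flags;

        local_lock_irqsave(demo_lock, flags);
        __this_cpu_inc(demo_events);
        local_unlock_irqrestore(demo_lock, flags);
}
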
  20798. diff -Nur linux-3.18.10.orig/lib/radix-tree.c linux-3.18.10/lib/radix-tree.c
  20799. --- linux-3.18.10.orig/lib/radix-tree.c 2015-03-24 02:05:12.000000000 +0100
  20800. +++ linux-3.18.10/lib/radix-tree.c 2015-03-26 12:42:18.687588350 +0100
  20801. @@ -195,12 +195,13 @@
  20802. * succeed in getting a node here (and never reach
  20803. * kmem_cache_alloc)
  20804. */
  20805. - rtp = this_cpu_ptr(&radix_tree_preloads);
  20806. + rtp = &get_cpu_var(radix_tree_preloads);
  20807. if (rtp->nr) {
  20808. ret = rtp->nodes[rtp->nr - 1];
  20809. rtp->nodes[rtp->nr - 1] = NULL;
  20810. rtp->nr--;
  20811. }
  20812. + put_cpu_var(radix_tree_preloads);
  20813. /*
  20814. * Update the allocation stack trace as this is more useful
  20815. * for debugging.
  20816. @@ -240,6 +241,7 @@
  20817. call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
  20818. }
  20819. +#ifndef CONFIG_PREEMPT_RT_FULL
  20820. /*
  20821. * Load up this CPU's radix_tree_node buffer with sufficient objects to
  20822. * ensure that the addition of a single element in the tree cannot fail. On
  20823. @@ -305,6 +307,7 @@
  20824. return 0;
  20825. }
  20826. EXPORT_SYMBOL(radix_tree_maybe_preload);
  20827. +#endif
  20828. /*
  20829. * Return the maximum key which can be store into a
  20830. diff -Nur linux-3.18.10.orig/lib/scatterlist.c linux-3.18.10/lib/scatterlist.c
  20831. --- linux-3.18.10.orig/lib/scatterlist.c 2015-03-24 02:05:12.000000000 +0100
  20832. +++ linux-3.18.10/lib/scatterlist.c 2015-03-26 12:42:18.687588350 +0100
  20833. @@ -592,7 +592,7 @@
  20834. flush_kernel_dcache_page(miter->page);
  20835. if (miter->__flags & SG_MITER_ATOMIC) {
  20836. - WARN_ON_ONCE(preemptible());
  20837. + WARN_ON_ONCE(!pagefault_disabled());
  20838. kunmap_atomic(miter->addr);
  20839. } else
  20840. kunmap(miter->page);
  20841. @@ -637,7 +637,7 @@
  20842. if (!sg_miter_skip(&miter, skip))
  20843. return false;
  20844. - local_irq_save(flags);
  20845. + local_irq_save_nort(flags);
  20846. while (sg_miter_next(&miter) && offset < buflen) {
  20847. unsigned int len;
  20848. @@ -654,7 +654,7 @@
  20849. sg_miter_stop(&miter);
  20850. - local_irq_restore(flags);
  20851. + local_irq_restore_nort(flags);
  20852. return offset;
  20853. }
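
The scatterlist hunks express the opposite case: the IRQ disable is only needed on mainline (to satisfy the atomic kmap bookkeeping, whose check is relaxed to pagefault_disabled() above), so local_irq_save_nort() keeps it on !RT and only records the flags on RT. Illustrative shape with a demo name; not a drop-in.

/* Demo only: a section that wants IRQs off on mainline but is already
 * covered on RT by pagefault_disable() in the kmap_atomic path. */
static void demo_copy_chunk(void *dst, const void *src, size_t len)
{
        unsigned long flags;

        local_irq_save_nort(flags);     /* real IRQ-off on !RT */
        memcpy(dst, src, len);
        local_irq_restore_nort(flags);
}
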
  20854. diff -Nur linux-3.18.10.orig/lib/smp_processor_id.c linux-3.18.10/lib/smp_processor_id.c
  20855. --- linux-3.18.10.orig/lib/smp_processor_id.c 2015-03-24 02:05:12.000000000 +0100
  20856. +++ linux-3.18.10/lib/smp_processor_id.c 2015-03-26 12:42:18.687588350 +0100
  20857. @@ -39,8 +39,9 @@
  20858. if (!printk_ratelimit())
  20859. goto out_enable;
  20860. - printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n",
  20861. - what1, what2, preempt_count() - 1, current->comm, current->pid);
  20862. + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x %08x] code: %s/%d\n",
  20863. + what1, what2, preempt_count() - 1, __migrate_disabled(current),
  20864. + current->comm, current->pid);
  20865. print_symbol("caller is %s\n", (long)__builtin_return_address(0));
  20866. dump_stack();
  20867. diff -Nur linux-3.18.10.orig/mm/filemap.c linux-3.18.10/mm/filemap.c
  20868. --- linux-3.18.10.orig/mm/filemap.c 2015-03-24 02:05:12.000000000 +0100
  20869. +++ linux-3.18.10/mm/filemap.c 2015-03-26 12:42:18.687588350 +0100
  20870. @@ -168,7 +168,9 @@
  20871. if (!workingset_node_pages(node) &&
  20872. list_empty(&node->private_list)) {
  20873. node->private_data = mapping;
  20874. - list_lru_add(&workingset_shadow_nodes, &node->private_list);
  20875. + local_lock(workingset_shadow_lock);
  20876. + list_lru_add(&__workingset_shadow_nodes, &node->private_list);
  20877. + local_unlock(workingset_shadow_lock);
  20878. }
  20879. }
  20880. @@ -535,9 +537,12 @@
  20881. * node->private_list is protected by
  20882. * mapping->tree_lock.
  20883. */
  20884. - if (!list_empty(&node->private_list))
  20885. - list_lru_del(&workingset_shadow_nodes,
  20886. + if (!list_empty(&node->private_list)) {
  20887. + local_lock(workingset_shadow_lock);
  20888. + list_lru_del(&__workingset_shadow_nodes,
  20889. &node->private_list);
  20890. + local_unlock(workingset_shadow_lock);
  20891. + }
  20892. }
  20893. return 0;
  20894. }
  20895. diff -Nur linux-3.18.10.orig/mm/highmem.c linux-3.18.10/mm/highmem.c
  20896. --- linux-3.18.10.orig/mm/highmem.c 2015-03-24 02:05:12.000000000 +0100
  20897. +++ linux-3.18.10/mm/highmem.c 2015-03-26 12:42:18.687588350 +0100
  20898. @@ -29,10 +29,11 @@
  20899. #include <linux/kgdb.h>
  20900. #include <asm/tlbflush.h>
  20901. -
  20902. +#ifndef CONFIG_PREEMPT_RT_FULL
  20903. #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
  20904. DEFINE_PER_CPU(int, __kmap_atomic_idx);
  20905. #endif
  20906. +#endif
  20907. /*
  20908. * Virtual_count is not a pure "count".
  20909. @@ -107,8 +108,9 @@
  20910. unsigned long totalhigh_pages __read_mostly;
  20911. EXPORT_SYMBOL(totalhigh_pages);
  20912. -
  20913. +#ifndef CONFIG_PREEMPT_RT_FULL
  20914. EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
  20915. +#endif
  20916. unsigned int nr_free_highpages (void)
  20917. {
  20918. diff -Nur linux-3.18.10.orig/mm/Kconfig linux-3.18.10/mm/Kconfig
  20919. --- linux-3.18.10.orig/mm/Kconfig 2015-03-24 02:05:12.000000000 +0100
  20920. +++ linux-3.18.10/mm/Kconfig 2015-03-26 12:42:18.687588350 +0100
  20921. @@ -408,7 +408,7 @@
  20922. config TRANSPARENT_HUGEPAGE
  20923. bool "Transparent Hugepage Support"
  20924. - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
  20925. + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
  20926. select COMPACTION
  20927. help
  20928. Transparent Hugepages allows the kernel to use huge pages and
  20929. diff -Nur linux-3.18.10.orig/mm/memcontrol.c linux-3.18.10/mm/memcontrol.c
  20930. --- linux-3.18.10.orig/mm/memcontrol.c 2015-03-24 02:05:12.000000000 +0100
  20931. +++ linux-3.18.10/mm/memcontrol.c 2015-03-26 12:42:18.687588350 +0100
  20932. @@ -60,6 +60,8 @@
  20933. #include <net/sock.h>
  20934. #include <net/ip.h>
  20935. #include <net/tcp_memcontrol.h>
  20936. +#include <linux/locallock.h>
  20937. +
  20938. #include "slab.h"
  20939. #include <asm/uaccess.h>
  20940. @@ -87,6 +89,7 @@
  20941. #define do_swap_account 0
  20942. #endif
  20943. +static DEFINE_LOCAL_IRQ_LOCK(event_lock);
  20944. static const char * const mem_cgroup_stat_names[] = {
  20945. "cache",
  20946. @@ -2376,14 +2379,17 @@
  20947. */
  20948. static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
  20949. {
  20950. - struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
  20951. + struct memcg_stock_pcp *stock;
  20952. + int cpu = get_cpu_light();
  20953. +
  20954. + stock = &per_cpu(memcg_stock, cpu);
  20955. if (stock->cached != memcg) { /* reset if necessary */
  20956. drain_stock(stock);
  20957. stock->cached = memcg;
  20958. }
  20959. stock->nr_pages += nr_pages;
  20960. - put_cpu_var(memcg_stock);
  20961. + put_cpu_light();
  20962. }
  20963. /*
  20964. @@ -2397,7 +2403,7 @@
  20965. /* Notify other cpus that system-wide "drain" is running */
  20966. get_online_cpus();
  20967. - curcpu = get_cpu();
  20968. + curcpu = get_cpu_light();
  20969. for_each_online_cpu(cpu) {
  20970. struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
  20971. struct mem_cgroup *memcg;
  20972. @@ -2414,7 +2420,7 @@
  20973. schedule_work_on(cpu, &stock->work);
  20974. }
  20975. }
  20976. - put_cpu();
  20977. + put_cpu_light();
  20978. if (!sync)
  20979. goto out;
  20980. @@ -3419,12 +3425,12 @@
  20981. move_unlock_mem_cgroup(from, &flags);
  20982. ret = 0;
  20983. - local_irq_disable();
  20984. + local_lock_irq(event_lock);
  20985. mem_cgroup_charge_statistics(to, page, nr_pages);
  20986. memcg_check_events(to, page);
  20987. mem_cgroup_charge_statistics(from, page, -nr_pages);
  20988. memcg_check_events(from, page);
  20989. - local_irq_enable();
  20990. + local_unlock_irq(event_lock);
  20991. out_unlock:
  20992. unlock_page(page);
  20993. out:
  20994. @@ -6406,10 +6412,10 @@
  20995. VM_BUG_ON_PAGE(!PageTransHuge(page), page);
  20996. }
  20997. - local_irq_disable();
  20998. + local_lock_irq(event_lock);
  20999. mem_cgroup_charge_statistics(memcg, page, nr_pages);
  21000. memcg_check_events(memcg, page);
  21001. - local_irq_enable();
  21002. + local_unlock_irq(event_lock);
  21003. if (do_swap_account && PageSwapCache(page)) {
  21004. swp_entry_t entry = { .val = page_private(page) };
  21005. @@ -6468,14 +6474,14 @@
  21006. memcg_oom_recover(memcg);
  21007. }
  21008. - local_irq_save(flags);
  21009. + local_lock_irqsave(event_lock, flags);
  21010. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
  21011. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
  21012. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
  21013. __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
  21014. __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
  21015. memcg_check_events(memcg, dummy_page);
  21016. - local_irq_restore(flags);
  21017. + local_unlock_irqrestore(event_lock, flags);
  21018. }
  21019. static void uncharge_list(struct list_head *page_list)
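
The memcg stock handling switches from get_cpu_var()/put_cpu_var() to get_cpu_light()/put_cpu_light(): the task is pinned to its CPU so the per-CPU stock stays stable, but on RT it remains preemptible instead of running with preemption disabled. Minimal sketch of the idiom with a demo variable; pinning alone is shown for a read, it is not a substitute for real serialization between writers.

static DEFINE_PER_CPU(unsigned long, demo_stat);

/* get_cpu_light() only guarantees the task stays on this CPU; that is
 * enough to read per-CPU state consistently, as in the drain path above. */
static unsigned long demo_read_this_cpu(void)
{
        unsigned long v;
        int cpu = get_cpu_light();

        v = per_cpu(demo_stat, cpu);
        put_cpu_light();

        return v;
}
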
  21020. diff -Nur linux-3.18.10.orig/mm/memory.c linux-3.18.10/mm/memory.c
  21021. --- linux-3.18.10.orig/mm/memory.c 2015-03-24 02:05:12.000000000 +0100
  21022. +++ linux-3.18.10/mm/memory.c 2015-03-26 12:42:18.691588355 +0100
  21023. @@ -3258,6 +3258,32 @@
  21024. return 0;
  21025. }
  21026. +#ifdef CONFIG_PREEMPT_RT_FULL
  21027. +void pagefault_disable(void)
  21028. +{
  21029. + migrate_disable();
  21030. + current->pagefault_disabled++;
  21031. + /*
  21032. + * make sure to have issued the store before a pagefault
  21033. + * can hit.
  21034. + */
  21035. + barrier();
  21036. +}
  21037. +EXPORT_SYMBOL(pagefault_disable);
  21038. +
  21039. +void pagefault_enable(void)
  21040. +{
  21041. + /*
  21042. + * make sure to issue those last loads/stores before enabling
  21043. + * the pagefault handler again.
  21044. + */
  21045. + barrier();
  21046. + current->pagefault_disabled--;
  21047. + migrate_enable();
  21048. +}
  21049. +EXPORT_SYMBOL(pagefault_enable);
  21050. +#endif
  21051. +
  21052. /*
  21053. * By the time we get here, we already hold the mm semaphore
  21054. *
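
On RT, pagefault_disable() can no longer piggyback on preempt_disable(), so the hunk above gives it an explicit implementation: pin migration, bump current->pagefault_disabled, and rely on barrier() so the store is visible before a fault can be taken. Users of the API are unchanged; an illustrative caller (demo name) doing an atomic user-space read:

#include <linux/uaccess.h>

/* Demo caller: with pagefaults disabled the fault handler treats the task
 * as atomic (in_atomic() on !RT, current->pagefault_disabled on RT) and
 * fails the access with -EFAULT instead of sleeping on mmap_sem. */
static int demo_peek_user(const unsigned long __user *uaddr,
                          unsigned long *val)
{
        unsigned long left;

        pagefault_disable();
        left = __copy_from_user_inatomic(val, uaddr, sizeof(*val));
        pagefault_enable();

        return left ? -EFAULT : 0;
}
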
  21055. diff -Nur linux-3.18.10.orig/mm/mmu_context.c linux-3.18.10/mm/mmu_context.c
  21056. --- linux-3.18.10.orig/mm/mmu_context.c 2015-03-24 02:05:12.000000000 +0100
  21057. +++ linux-3.18.10/mm/mmu_context.c 2015-03-26 12:42:18.691588355 +0100
  21058. @@ -23,6 +23,7 @@
  21059. struct task_struct *tsk = current;
  21060. task_lock(tsk);
  21061. + preempt_disable_rt();
  21062. active_mm = tsk->active_mm;
  21063. if (active_mm != mm) {
  21064. atomic_inc(&mm->mm_count);
  21065. @@ -30,6 +31,7 @@
  21066. }
  21067. tsk->mm = mm;
  21068. switch_mm(active_mm, mm, tsk);
  21069. + preempt_enable_rt();
  21070. task_unlock(tsk);
  21071. #ifdef finish_arch_post_lock_switch
  21072. finish_arch_post_lock_switch();
  21073. diff -Nur linux-3.18.10.orig/mm/page_alloc.c linux-3.18.10/mm/page_alloc.c
  21074. --- linux-3.18.10.orig/mm/page_alloc.c 2015-03-24 02:05:12.000000000 +0100
  21075. +++ linux-3.18.10/mm/page_alloc.c 2015-03-26 12:42:18.691588355 +0100
  21076. @@ -59,6 +59,7 @@
  21077. #include <linux/page-debug-flags.h>
  21078. #include <linux/hugetlb.h>
  21079. #include <linux/sched/rt.h>
  21080. +#include <linux/locallock.h>
  21081. #include <asm/sections.h>
  21082. #include <asm/tlbflush.h>
  21083. @@ -230,6 +231,18 @@
  21084. EXPORT_SYMBOL(nr_online_nodes);
  21085. #endif
  21086. +static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
  21087. +
  21088. +#ifdef CONFIG_PREEMPT_RT_BASE
  21089. +# define cpu_lock_irqsave(cpu, flags) \
  21090. + local_lock_irqsave_on(pa_lock, flags, cpu)
  21091. +# define cpu_unlock_irqrestore(cpu, flags) \
  21092. + local_unlock_irqrestore_on(pa_lock, flags, cpu)
  21093. +#else
  21094. +# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
  21095. +# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
  21096. +#endif
  21097. +
  21098. int page_group_by_mobility_disabled __read_mostly;
  21099. void set_pageblock_migratetype(struct page *page, int migratetype)
  21100. @@ -654,7 +667,7 @@
  21101. }
  21102. /*
  21103. - * Frees a number of pages from the PCP lists
  21104. + * Frees a number of pages which have been collected from the pcp lists.
  21105. * Assumes all pages on list are in same zone, and of same order.
  21106. * count is the number of pages to free.
  21107. *
  21108. @@ -665,18 +678,51 @@
  21109. * pinned" detection logic.
  21110. */
  21111. static void free_pcppages_bulk(struct zone *zone, int count,
  21112. - struct per_cpu_pages *pcp)
  21113. + struct list_head *list)
  21114. {
  21115. - int migratetype = 0;
  21116. - int batch_free = 0;
  21117. int to_free = count;
  21118. unsigned long nr_scanned;
  21119. + unsigned long flags;
  21120. +
  21121. + spin_lock_irqsave(&zone->lock, flags);
  21122. - spin_lock(&zone->lock);
  21123. nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
  21124. if (nr_scanned)
  21125. __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
  21126. + while (!list_empty(list)) {
  21127. + struct page *page = list_first_entry(list, struct page, lru);
  21128. + int mt; /* migratetype of the to-be-freed page */
  21129. +
  21130. + /* must delete as __free_one_page list manipulates */
  21131. + list_del(&page->lru);
  21132. +
  21133. + mt = get_freepage_migratetype(page);
  21134. + if (unlikely(has_isolate_pageblock(zone)))
  21135. + mt = get_pageblock_migratetype(page);
  21136. +
  21137. + /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
  21138. + __free_one_page(page, page_to_pfn(page), zone, 0, mt);
  21139. + trace_mm_page_pcpu_drain(page, 0, mt);
  21140. + to_free--;
  21141. + }
  21142. + WARN_ON(to_free != 0);
  21143. + spin_unlock_irqrestore(&zone->lock, flags);
  21144. +}
  21145. +
  21146. +/*
  21147. + * Moves a number of pages from the PCP lists onto a private list which
  21148. + * is then freed outside of the locked region.

  21149. + *
  21150. + * Assumes all pages on list are in same zone, and of same order.
  21151. + * count is the number of pages to free.
  21152. + */
  21153. +static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src,
  21154. + struct list_head *dst)
  21155. +{
  21156. + int migratetype = 0;
  21157. + int batch_free = 0;
  21158. +
  21159. while (to_free) {
  21160. struct page *page;
  21161. struct list_head *list;
  21162. @@ -692,7 +738,7 @@
  21163. batch_free++;
  21164. if (++migratetype == MIGRATE_PCPTYPES)
  21165. migratetype = 0;
  21166. - list = &pcp->lists[migratetype];
  21167. + list = &src->lists[migratetype];
  21168. } while (list_empty(list));
  21169. /* This is the only non-empty list. Free them all. */
  21170. @@ -700,21 +746,11 @@
  21171. batch_free = to_free;
  21172. do {
  21173. - int mt; /* migratetype of the to-be-freed page */
  21174. -
  21175. - page = list_entry(list->prev, struct page, lru);
  21176. - /* must delete as __free_one_page list manipulates */
  21177. + page = list_last_entry(list, struct page, lru);
  21178. list_del(&page->lru);
  21179. - mt = get_freepage_migratetype(page);
  21180. - if (unlikely(has_isolate_pageblock(zone)))
  21181. - mt = get_pageblock_migratetype(page);
  21182. -
  21183. - /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
  21184. - __free_one_page(page, page_to_pfn(page), zone, 0, mt);
  21185. - trace_mm_page_pcpu_drain(page, 0, mt);
  21186. + list_add(&page->lru, dst);
  21187. } while (--to_free && --batch_free && !list_empty(list));
  21188. }
  21189. - spin_unlock(&zone->lock);
  21190. }
  21191. static void free_one_page(struct zone *zone,
  21192. @@ -723,7 +759,9 @@
  21193. int migratetype)
  21194. {
  21195. unsigned long nr_scanned;
  21196. - spin_lock(&zone->lock);
  21197. + unsigned long flags;
  21198. +
  21199. + spin_lock_irqsave(&zone->lock, flags);
  21200. nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
  21201. if (nr_scanned)
  21202. __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
  21203. @@ -733,7 +771,7 @@
  21204. migratetype = get_pfnblock_migratetype(page, pfn);
  21205. }
  21206. __free_one_page(page, pfn, zone, order, migratetype);
  21207. - spin_unlock(&zone->lock);
  21208. + spin_unlock_irqrestore(&zone->lock, flags);
  21209. }
  21210. static bool free_pages_prepare(struct page *page, unsigned int order)
  21211. @@ -773,11 +811,11 @@
  21212. return;
  21213. migratetype = get_pfnblock_migratetype(page, pfn);
  21214. - local_irq_save(flags);
  21215. + local_lock_irqsave(pa_lock, flags);
  21216. __count_vm_events(PGFREE, 1 << order);
  21217. set_freepage_migratetype(page, migratetype);
  21218. free_one_page(page_zone(page), page, pfn, order, migratetype);
  21219. - local_irq_restore(flags);
  21220. + local_unlock_irqrestore(pa_lock, flags);
  21221. }
  21222. void __init __free_pages_bootmem(struct page *page, unsigned int order)
  21223. @@ -1251,16 +1289,18 @@
  21224. void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
  21225. {
  21226. unsigned long flags;
  21227. + LIST_HEAD(dst);
  21228. int to_drain, batch;
  21229. - local_irq_save(flags);
  21230. + local_lock_irqsave(pa_lock, flags);
  21231. batch = ACCESS_ONCE(pcp->batch);
  21232. to_drain = min(pcp->count, batch);
  21233. if (to_drain > 0) {
  21234. - free_pcppages_bulk(zone, to_drain, pcp);
  21235. + isolate_pcp_pages(to_drain, pcp, &dst);
  21236. pcp->count -= to_drain;
  21237. }
  21238. - local_irq_restore(flags);
  21239. + local_unlock_irqrestore(pa_lock, flags);
  21240. + free_pcppages_bulk(zone, to_drain, &dst);
  21241. }
  21242. #endif
  21243. @@ -1279,16 +1319,21 @@
  21244. for_each_populated_zone(zone) {
  21245. struct per_cpu_pageset *pset;
  21246. struct per_cpu_pages *pcp;
  21247. + LIST_HEAD(dst);
  21248. + int count;
  21249. - local_irq_save(flags);
  21250. + cpu_lock_irqsave(cpu, flags);
  21251. pset = per_cpu_ptr(zone->pageset, cpu);
  21252. pcp = &pset->pcp;
  21253. - if (pcp->count) {
  21254. - free_pcppages_bulk(zone, pcp->count, pcp);
  21255. + count = pcp->count;
  21256. + if (count) {
  21257. + isolate_pcp_pages(count, pcp, &dst);
  21258. pcp->count = 0;
  21259. }
  21260. - local_irq_restore(flags);
  21261. + cpu_unlock_irqrestore(cpu, flags);
  21262. + if (count)
  21263. + free_pcppages_bulk(zone, count, &dst);
  21264. }
  21265. }
  21266. @@ -1341,7 +1386,12 @@
  21267. else
  21268. cpumask_clear_cpu(cpu, &cpus_with_pcps);
  21269. }
  21270. +#ifndef CONFIG_PREEMPT_RT_BASE
  21271. on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
  21272. +#else
  21273. + for_each_cpu(cpu, &cpus_with_pcps)
  21274. + drain_pages(cpu);
  21275. +#endif
  21276. }
  21277. #ifdef CONFIG_HIBERNATION
  21278. @@ -1397,7 +1447,7 @@
  21279. migratetype = get_pfnblock_migratetype(page, pfn);
  21280. set_freepage_migratetype(page, migratetype);
  21281. - local_irq_save(flags);
  21282. + local_lock_irqsave(pa_lock, flags);
  21283. __count_vm_event(PGFREE);
  21284. /*
  21285. @@ -1423,12 +1473,17 @@
  21286. pcp->count++;
  21287. if (pcp->count >= pcp->high) {
  21288. unsigned long batch = ACCESS_ONCE(pcp->batch);
  21289. - free_pcppages_bulk(zone, batch, pcp);
  21290. + LIST_HEAD(dst);
  21291. +
  21292. + isolate_pcp_pages(batch, pcp, &dst);
  21293. pcp->count -= batch;
  21294. + local_unlock_irqrestore(pa_lock, flags);
  21295. + free_pcppages_bulk(zone, batch, &dst);
  21296. + return;
  21297. }
  21298. out:
  21299. - local_irq_restore(flags);
  21300. + local_unlock_irqrestore(pa_lock, flags);
  21301. }
  21302. /*
  21303. @@ -1558,7 +1613,7 @@
  21304. struct per_cpu_pages *pcp;
  21305. struct list_head *list;
  21306. - local_irq_save(flags);
  21307. + local_lock_irqsave(pa_lock, flags);
  21308. pcp = &this_cpu_ptr(zone->pageset)->pcp;
  21309. list = &pcp->lists[migratetype];
  21310. if (list_empty(list)) {
  21311. @@ -1590,13 +1645,15 @@
  21312. */
  21313. WARN_ON_ONCE(order > 1);
  21314. }
  21315. - spin_lock_irqsave(&zone->lock, flags);
  21316. + local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
  21317. page = __rmqueue(zone, order, migratetype);
  21318. - spin_unlock(&zone->lock);
  21319. - if (!page)
  21320. + if (!page) {
  21321. + spin_unlock(&zone->lock);
  21322. goto failed;
  21323. + }
  21324. __mod_zone_freepage_state(zone, -(1 << order),
  21325. get_freepage_migratetype(page));
  21326. + spin_unlock(&zone->lock);
  21327. }
  21328. __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
  21329. @@ -1606,7 +1663,7 @@
  21330. __count_zone_vm_events(PGALLOC, zone, 1 << order);
  21331. zone_statistics(preferred_zone, zone, gfp_flags);
  21332. - local_irq_restore(flags);
  21333. + local_unlock_irqrestore(pa_lock, flags);
  21334. VM_BUG_ON_PAGE(bad_range(zone, page), page);
  21335. if (prep_new_page(page, order, gfp_flags))
  21336. @@ -1614,7 +1671,7 @@
  21337. return page;
  21338. failed:
  21339. - local_irq_restore(flags);
  21340. + local_unlock_irqrestore(pa_lock, flags);
  21341. return NULL;
  21342. }
  21343. @@ -2325,8 +2382,8 @@
  21344. count_vm_event(COMPACTSTALL);
  21345. /* Page migration frees to the PCP lists but we want merging */
  21346. - drain_pages(get_cpu());
  21347. - put_cpu();
  21348. + drain_pages(get_cpu_light());
  21349. + put_cpu_light();
  21350. page = get_page_from_freelist(gfp_mask, nodemask,
  21351. order, zonelist, high_zoneidx,
  21352. @@ -5565,6 +5622,7 @@
  21353. void __init page_alloc_init(void)
  21354. {
  21355. hotcpu_notifier(page_alloc_cpu_notify, 0);
  21356. + local_irq_lock_init(pa_lock);
  21357. }
  21358. /*
  21359. @@ -6459,7 +6517,7 @@
  21360. struct per_cpu_pageset *pset;
  21361. /* avoid races with drain_pages() */
  21362. - local_irq_save(flags);
  21363. + local_lock_irqsave(pa_lock, flags);
  21364. if (zone->pageset != &boot_pageset) {
  21365. for_each_online_cpu(cpu) {
  21366. pset = per_cpu_ptr(zone->pageset, cpu);
  21367. @@ -6468,7 +6526,7 @@
  21368. free_percpu(zone->pageset);
  21369. zone->pageset = &boot_pageset;
  21370. }
  21371. - local_irq_restore(flags);
  21372. + local_unlock_irqrestore(pa_lock, flags);
  21373. }
  21374. #ifdef CONFIG_MEMORY_HOTREMOVE
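
The page_alloc rework is the core of this block: free_pcppages_bulk() used to walk the pcp lists and call __free_one_page() inside an IRQ-off region, and is now split so isolate_pcp_pages() merely detaches the pages under pa_lock while the expensive freeing runs afterwards under zone->lock (taken irqsave) with pa_lock already dropped. The general "detach under the hot lock, process outside" shape, with demo types only:

struct demo_pool {
        spinlock_t              lock;
        struct list_head        pages;
};

/* Detach the whole batch under the contended lock, then do the per-page
 * work with the lock dropped; this mirrors isolate_pcp_pages() plus
 * free_pcppages_bulk() above, minus the migratetype bookkeeping. */
static void demo_drain(struct demo_pool *pool)
{
        LIST_HEAD(todo);
        struct page *page, *tmp;
        unsigned long flags;

        spin_lock_irqsave(&pool->lock, flags);
        list_splice_init(&pool->pages, &todo);          /* O(1) detach */
        spin_unlock_irqrestore(&pool->lock, flags);

        list_for_each_entry_safe(page, tmp, &todo, lru) {
                list_del(&page->lru);
                __free_page(page);                      /* heavy part, unlocked */
        }
}
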
  21375. diff -Nur linux-3.18.10.orig/mm/slab.h linux-3.18.10/mm/slab.h
  21376. --- linux-3.18.10.orig/mm/slab.h 2015-03-24 02:05:12.000000000 +0100
  21377. +++ linux-3.18.10/mm/slab.h 2015-03-26 12:42:18.691588355 +0100
  21378. @@ -315,7 +315,11 @@
  21379. * The slab lists for all objects.
  21380. */
  21381. struct kmem_cache_node {
  21382. +#ifdef CONFIG_SLUB
  21383. + raw_spinlock_t list_lock;
  21384. +#else
  21385. spinlock_t list_lock;
  21386. +#endif
  21387. #ifdef CONFIG_SLAB
  21388. struct list_head slabs_partial; /* partial list first, better asm code */
  21389. diff -Nur linux-3.18.10.orig/mm/slub.c linux-3.18.10/mm/slub.c
  21390. --- linux-3.18.10.orig/mm/slub.c 2015-03-24 02:05:12.000000000 +0100
  21391. +++ linux-3.18.10/mm/slub.c 2015-03-26 12:42:18.691588355 +0100
  21392. @@ -1044,7 +1044,7 @@
  21393. {
  21394. struct kmem_cache_node *n = get_node(s, page_to_nid(page));
  21395. - spin_lock_irqsave(&n->list_lock, *flags);
  21396. + raw_spin_lock_irqsave(&n->list_lock, *flags);
  21397. slab_lock(page);
  21398. if (!check_slab(s, page))
  21399. @@ -1091,7 +1091,7 @@
  21400. fail:
  21401. slab_unlock(page);
  21402. - spin_unlock_irqrestore(&n->list_lock, *flags);
  21403. + raw_spin_unlock_irqrestore(&n->list_lock, *flags);
  21404. slab_fix(s, "Object at 0x%p not freed", object);
  21405. return NULL;
  21406. }
  21407. @@ -1219,6 +1219,12 @@
  21408. #endif /* CONFIG_SLUB_DEBUG */
  21409. +struct slub_free_list {
  21410. + raw_spinlock_t lock;
  21411. + struct list_head list;
  21412. +};
  21413. +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
  21414. +
  21415. /*
  21416. * Hooks for other subsystems that check memory allocations. In a typical
  21417. * production configuration these hooks all should produce no code at all.
  21418. @@ -1303,10 +1309,15 @@
  21419. struct page *page;
  21420. struct kmem_cache_order_objects oo = s->oo;
  21421. gfp_t alloc_gfp;
  21422. + bool enableirqs;
  21423. flags &= gfp_allowed_mask;
  21424. - if (flags & __GFP_WAIT)
  21425. + enableirqs = (flags & __GFP_WAIT) != 0;
  21426. +#ifdef CONFIG_PREEMPT_RT_FULL
  21427. + enableirqs |= system_state == SYSTEM_RUNNING;
  21428. +#endif
  21429. + if (enableirqs)
  21430. local_irq_enable();
  21431. flags |= s->allocflags;
  21432. @@ -1347,7 +1358,7 @@
  21433. kmemcheck_mark_unallocated_pages(page, pages);
  21434. }
  21435. - if (flags & __GFP_WAIT)
  21436. + if (enableirqs)
  21437. local_irq_disable();
  21438. if (!page)
  21439. return NULL;
  21440. @@ -1365,8 +1376,10 @@
  21441. void *object)
  21442. {
  21443. setup_object_debug(s, page, object);
  21444. +#ifndef CONFIG_PREEMPT_RT_FULL
  21445. if (unlikely(s->ctor))
  21446. s->ctor(object);
  21447. +#endif
  21448. }
  21449. static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
  21450. @@ -1442,6 +1455,16 @@
  21451. memcg_uncharge_slab(s, order);
  21452. }
  21453. +static void free_delayed(struct list_head *h)
  21454. +{
  21455. + while(!list_empty(h)) {
  21456. + struct page *page = list_first_entry(h, struct page, lru);
  21457. +
  21458. + list_del(&page->lru);
  21459. + __free_slab(page->slab_cache, page);
  21460. + }
  21461. +}
  21462. +
  21463. #define need_reserve_slab_rcu \
  21464. (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
  21465. @@ -1476,6 +1499,12 @@
  21466. }
  21467. call_rcu(head, rcu_free_slab);
  21468. + } else if (irqs_disabled()) {
  21469. + struct slub_free_list *f = &__get_cpu_var(slub_free_list);
  21470. +
  21471. + raw_spin_lock(&f->lock);
  21472. + list_add(&page->lru, &f->list);
  21473. + raw_spin_unlock(&f->lock);
  21474. } else
  21475. __free_slab(s, page);
  21476. }
  21477. @@ -1589,7 +1618,7 @@
  21478. if (!n || !n->nr_partial)
  21479. return NULL;
  21480. - spin_lock(&n->list_lock);
  21481. + raw_spin_lock(&n->list_lock);
  21482. list_for_each_entry_safe(page, page2, &n->partial, lru) {
  21483. void *t;
  21484. @@ -1614,7 +1643,7 @@
  21485. break;
  21486. }
  21487. - spin_unlock(&n->list_lock);
  21488. + raw_spin_unlock(&n->list_lock);
  21489. return object;
  21490. }
  21491. @@ -1860,7 +1889,7 @@
  21492. * that acquire_slab() will see a slab page that
  21493. * is frozen
  21494. */
  21495. - spin_lock(&n->list_lock);
  21496. + raw_spin_lock(&n->list_lock);
  21497. }
  21498. } else {
  21499. m = M_FULL;
  21500. @@ -1871,7 +1900,7 @@
  21501. * slabs from diagnostic functions will not see
  21502. * any frozen slabs.
  21503. */
  21504. - spin_lock(&n->list_lock);
  21505. + raw_spin_lock(&n->list_lock);
  21506. }
  21507. }
  21508. @@ -1906,7 +1935,7 @@
  21509. goto redo;
  21510. if (lock)
  21511. - spin_unlock(&n->list_lock);
  21512. + raw_spin_unlock(&n->list_lock);
  21513. if (m == M_FREE) {
  21514. stat(s, DEACTIVATE_EMPTY);
  21515. @@ -1938,10 +1967,10 @@
  21516. n2 = get_node(s, page_to_nid(page));
  21517. if (n != n2) {
  21518. if (n)
  21519. - spin_unlock(&n->list_lock);
  21520. + raw_spin_unlock(&n->list_lock);
  21521. n = n2;
  21522. - spin_lock(&n->list_lock);
  21523. + raw_spin_lock(&n->list_lock);
  21524. }
  21525. do {
  21526. @@ -1970,7 +1999,7 @@
  21527. }
  21528. if (n)
  21529. - spin_unlock(&n->list_lock);
  21530. + raw_spin_unlock(&n->list_lock);
  21531. while (discard_page) {
  21532. page = discard_page;
  21533. @@ -2008,14 +2037,21 @@
  21534. pobjects = oldpage->pobjects;
  21535. pages = oldpage->pages;
  21536. if (drain && pobjects > s->cpu_partial) {
  21537. + struct slub_free_list *f;
  21538. unsigned long flags;
  21539. + LIST_HEAD(tofree);
  21540. /*
  21541. * partial array is full. Move the existing
  21542. * set to the per node partial list.
  21543. */
  21544. local_irq_save(flags);
  21545. unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
  21546. + f = &__get_cpu_var(slub_free_list);
  21547. + raw_spin_lock(&f->lock);
  21548. + list_splice_init(&f->list, &tofree);
  21549. + raw_spin_unlock(&f->lock);
  21550. local_irq_restore(flags);
  21551. + free_delayed(&tofree);
  21552. oldpage = NULL;
  21553. pobjects = 0;
  21554. pages = 0;
  21555. @@ -2079,7 +2115,22 @@
  21556. static void flush_all(struct kmem_cache *s)
  21557. {
  21558. + LIST_HEAD(tofree);
  21559. + int cpu;
  21560. +
  21561. on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
  21562. + for_each_online_cpu(cpu) {
  21563. + struct slub_free_list *f;
  21564. +
  21565. + if (!has_cpu_slab(cpu, s))
  21566. + continue;
  21567. +
  21568. + f = &per_cpu(slub_free_list, cpu);
  21569. + raw_spin_lock_irq(&f->lock);
  21570. + list_splice_init(&f->list, &tofree);
  21571. + raw_spin_unlock_irq(&f->lock);
  21572. + free_delayed(&tofree);
  21573. + }
  21574. }
  21575. /*
  21576. @@ -2115,10 +2166,10 @@
  21577. unsigned long x = 0;
  21578. struct page *page;
  21579. - spin_lock_irqsave(&n->list_lock, flags);
  21580. + raw_spin_lock_irqsave(&n->list_lock, flags);
  21581. list_for_each_entry(page, &n->partial, lru)
  21582. x += get_count(page);
  21583. - spin_unlock_irqrestore(&n->list_lock, flags);
  21584. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  21585. return x;
  21586. }
  21587. #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
  21588. @@ -2255,9 +2306,11 @@
  21589. static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  21590. unsigned long addr, struct kmem_cache_cpu *c)
  21591. {
  21592. + struct slub_free_list *f;
  21593. void *freelist;
  21594. struct page *page;
  21595. unsigned long flags;
  21596. + LIST_HEAD(tofree);
  21597. local_irq_save(flags);
  21598. #ifdef CONFIG_PREEMPT
  21599. @@ -2325,7 +2378,13 @@
  21600. VM_BUG_ON(!c->page->frozen);
  21601. c->freelist = get_freepointer(s, freelist);
  21602. c->tid = next_tid(c->tid);
  21603. +out:
  21604. + f = &__get_cpu_var(slub_free_list);
  21605. + raw_spin_lock(&f->lock);
  21606. + list_splice_init(&f->list, &tofree);
  21607. + raw_spin_unlock(&f->lock);
  21608. local_irq_restore(flags);
  21609. + free_delayed(&tofree);
  21610. return freelist;
  21611. new_slab:
  21612. @@ -2342,8 +2401,7 @@
  21613. if (unlikely(!freelist)) {
  21614. slab_out_of_memory(s, gfpflags, node);
  21615. - local_irq_restore(flags);
  21616. - return NULL;
  21617. + goto out;
  21618. }
  21619. page = c->page;
  21620. @@ -2358,8 +2416,7 @@
  21621. deactivate_slab(s, page, get_freepointer(s, freelist));
  21622. c->page = NULL;
  21623. c->freelist = NULL;
  21624. - local_irq_restore(flags);
  21625. - return freelist;
  21626. + goto out;
  21627. }
  21628. /*
  21629. @@ -2444,6 +2501,10 @@
  21630. if (unlikely(gfpflags & __GFP_ZERO) && object)
  21631. memset(object, 0, s->object_size);
  21632. +#ifdef CONFIG_PREEMPT_RT_FULL
  21633. + if (unlikely(s->ctor) && object)
  21634. + s->ctor(object);
  21635. +#endif
  21636. slab_post_alloc_hook(s, gfpflags, object);
  21637. @@ -2531,7 +2592,7 @@
  21638. do {
  21639. if (unlikely(n)) {
  21640. - spin_unlock_irqrestore(&n->list_lock, flags);
  21641. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  21642. n = NULL;
  21643. }
  21644. prior = page->freelist;
  21645. @@ -2563,7 +2624,7 @@
  21646. * Otherwise the list_lock will synchronize with
  21647. * other processors updating the list of slabs.
  21648. */
  21649. - spin_lock_irqsave(&n->list_lock, flags);
  21650. + raw_spin_lock_irqsave(&n->list_lock, flags);
  21651. }
  21652. }
  21653. @@ -2605,7 +2666,7 @@
  21654. add_partial(n, page, DEACTIVATE_TO_TAIL);
  21655. stat(s, FREE_ADD_PARTIAL);
  21656. }
  21657. - spin_unlock_irqrestore(&n->list_lock, flags);
  21658. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  21659. return;
  21660. slab_empty:
  21661. @@ -2620,7 +2681,7 @@
  21662. remove_full(s, n, page);
  21663. }
  21664. - spin_unlock_irqrestore(&n->list_lock, flags);
  21665. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  21666. stat(s, FREE_SLAB);
  21667. discard_slab(s, page);
  21668. }
  21669. @@ -2816,7 +2877,7 @@
  21670. init_kmem_cache_node(struct kmem_cache_node *n)
  21671. {
  21672. n->nr_partial = 0;
  21673. - spin_lock_init(&n->list_lock);
  21674. + raw_spin_lock_init(&n->list_lock);
  21675. INIT_LIST_HEAD(&n->partial);
  21676. #ifdef CONFIG_SLUB_DEBUG
  21677. atomic_long_set(&n->nr_slabs, 0);
  21678. @@ -3373,7 +3434,7 @@
  21679. for (i = 0; i < objects; i++)
  21680. INIT_LIST_HEAD(slabs_by_inuse + i);
  21681. - spin_lock_irqsave(&n->list_lock, flags);
  21682. + raw_spin_lock_irqsave(&n->list_lock, flags);
  21683. /*
  21684. * Build lists indexed by the items in use in each slab.
  21685. @@ -3394,7 +3455,7 @@
  21686. for (i = objects - 1; i > 0; i--)
  21687. list_splice(slabs_by_inuse + i, n->partial.prev);
  21688. - spin_unlock_irqrestore(&n->list_lock, flags);
  21689. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  21690. /* Release empty slabs */
  21691. list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
  21692. @@ -3567,6 +3628,12 @@
  21693. {
  21694. static __initdata struct kmem_cache boot_kmem_cache,
  21695. boot_kmem_cache_node;
  21696. + int cpu;
  21697. +
  21698. + for_each_possible_cpu(cpu) {
  21699. + raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
  21700. + INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
  21701. + }
  21702. if (debug_guardpage_minorder())
  21703. slub_max_order = 0;
  21704. @@ -3815,7 +3882,7 @@
  21705. struct page *page;
  21706. unsigned long flags;
  21707. - spin_lock_irqsave(&n->list_lock, flags);
  21708. + raw_spin_lock_irqsave(&n->list_lock, flags);
  21709. list_for_each_entry(page, &n->partial, lru) {
  21710. validate_slab_slab(s, page, map);
  21711. @@ -3837,7 +3904,7 @@
  21712. s->name, count, atomic_long_read(&n->nr_slabs));
  21713. out:
  21714. - spin_unlock_irqrestore(&n->list_lock, flags);
  21715. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  21716. return count;
  21717. }
  21718. @@ -4025,12 +4092,12 @@
  21719. if (!atomic_long_read(&n->nr_slabs))
  21720. continue;
  21721. - spin_lock_irqsave(&n->list_lock, flags);
  21722. + raw_spin_lock_irqsave(&n->list_lock, flags);
  21723. list_for_each_entry(page, &n->partial, lru)
  21724. process_slab(&t, s, page, alloc, map);
  21725. list_for_each_entry(page, &n->full, lru)
  21726. process_slab(&t, s, page, alloc, map);
  21727. - spin_unlock_irqrestore(&n->list_lock, flags);
  21728. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  21729. }
  21730. for (i = 0; i < t.count; i++) {
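
With n->list_lock turned into a raw spinlock, SLUB can end up wanting to hand a slab page back to the page allocator from a context that must not sleep on RT; such pages are parked on the per-CPU slub_free_list and flushed through free_delayed() once interrupts are enabled again, as the partial-list, allocation and flush_all() paths above do. The deferral idiom in isolation, with demo names and lock initialization left to boot code as in the patch:

struct demo_deferred {
        raw_spinlock_t          lock;
        struct list_head        list;
};
static DEFINE_PER_CPU(struct demo_deferred, demo_deferred);

/* Queue if the caller has IRQs off (freeing might sleep on RT), free
 * directly otherwise. */
static void demo_free_or_defer(struct page *page)
{
        if (irqs_disabled()) {
                struct demo_deferred *d = this_cpu_ptr(&demo_deferred);

                raw_spin_lock(&d->lock);
                list_add(&page->lru, &d->list);
                raw_spin_unlock(&d->lock);
        } else {
                __free_page(page);
        }
}

/* Later, with IRQs enabled: splice the backlog out under the raw lock and
 * free it. Flushing another CPU's backlog is harmless, the lock serializes. */
static void demo_flush_deferred(void)
{
        struct demo_deferred *d = this_cpu_ptr(&demo_deferred);
        LIST_HEAD(tofree);

        raw_spin_lock_irq(&d->lock);
        list_splice_init(&d->list, &tofree);
        raw_spin_unlock_irq(&d->lock);

        while (!list_empty(&tofree)) {
                struct page *page = list_first_entry(&tofree, struct page, lru);

                list_del(&page->lru);
                __free_page(page);
        }
}
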
  21731. diff -Nur linux-3.18.10.orig/mm/swap.c linux-3.18.10/mm/swap.c
  21732. --- linux-3.18.10.orig/mm/swap.c 2015-03-24 02:05:12.000000000 +0100
  21733. +++ linux-3.18.10/mm/swap.c 2015-03-26 12:42:18.691588355 +0100
  21734. @@ -31,6 +31,7 @@
  21735. #include <linux/memcontrol.h>
  21736. #include <linux/gfp.h>
  21737. #include <linux/uio.h>
  21738. +#include <linux/locallock.h>
  21739. #include "internal.h"
  21740. @@ -44,6 +45,9 @@
  21741. static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
  21742. static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
  21743. +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
  21744. +static DEFINE_LOCAL_IRQ_LOCK(swapvec_lock);
  21745. +
  21746. /*
  21747. * This path almost never happens for VM activity - pages are normally
  21748. * freed via pagevecs. But it gets used by networking.
  21749. @@ -473,11 +477,11 @@
  21750. unsigned long flags;
  21751. page_cache_get(page);
  21752. - local_irq_save(flags);
  21753. + local_lock_irqsave(rotate_lock, flags);
  21754. pvec = this_cpu_ptr(&lru_rotate_pvecs);
  21755. if (!pagevec_add(pvec, page))
  21756. pagevec_move_tail(pvec);
  21757. - local_irq_restore(flags);
  21758. + local_unlock_irqrestore(rotate_lock, flags);
  21759. }
  21760. }
  21761. @@ -528,12 +532,13 @@
  21762. void activate_page(struct page *page)
  21763. {
  21764. if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  21765. - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
  21766. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  21767. + activate_page_pvecs);
  21768. page_cache_get(page);
  21769. if (!pagevec_add(pvec, page))
  21770. pagevec_lru_move_fn(pvec, __activate_page, NULL);
  21771. - put_cpu_var(activate_page_pvecs);
  21772. + put_locked_var(swapvec_lock, activate_page_pvecs);
  21773. }
  21774. }
  21775. @@ -559,7 +564,7 @@
  21776. static void __lru_cache_activate_page(struct page *page)
  21777. {
  21778. - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  21779. + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
  21780. int i;
  21781. /*
  21782. @@ -581,7 +586,7 @@
  21783. }
  21784. }
  21785. - put_cpu_var(lru_add_pvec);
  21786. + put_locked_var(swapvec_lock, lru_add_pvec);
  21787. }
  21788. /*
  21789. @@ -620,13 +625,13 @@
  21790. static void __lru_cache_add(struct page *page)
  21791. {
  21792. - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  21793. + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
  21794. page_cache_get(page);
  21795. if (!pagevec_space(pvec))
  21796. __pagevec_lru_add(pvec);
  21797. pagevec_add(pvec, page);
  21798. - put_cpu_var(lru_add_pvec);
  21799. + put_locked_var(swapvec_lock, lru_add_pvec);
  21800. }
  21801. /**
  21802. @@ -806,9 +811,9 @@
  21803. unsigned long flags;
  21804. /* No harm done if a racing interrupt already did this */
  21805. - local_irq_save(flags);
  21806. + local_lock_irqsave(rotate_lock, flags);
  21807. pagevec_move_tail(pvec);
  21808. - local_irq_restore(flags);
  21809. + local_unlock_irqrestore(rotate_lock, flags);
  21810. }
  21811. pvec = &per_cpu(lru_deactivate_pvecs, cpu);
  21812. @@ -836,18 +841,19 @@
  21813. return;
  21814. if (likely(get_page_unless_zero(page))) {
  21815. - struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
  21816. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  21817. + lru_deactivate_pvecs);
  21818. if (!pagevec_add(pvec, page))
  21819. pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
  21820. - put_cpu_var(lru_deactivate_pvecs);
  21821. + put_locked_var(swapvec_lock, lru_deactivate_pvecs);
  21822. }
  21823. }
  21824. void lru_add_drain(void)
  21825. {
  21826. - lru_add_drain_cpu(get_cpu());
  21827. - put_cpu();
  21828. + lru_add_drain_cpu(local_lock_cpu(swapvec_lock));
  21829. + local_unlock_cpu(swapvec_lock);
  21830. }
  21831. static void lru_add_drain_per_cpu(struct work_struct *dummy)
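
The swap.c conversion replaces get_cpu_var()/put_cpu_var() around the pagevec caches with get_locked_var()/put_locked_var() on swapvec_lock, so the per-CPU pagevec is reached under a local lock instead of with preemption disabled. Same access pattern with demo names; pagevec_add(), __pagevec_lru_add() and page_cache_get() are the existing interfaces used in the hunks.

#include <linux/pagevec.h>
#include <linux/pagemap.h>
#include <linux/locallock.h>

static DEFINE_LOCAL_IRQ_LOCK(demo_vec_lock);
static DEFINE_PER_CPU(struct pagevec, demo_pvec);

/* Illustrative: get_locked_var() takes demo_vec_lock (a per-CPU sleeping
 * lock on RT, plain preempt_disable() on !RT) and returns this CPU's
 * pagevec, exactly like the lru_add_pvec conversion above. */
static void demo_cache_page(struct page *page)
{
        struct pagevec *pvec = &get_locked_var(demo_vec_lock, demo_pvec);

        page_cache_get(page);
        if (!pagevec_add(pvec, page))
                __pagevec_lru_add(pvec);

        put_locked_var(demo_vec_lock, demo_pvec);
}
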
  21832. diff -Nur linux-3.18.10.orig/mm/truncate.c linux-3.18.10/mm/truncate.c
  21833. --- linux-3.18.10.orig/mm/truncate.c 2015-03-24 02:05:12.000000000 +0100
  21834. +++ linux-3.18.10/mm/truncate.c 2015-03-26 12:42:18.691588355 +0100
  21835. @@ -56,8 +56,11 @@
  21836. * protected by mapping->tree_lock.
  21837. */
  21838. if (!workingset_node_shadows(node) &&
  21839. - !list_empty(&node->private_list))
  21840. - list_lru_del(&workingset_shadow_nodes, &node->private_list);
  21841. + !list_empty(&node->private_list)) {
  21842. + local_lock(workingset_shadow_lock);
  21843. + list_lru_del(&__workingset_shadow_nodes, &node->private_list);
  21844. + local_unlock(workingset_shadow_lock);
  21845. + }
  21846. __radix_tree_delete_node(&mapping->page_tree, node);
  21847. unlock:
  21848. spin_unlock_irq(&mapping->tree_lock);
  21849. diff -Nur linux-3.18.10.orig/mm/vmalloc.c linux-3.18.10/mm/vmalloc.c
  21850. --- linux-3.18.10.orig/mm/vmalloc.c 2015-03-24 02:05:12.000000000 +0100
  21851. +++ linux-3.18.10/mm/vmalloc.c 2015-03-26 12:42:18.691588355 +0100
  21852. @@ -798,7 +798,7 @@
  21853. struct vmap_block *vb;
  21854. struct vmap_area *va;
  21855. unsigned long vb_idx;
  21856. - int node, err;
  21857. + int node, err, cpu;
  21858. node = numa_node_id();
  21859. @@ -836,11 +836,12 @@
  21860. BUG_ON(err);
  21861. radix_tree_preload_end();
  21862. - vbq = &get_cpu_var(vmap_block_queue);
  21863. + cpu = get_cpu_light();
  21864. + vbq = &__get_cpu_var(vmap_block_queue);
  21865. spin_lock(&vbq->lock);
  21866. list_add_rcu(&vb->free_list, &vbq->free);
  21867. spin_unlock(&vbq->lock);
  21868. - put_cpu_var(vmap_block_queue);
  21869. + put_cpu_light();
  21870. return vb;
  21871. }
  21872. @@ -908,6 +909,7 @@
  21873. struct vmap_block *vb;
  21874. unsigned long addr = 0;
  21875. unsigned int order;
  21876. + int cpu = 0;
  21877. BUG_ON(size & ~PAGE_MASK);
  21878. BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
  21879. @@ -923,7 +925,8 @@
  21880. again:
  21881. rcu_read_lock();
  21882. - vbq = &get_cpu_var(vmap_block_queue);
  21883. + cpu = get_cpu_light();
  21884. + vbq = &__get_cpu_var(vmap_block_queue);
  21885. list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  21886. int i;
  21887. @@ -947,7 +950,7 @@
  21888. spin_unlock(&vb->lock);
  21889. }
  21890. - put_cpu_var(vmap_block_queue);
  21891. + put_cpu_light();
  21892. rcu_read_unlock();
  21893. if (!addr) {
  21894. diff -Nur linux-3.18.10.orig/mm/vmstat.c linux-3.18.10/mm/vmstat.c
  21895. --- linux-3.18.10.orig/mm/vmstat.c 2015-03-24 02:05:12.000000000 +0100
  21896. +++ linux-3.18.10/mm/vmstat.c 2015-03-26 12:42:18.691588355 +0100
  21897. @@ -221,6 +221,7 @@
  21898. long x;
  21899. long t;
  21900. + preempt_disable_rt();
  21901. x = delta + __this_cpu_read(*p);
  21902. t = __this_cpu_read(pcp->stat_threshold);
  21903. @@ -230,6 +231,7 @@
  21904. x = 0;
  21905. }
  21906. __this_cpu_write(*p, x);
  21907. + preempt_enable_rt();
  21908. }
  21909. EXPORT_SYMBOL(__mod_zone_page_state);
  21910. @@ -262,6 +264,7 @@
  21911. s8 __percpu *p = pcp->vm_stat_diff + item;
  21912. s8 v, t;
  21913. + preempt_disable_rt();
  21914. v = __this_cpu_inc_return(*p);
  21915. t = __this_cpu_read(pcp->stat_threshold);
  21916. if (unlikely(v > t)) {
  21917. @@ -270,6 +273,7 @@
  21918. zone_page_state_add(v + overstep, zone, item);
  21919. __this_cpu_write(*p, -overstep);
  21920. }
  21921. + preempt_enable_rt();
  21922. }
  21923. void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
  21924. @@ -284,6 +288,7 @@
  21925. s8 __percpu *p = pcp->vm_stat_diff + item;
  21926. s8 v, t;
  21927. + preempt_disable_rt();
  21928. v = __this_cpu_dec_return(*p);
  21929. t = __this_cpu_read(pcp->stat_threshold);
  21930. if (unlikely(v < - t)) {
  21931. @@ -292,6 +297,7 @@
  21932. zone_page_state_add(v - overstep, zone, item);
  21933. __this_cpu_write(*p, overstep);
  21934. }
  21935. + preempt_enable_rt();
  21936. }
  21937. void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
  21938. diff -Nur linux-3.18.10.orig/mm/workingset.c linux-3.18.10/mm/workingset.c
  21939. --- linux-3.18.10.orig/mm/workingset.c 2015-03-24 02:05:12.000000000 +0100
  21940. +++ linux-3.18.10/mm/workingset.c 2015-03-26 12:42:18.691588355 +0100
  21941. @@ -264,7 +264,8 @@
  21942. * point where they would still be useful.
  21943. */
  21944. -struct list_lru workingset_shadow_nodes;
  21945. +struct list_lru __workingset_shadow_nodes;
  21946. +DEFINE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
  21947. static unsigned long count_shadow_nodes(struct shrinker *shrinker,
  21948. struct shrink_control *sc)
  21949. @@ -274,9 +275,9 @@
  21950. unsigned long pages;
  21951. /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
  21952. - local_irq_disable();
  21953. - shadow_nodes = list_lru_count_node(&workingset_shadow_nodes, sc->nid);
  21954. - local_irq_enable();
  21955. + local_lock_irq(workingset_shadow_lock);
  21956. + shadow_nodes = list_lru_count_node(&__workingset_shadow_nodes, sc->nid);
  21957. + local_unlock_irq(workingset_shadow_lock);
  21958. pages = node_present_pages(sc->nid);
  21959. /*
  21960. @@ -362,9 +363,9 @@
  21961. spin_unlock(&mapping->tree_lock);
  21962. ret = LRU_REMOVED_RETRY;
  21963. out:
  21964. - local_irq_enable();
  21965. + local_unlock_irq(workingset_shadow_lock);
  21966. cond_resched();
  21967. - local_irq_disable();
  21968. + local_lock_irq(workingset_shadow_lock);
  21969. spin_lock(lru_lock);
  21970. return ret;
  21971. }
  21972. @@ -375,10 +376,10 @@
  21973. unsigned long ret;
  21974. /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
  21975. - local_irq_disable();
  21976. - ret = list_lru_walk_node(&workingset_shadow_nodes, sc->nid,
  21977. + local_lock_irq(workingset_shadow_lock);
  21978. + ret = list_lru_walk_node(&__workingset_shadow_nodes, sc->nid,
  21979. shadow_lru_isolate, NULL, &sc->nr_to_scan);
  21980. - local_irq_enable();
  21981. + local_unlock_irq(workingset_shadow_lock);
  21982. return ret;
  21983. }
  21984. @@ -399,7 +400,7 @@
  21985. {
  21986. int ret;
  21987. - ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);
  21988. + ret = list_lru_init_key(&__workingset_shadow_nodes, &shadow_nodes_key);
  21989. if (ret)
  21990. goto err;
  21991. ret = register_shrinker(&workingset_shadow_shrinker);
  21992. @@ -407,7 +408,7 @@
  21993. goto err_list_lru;
  21994. return 0;
  21995. err_list_lru:
  21996. - list_lru_destroy(&workingset_shadow_nodes);
  21997. + list_lru_destroy(&__workingset_shadow_nodes);
  21998. err:
  21999. return ret;
  22000. }
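
Besides introducing workingset_shadow_lock, the hunk renames the lru to __workingset_shadow_nodes; since the locking rule changes from "IRQs disabled" to "hold the local lock", the rename turns any call site missed by the conversion into a build error rather than a silent locking bug. The same pattern with demo names, as a sketch only:

#include <linux/list_lru.h>
#include <linux/locallock.h>

struct list_lru __demo_shadow_nodes;            /* was: demo_shadow_nodes */
static DEFINE_LOCAL_IRQ_LOCK(demo_shadow_lock);

/* Every remaining user must spell the new name and therefore gets
 * reviewed; the local lock replaces the bare local_irq_disable(). */
static unsigned long demo_count_shadows(int nid)
{
        unsigned long nodes;

        local_lock_irq(demo_shadow_lock);
        nodes = list_lru_count_node(&__demo_shadow_nodes, nid);
        local_unlock_irq(demo_shadow_lock);

        return nodes;
}
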
diff -Nur linux-3.18.10.orig/net/core/dev.c linux-3.18.10/net/core/dev.c
--- linux-3.18.10.orig/net/core/dev.c 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/net/core/dev.c 2015-03-26 12:42:18.691588355 +0100
@@ -182,6 +182,7 @@
static DEFINE_HASHTABLE(napi_hash, 8);
static seqcount_t devnet_rename_seq;
+static DEFINE_MUTEX(devnet_rename_mutex);
static inline void dev_base_seq_inc(struct net *net)
{
@@ -203,14 +204,14 @@
static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
- spin_lock(&sd->input_pkt_queue.lock);
+ raw_spin_lock(&sd->input_pkt_queue.raw_lock);
#endif
}
static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
- spin_unlock(&sd->input_pkt_queue.lock);
+ raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
#endif
}
@@ -832,7 +833,8 @@
strcpy(name, dev->name);
rcu_read_unlock();
if (read_seqcount_retry(&devnet_rename_seq, seq)) {
- cond_resched();
+ mutex_lock(&devnet_rename_mutex);
+ mutex_unlock(&devnet_rename_mutex);
goto retry;
}
@@ -1101,20 +1103,17 @@
if (dev->flags & IFF_UP)
return -EBUSY;
- write_seqcount_begin(&devnet_rename_seq);
+ mutex_lock(&devnet_rename_mutex);
+ __raw_write_seqcount_begin(&devnet_rename_seq);
- if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
- write_seqcount_end(&devnet_rename_seq);
- return 0;
- }
+ if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
+ goto outunlock;
memcpy(oldname, dev->name, IFNAMSIZ);
err = dev_get_valid_name(net, dev, newname);
- if (err < 0) {
- write_seqcount_end(&devnet_rename_seq);
- return err;
- }
+ if (err < 0)
+ goto outunlock;
if (oldname[0] && !strchr(oldname, '%'))
netdev_info(dev, "renamed from %s\n", oldname);
@@ -1127,11 +1126,12 @@
if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
dev->name_assign_type = old_assign_type;
- write_seqcount_end(&devnet_rename_seq);
- return ret;
+ err = ret;
+ goto outunlock;
}
- write_seqcount_end(&devnet_rename_seq);
+ __raw_write_seqcount_end(&devnet_rename_seq);
+ mutex_unlock(&devnet_rename_mutex);
netdev_adjacent_rename_links(dev, oldname);
@@ -1152,7 +1152,8 @@
/* err >= 0 after dev_alloc_name() or stores the first errno */
if (err >= 0) {
err = ret;
- write_seqcount_begin(&devnet_rename_seq);
+ mutex_lock(&devnet_rename_mutex);
+ __raw_write_seqcount_begin(&devnet_rename_seq);
memcpy(dev->name, oldname, IFNAMSIZ);
memcpy(oldname, newname, IFNAMSIZ);
dev->name_assign_type = old_assign_type;
@@ -1165,6 +1166,11 @@
}
return err;
+
+outunlock:
+ __raw_write_seqcount_end(&devnet_rename_seq);
+ mutex_unlock(&devnet_rename_mutex);
+ return err;
}
/**
@@ -2160,6 +2166,7 @@
sd->output_queue_tailp = &q->next_sched;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
+ preempt_check_resched_rt();
}
void __netif_schedule(struct Qdisc *q)
@@ -2241,6 +2248,7 @@
__this_cpu_write(softnet_data.completion_queue, skb);
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
+ preempt_check_resched_rt();
}
EXPORT_SYMBOL(__dev_kfree_skb_irq);
@@ -3336,6 +3344,7 @@
rps_unlock(sd);
local_irq_restore(flags);
+ preempt_check_resched_rt();
atomic_long_inc(&skb->dev->rx_dropped);
kfree_skb(skb);
@@ -3354,7 +3363,7 @@
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu;
- preempt_disable();
+ migrate_disable();
rcu_read_lock();
cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -3364,13 +3373,13 @@
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
- preempt_enable();
+ migrate_enable();
} else
#endif
{
unsigned int qtail;
- ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
- put_cpu();
+ ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
+ put_cpu_light();
}
return ret;
}
@@ -3404,16 +3413,44 @@
trace_netif_rx_ni_entry(skb);
- preempt_disable();
+ local_bh_disable();
err = netif_rx_internal(skb);
- if (local_softirq_pending())
- do_softirq();
- preempt_enable();
+ local_bh_enable();
return err;
}
EXPORT_SYMBOL(netif_rx_ni);
+#ifdef CONFIG_PREEMPT_RT_FULL
+/*
+ * RT runs ksoftirqd as a real time thread and the root_lock is a
+ * "sleeping spinlock". If the trylock fails then we can go into an
+ * infinite loop when ksoftirqd preempted the task which actually
+ * holds the lock, because we requeue q and raise NET_TX softirq
+ * causing ksoftirqd to loop forever.
+ *
+ * It's safe to use spin_lock on RT here as softirqs run in thread
+ * context and cannot deadlock against the thread which is holding
+ * root_lock.
+ *
+ * On !RT the trylock might fail, but there we bail out from the
+ * softirq loop after 10 attempts which we can't do on RT. And the
+ * task holding root_lock cannot be preempted, so the only downside of
+ * that trylock is that we need 10 loops to decide that we should have
+ * given up in the first one :)
+ */
+static inline int take_root_lock(spinlock_t *lock)
+{
+ spin_lock(lock);
+ return 1;
+}
+#else
+static inline int take_root_lock(spinlock_t *lock)
+{
+ return spin_trylock(lock);
+}
+#endif
+
static void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -3455,7 +3492,7 @@
head = head->next_sched;
root_lock = qdisc_lock(q);
- if (spin_trylock(root_lock)) {
+ if (take_root_lock(root_lock)) {
smp_mb__before_atomic();
clear_bit(__QDISC_STATE_SCHED,
&q->state);
@@ -3848,7 +3885,7 @@
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
if (skb->dev == dev) {
__skb_unlink(skb, &sd->input_pkt_queue);
- kfree_skb(skb);
+ __skb_queue_tail(&sd->tofree_queue, skb);
input_queue_head_incr(sd);
}
}
@@ -3857,10 +3894,13 @@
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
if (skb->dev == dev) {
__skb_unlink(skb, &sd->process_queue);
- kfree_skb(skb);
+ __skb_queue_tail(&sd->tofree_queue, skb);
input_queue_head_incr(sd);
}
}
+
+ if (!skb_queue_empty(&sd->tofree_queue))
+ raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
static int napi_gro_complete(struct sk_buff *skb)
@@ -4323,6 +4363,7 @@
} else
#endif
local_irq_enable();
+ preempt_check_resched_rt();
}
static int process_backlog(struct napi_struct *napi, int quota)
@@ -4394,6 +4435,7 @@
local_irq_save(flags);
____napi_schedule(this_cpu_ptr(&softnet_data), n);
local_irq_restore(flags);
+ preempt_check_resched_rt();
}
EXPORT_SYMBOL(__napi_schedule);
@@ -4516,10 +4558,17 @@
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
+ struct sk_buff *skb;
void *have;
local_irq_disable();
+ while ((skb = __skb_dequeue(&sd->tofree_queue))) {
+ local_irq_enable();
+ kfree_skb(skb);
+ local_irq_disable();
+ }
+
while (!list_empty(&sd->poll_list)) {
struct napi_struct *n;
int work, weight;
@@ -7008,6 +7057,7 @@
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
+ preempt_check_resched_rt();
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
@@ -7018,6 +7068,9 @@
netif_rx_internal(skb);
input_queue_head_incr(oldsd);
}
+ while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
+ kfree_skb(skb);
+ }
return NOTIFY_OK;
}
@@ -7319,8 +7372,9 @@
for_each_possible_cpu(i) {
struct softnet_data *sd = &per_cpu(softnet_data, i);
- skb_queue_head_init(&sd->input_pkt_queue);
- skb_queue_head_init(&sd->process_queue);
+ skb_queue_head_init_raw(&sd->input_pkt_queue);
+ skb_queue_head_init_raw(&sd->process_queue);
+ skb_queue_head_init_raw(&sd->tofree_queue);
INIT_LIST_HEAD(&sd->poll_list);
sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
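
Note on the net/core/dev.c hunks above (illustration, not part of the patch): the flush_backlog and net_rx_action changes together implement a defer-and-free pattern; instead of calling kfree_skb() while the backlog queues are walked with interrupts disabled, skbs are parked on the new per-CPU tofree_queue and freed later in softirq context with interrupts enabled. A condensed sketch of that pattern, reusing the field and helper names from the hunks above; the example_* function names are made up:

/* Called with interrupts disabled while walking the backlog queues. */
static void example_defer_free(struct softnet_data *sd, struct sk_buff *skb)
{
	__skb_queue_tail(&sd->tofree_queue, skb);	/* list operation only, cheap */
	raise_softirq_irqoff(NET_RX_SOFTIRQ);		/* ensure net_rx_action runs */
}

/* Later, in net_rx_action(), the deferred skbs are freed with IRQs on. */
static void example_drain_tofree(struct softnet_data *sd)
{
	struct sk_buff *skb;

	local_irq_disable();
	while ((skb = __skb_dequeue(&sd->tofree_queue))) {
		local_irq_enable();
		kfree_skb(skb);		/* may take sleeping locks on RT */
		local_irq_disable();
	}
	local_irq_enable();
}
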
diff -Nur linux-3.18.10.orig/net/core/skbuff.c linux-3.18.10/net/core/skbuff.c
--- linux-3.18.10.orig/net/core/skbuff.c 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/net/core/skbuff.c 2015-03-26 12:42:18.691588355 +0100
@@ -63,6 +63,7 @@
#include <linux/errqueue.h>
#include <linux/prefetch.h>
#include <linux/if_vlan.h>
+#include <linux/locallock.h>
#include <net/protocol.h>
#include <net/dst.h>
@@ -336,6 +337,7 @@
unsigned int pagecnt_bias;
};
static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
+static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
@@ -344,7 +346,7 @@
int order;
unsigned long flags;
- local_irq_save(flags);
+ local_lock_irqsave(netdev_alloc_lock, flags);
nc = this_cpu_ptr(&netdev_alloc_cache);
if (unlikely(!nc->frag.page)) {
refill:
@@ -389,7 +391,7 @@
nc->frag.offset += fragsz;
nc->pagecnt_bias--;
end:
- local_irq_restore(flags);
+ local_unlock_irqrestore(netdev_alloc_lock, flags);
return data;
}
diff -Nur linux-3.18.10.orig/net/core/sock.c linux-3.18.10/net/core/sock.c
--- linux-3.18.10.orig/net/core/sock.c 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/net/core/sock.c 2015-03-26 12:42:18.691588355 +0100
@@ -2326,12 +2326,11 @@
if (sk->sk_lock.owned)
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
+ spin_unlock_bh(&sk->sk_lock.slock);
/*
* The sk_lock has mutex_lock() semantics here:
*/
mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
- local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);
diff -Nur linux-3.18.10.orig/net/ipv4/icmp.c linux-3.18.10/net/ipv4/icmp.c
--- linux-3.18.10.orig/net/ipv4/icmp.c 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/net/ipv4/icmp.c 2015-03-26 12:42:18.691588355 +0100
@@ -69,6 +69,7 @@
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
+#include <linux/sysrq.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
@@ -864,6 +865,30 @@
}
/*
+ * 32bit and 64bit have different timestamp length, so we check for
+ * the cookie at offset 20 and verify it is repeated at offset 50
+ */
+#define CO_POS0 20
+#define CO_POS1 50
+#define CO_SIZE sizeof(int)
+#define ICMP_SYSRQ_SIZE 57
+
+/*
+ * We got a ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
+ * pattern and if it matches send the next byte as a trigger to sysrq.
+ */
+static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
+{
+ int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
+ char *p = skb->data;
+
+ if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
+ !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
+ p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
+ handle_sysrq(p[CO_POS0 + CO_SIZE]);
+}
+
+/*
* Handle ICMP_ECHO ("ping") requests.
*
* RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
@@ -890,6 +915,11 @@
icmp_param.data_len = skb->len;
icmp_param.head_len = sizeof(struct icmphdr);
icmp_reply(&icmp_param, skb);
+
+ if (skb->len == ICMP_SYSRQ_SIZE &&
+ net->ipv4.sysctl_icmp_echo_sysrq) {
+ icmp_check_sysrq(net, skb);
+ }
}
}
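
Note on the icmp.c hunks above (illustration, not part of the patch): the sysctl added in the next hunk (visible as /proc/sys/net/ipv4/icmp_echo_sysrq) holds the cookie value that icmp_check_sysrq() looks for in a 57-byte echo payload. For illustration only, a hypothetical helper that lays out a matching payload, reusing the CO_POS0/CO_POS1/CO_SIZE/ICMP_SYSRQ_SIZE constants from the hunk above; build_sysrq_ping_payload() is not part of the patch:

/*
 * Fill 'buf' (ICMP_SYSRQ_SIZE bytes) so that icmp_check_sysrq() fires:
 * 'cookie' must equal htonl() of the target's icmp_echo_sysrq sysctl,
 * 'key' is the sysrq command character (e.g. 't' for a task dump).
 */
static void build_sysrq_ping_payload(unsigned char *buf, u32 cookie, char key)
{
	memset(buf, 0, ICMP_SYSRQ_SIZE);
	memcpy(buf + CO_POS0, &cookie, CO_SIZE);	/* cookie at offset 20 */
	memcpy(buf + CO_POS1, &cookie, CO_SIZE);	/* repeated at offset 50 */
	buf[CO_POS0 + CO_SIZE] = key;			/* sysrq key follows both */
	buf[CO_POS1 + CO_SIZE] = key;
}
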
diff -Nur linux-3.18.10.orig/net/ipv4/sysctl_net_ipv4.c linux-3.18.10/net/ipv4/sysctl_net_ipv4.c
--- linux-3.18.10.orig/net/ipv4/sysctl_net_ipv4.c 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/net/ipv4/sysctl_net_ipv4.c 2015-03-26 12:42:18.691588355 +0100
@@ -779,6 +779,13 @@
.proc_handler = proc_dointvec
},
{
+ .procname = "icmp_echo_sysrq",
+ .data = &init_net.ipv4.sysctl_icmp_echo_sysrq,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "icmp_ignore_bogus_error_responses",
.data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
.maxlen = sizeof(int),
diff -Nur linux-3.18.10.orig/net/mac80211/rx.c linux-3.18.10/net/mac80211/rx.c
--- linux-3.18.10.orig/net/mac80211/rx.c 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/net/mac80211/rx.c 2015-03-26 12:42:18.695588359 +0100
@@ -3356,7 +3356,7 @@
struct ieee80211_supported_band *sband;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- WARN_ON_ONCE(softirq_count() == 0);
+ WARN_ON_ONCE_NONRT(softirq_count() == 0);
if (WARN_ON(status->band >= IEEE80211_NUM_BANDS))
goto drop;
diff -Nur linux-3.18.10.orig/net/netfilter/core.c linux-3.18.10/net/netfilter/core.c
--- linux-3.18.10.orig/net/netfilter/core.c 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/net/netfilter/core.c 2015-03-26 12:42:18.695588359 +0100
@@ -21,11 +21,17 @@
#include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/locallock.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include "nf_internals.h"
+#ifdef CONFIG_PREEMPT_RT_BASE
+DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
+EXPORT_PER_CPU_SYMBOL(xt_write_lock);
+#endif
+
static DEFINE_MUTEX(afinfo_mutex);
const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
diff -Nur linux-3.18.10.orig/net/packet/af_packet.c linux-3.18.10/net/packet/af_packet.c
--- linux-3.18.10.orig/net/packet/af_packet.c 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/net/packet/af_packet.c 2015-03-26 12:42:18.695588359 +0100
@@ -63,6 +63,7 @@
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
+#include <linux/delay.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -692,7 +693,7 @@
if (BLOCK_NUM_PKTS(pbd)) {
while (atomic_read(&pkc->blk_fill_in_prog)) {
/* Waiting for skb_copy_bits to finish... */
- cpu_relax();
+ cpu_chill();
}
}
@@ -943,7 +944,7 @@
if (!(status & TP_STATUS_BLK_TMO)) {
while (atomic_read(&pkc->blk_fill_in_prog)) {
/* Waiting for skb_copy_bits to finish... */
- cpu_relax();
+ cpu_chill();
}
}
prb_close_block(pkc, pbd, po, status);
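
Note on the af_packet.c hunks above (illustration, not part of the patch): both busy-wait loops switch from cpu_relax() to cpu_chill(), because on PREEMPT_RT the waiting thread could otherwise spin indefinitely against a preempted writer that still holds blk_fill_in_prog elevated. A minimal sketch of the pattern, assuming the cpu_chill() helper that this patch set provides via <linux/delay.h> (which, as used here, is expected to fall back to cpu_relax() on !RT kernels); the example function name is made up:

#include <linux/atomic.h>
#include <linux/delay.h>

static void example_wait_for_writer(atomic_t *in_progress)
{
	while (atomic_read(in_progress)) {
		/* RT: sleep briefly so the preempted writer can run;
		 * !RT: effectively a plain cpu_relax() busy wait.
		 */
		cpu_chill();
	}
}
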
diff -Nur linux-3.18.10.orig/net/rds/ib_rdma.c linux-3.18.10/net/rds/ib_rdma.c
--- linux-3.18.10.orig/net/rds/ib_rdma.c 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/net/rds/ib_rdma.c 2015-03-26 12:42:18.695588359 +0100
@@ -34,6 +34,7 @@
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/llist.h>
+#include <linux/delay.h>
#include "rds.h"
#include "ib.h"
@@ -286,7 +287,7 @@
for_each_online_cpu(cpu) {
flag = &per_cpu(clean_list_grace, cpu);
while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
- cpu_relax();
+ cpu_chill();
}
}
diff -Nur linux-3.18.10.orig/net/sched/sch_generic.c linux-3.18.10/net/sched/sch_generic.c
--- linux-3.18.10.orig/net/sched/sch_generic.c 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/net/sched/sch_generic.c 2015-03-26 12:42:18.695588359 +0100
@@ -894,7 +894,7 @@
/* Wait for outstanding qdisc_run calls. */
list_for_each_entry(dev, head, close_list)
while (some_qdisc_is_busy(dev))
- yield();
+ msleep(1);
}
void dev_deactivate(struct net_device *dev)
diff -Nur linux-3.18.10.orig/net/sunrpc/svc_xprt.c linux-3.18.10/net/sunrpc/svc_xprt.c
--- linux-3.18.10.orig/net/sunrpc/svc_xprt.c 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/net/sunrpc/svc_xprt.c 2015-03-26 12:42:18.695588359 +0100
@@ -357,7 +357,7 @@
return;
}
- cpu = get_cpu();
+ cpu = get_cpu_light();
pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
spin_lock_bh(&pool->sp_lock);
@@ -390,7 +390,7 @@
}
spin_unlock_bh(&pool->sp_lock);
- put_cpu();
+ put_cpu_light();
}
/*
diff -Nur linux-3.18.10.orig/scripts/mkcompile_h linux-3.18.10/scripts/mkcompile_h
--- linux-3.18.10.orig/scripts/mkcompile_h 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/scripts/mkcompile_h 2015-03-26 12:42:18.695588359 +0100
@@ -4,7 +4,8 @@
ARCH=$2
SMP=$3
PREEMPT=$4
-CC=$5
+RT=$5
+CC=$6
vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; }
@@ -57,6 +58,7 @@
CONFIG_FLAGS=""
if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
+if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi
UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
# Truncate to maximum length
diff -Nur linux-3.18.10.orig/sound/core/pcm_native.c linux-3.18.10/sound/core/pcm_native.c
--- linux-3.18.10.orig/sound/core/pcm_native.c 2015-03-24 02:05:12.000000000 +0100
+++ linux-3.18.10/sound/core/pcm_native.c 2015-03-26 12:42:18.695588359 +0100
@@ -104,7 +104,7 @@
void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream)
{
if (!substream->pcm->nonatomic)
- local_irq_disable();
+ local_irq_disable_nort();
snd_pcm_stream_lock(substream);
}
EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq);
@@ -113,7 +113,7 @@
{
snd_pcm_stream_unlock(substream);
if (!substream->pcm->nonatomic)
- local_irq_enable();
+ local_irq_enable_nort();
}
EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq);
@@ -121,7 +121,7 @@
{
unsigned long flags = 0;
if (!substream->pcm->nonatomic)
- local_irq_save(flags);
+ local_irq_save_nort(flags);
snd_pcm_stream_lock(substream);
return flags;
}
@@ -132,7 +132,7 @@
{
snd_pcm_stream_unlock(substream);
if (!substream->pcm->nonatomic)
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
}
EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore);
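
Note on the pcm_native.c hunks above (illustration, not part of the patch): the *_nort() variants are helpers supplied elsewhere in this patch set; the intent, as used here, is that on !RT they behave like the plain local_irq_*() operations, while on PREEMPT_RT they do nothing because snd_pcm_stream_lock() already takes a sleeping lock and hard-IRQ disabling around it is neither required nor permitted. A simplified sketch of that shape, not the exact definitions from the patch set:

#ifdef CONFIG_PREEMPT_RT_FULL
/* RT: no hard-IRQ disabling; the following sleeping lock protects the data */
# define local_irq_disable_nort()	barrier()
# define local_irq_enable_nort()	barrier()
#else
/* !RT: identical to the original calls these hunks replace */
# define local_irq_disable_nort()	local_irq_disable()
# define local_irq_enable_nort()	local_irq_enable()
#endif
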