patch-realtime 732 KB

(14,308 lines; file content not rendered in this view)
430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521
715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742017421174221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631
786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741
857518576185771857818579185801858118582185831858418585185861858718588185891859018591185921859318594185951859618597185981859918600186011860218603186041860518606186071860818609186101861118612186131861418615186161861718618186191862018621186221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211902219023190241902519026190271902819029190301903119032190331903419035190361903719038190391904019041190421904319044190451904619047190481904919050190511905219053190541905519056190571905819059190601906119062190631906419065190661906719068190691907019071190721907319074190751907619077190781907919080190811908219083190841908519086190871908819089190901909119092190931909419095190961909719098190991910019101191021910319104191051910619107191081910919110191111911219113191141911519116191171911819119191201912119122191231912419125191261912719128191291913019131191321913319134191351913619137191381913919140191411914219143191441914519146191471914819149191501915119152191531915419155191561915719158191591916019161191621916319164191651916619167191681916919170191711917219173191741917519176191771917819179191801918119182191831918419185191861918719188191891919019191191921919319194191951919619197191981919919200192011920219203192041920519206192071920819209192101921119212192131921419215192161921719218192191922019221192221922319224192251922619227192281922919230192311923219233192341923519236192371923819239192401924119242192431924419245192461924719248192491925019251192521925319254192551925619257192581925919260192611926219263192641926519266192671926819269192701927119272192731927419275192761927719278192791928019281192821928319284192851
928619287192881928919290192911929219293192941929519296192971929819299193001930119302193031930419305193061930719308193091931019311193121931319314193151931619317193181931919320193211932219323193241932519326193271932819329193301933119332193331933419335193361933719338193391934019341193421934319344193451934619347193481934919350193511935219353193541935519356193571935819359193601936119362193631936419365193661936719368193691937019371193721937319374193751937619377193781937919380193811938219383193841938519386193871938819389193901939119392193931939419395193961939719398193991940019401194021940319404194051940619407194081940919410194111941219413194141941519416194171941819419194201942119422194231942419425194261942719428194291943019431194321943319434194351943619437194381943919440194411944219443194441944519446194471944819449194501945119452194531945419455194561945719458194591946019461194621946319464194651946619467194681946919470194711947219473194741947519476194771947819479194801948119482194831948419485194861948719488194891949019491194921949319494194951949619497194981949919500195011950219503195041950519506195071950819509195101951119512195131951419515195161951719518195191952019521195221952319524195251952619527195281952919530195311953219533195341953519536195371953819539195401954119542195431954419545195461954719548195491955019551195521955319554195551955619557195581955919560195611956219563195641956519566195671956819569195701957119572195731957419575195761957719578195791958019581195821958319584195851958619587195881958919590195911959219593195941959519596195971959819599196001960119602196031960419605196061960719608196091961019611196121961319614196151961619617196181961919620196211962219623196241962519626196271962819629196301963119632196331963419635196361963719638196391964019641196421964319644196451964619647196481964919650196511965219653196541965519656196571965819659196601966119662196631966419665196661966719668196691967019671196721967319674196751967619677196781967919680196811968219683196841968519686196871968819689196901969119692196931969419695196961969719698196991970019701197021970319704197051970619707197081970919710197111971219713197141971519716197171971819719197201972119722197231972419725197261972719728197291973019731197321973319734197351973619737197381973919740197411974219743197441974519746197471974819749197501975119752197531975419755197561975719758197591976019761197621976319764197651976619767197681976919770197711977219773197741977519776197771977819779197801978119782197831978419785197861978719788197891979019791197921979319794197951979619797197981979919800198011980219803198041980519806198071980819809198101981119812198131981419815198161981719818198191982019821198221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961
999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212022220223202242022520226202272022820229202302023120232202332023420235202362023720238202392024020241202422024320244202452024620247202482024920250202512025220253202542025520256202572025820259202602026120262202632026420265202662026720268202692027020271202722027320274202752027620277202782027920280202812028220283202842028520286202872028820289202902029120292202932029420295202962029720298202992030020301203022030320304203052030620307203082030920310203112031220313203142031520316203172031820319203202032120322203232032420325203262032720328203292033020331203322033320334203352033620337203382033920340203412034220343203442034520346203472034820349203502035120352203532035420355203562035720358203592036020361203622036320364203652036620367203682036920370203712037220373203742037520376203772037820379203802038120382203832038420385203862038720388203892039020391203922039320394203952039620397203982039920400204012040220403204042040520406204072040820409204102041120412204132041420415204162041720418204192042020421204222042320424204252042620427204282042920430204312043220433204342043520436204372043820439204402044120442204432044420445204462044720448204492045020451204522045320454204552045620457204582045920460204612046220463204642046520466204672046820469204702047120472204732047420475204762047720478204792048020481204822048320484204852048620487204882048920490204912049220493204942049520496204972049820499205002050120502205032050420505205062050720508205092051020511205122051320514205152051620517205182051920520205212052220523205242052520526205272052820529205302053120532205332053420535205362053720538205392054020541205422054320544205452054620547205482054920550205512055220553205542055520556205572055820559205602056120562205632056420565205662056720568205692057020571205722057320574205752057620577205782057920580205812058220583205842058520586205872058820589205902059120592205932059420595205962059720598205992060020601206022060320604206052060620607206082060920610206112061220613206142061520616206172061820619206202062120622206232062420625206262062720628206292063020631206322063320634206352063620637206382063920640206412064220643206442064520646206472064820649206502065120652206532065420655206562065720658206592066020661206622066320664206652066620667206682066920670206712067220673206742067520676206772067820679206802068120682206832068420685206862068720688206892069020691206922069320694206952069620697206982069920700207012070220703207042070520706207072
070820709207102071120712207132071420715207162071720718207192072020721207222072320724207252072620727207282072920730207312073220733207342073520736207372073820739207402074120742207432074420745207462074720748207492075020751207522075320754207552075620757207582075920760207612076220763207642076520766207672076820769207702077120772207732077420775207762077720778207792078020781207822078320784207852078620787207882078920790207912079220793207942079520796207972079820799208002080120802208032080420805208062080720808208092081020811208122081320814208152081620817208182081920820208212082220823208242082520826208272082820829208302083120832208332083420835208362083720838208392084020841208422084320844208452084620847208482084920850208512085220853208542085520856208572085820859208602086120862208632086420865208662086720868208692087020871208722087320874208752087620877208782087920880208812088220883208842088520886208872088820889208902089120892208932089420895208962089720898208992090020901209022090320904209052090620907209082090920910209112091220913209142091520916209172091820919209202092120922209232092420925209262092720928209292093020931209322093320934209352093620937209382093920940209412094220943209442094520946209472094820949209502095120952209532095420955209562095720958209592096020961209622096320964209652096620967209682096920970209712097220973209742097520976209772097820979209802098120982209832098420985209862098720988209892099020991209922099320994209952099620997209982099921000210012100221003210042100521006210072100821009210102101121012210132101421015210162101721018210192102021021210222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303213042130521306213072130821309213102131121312213132131421315213162131721318213192132021321213222132321324213252132621327213282132921330213312133221333213342133521336213372133821339213402134121342213432134421345213462134721348213492135021351213522135321354213552135621357213582135921360213612136221363213642136521366213672136821369213702137121372213732137421375213762137721378213792138021381213822138321384213852138621387213882138921390213912139221393213942139521396213972139821399214002140121402214032140421405214062140721408214092141021411214122141321414214152141621417214182
141921420214212142221423214242142521426214272142821429214302143121432214332143421435214362143721438214392144021441214422144321444214452144621447214482144921450214512145221453214542145521456214572145821459214602146121462214632146421465214662146721468214692147021471214722147321474214752147621477214782147921480214812148221483214842148521486214872148821489214902149121492214932149421495214962149721498214992150021501215022150321504215052150621507215082150921510215112151221513215142151521516215172151821519215202152121522215232152421525215262152721528215292153021531215322153321534215352153621537215382153921540215412154221543215442154521546215472154821549215502155121552215532155421555215562155721558215592156021561215622156321564215652156621567215682156921570215712157221573215742157521576215772157821579215802158121582215832158421585215862158721588215892159021591215922159321594215952159621597215982159921600216012160221603216042160521606216072160821609216102161121612216132161421615216162161721618216192162021621216222162321624216252162621627216282162921630216312163221633216342163521636216372163821639216402164121642216432164421645216462164721648216492165021651216522165321654216552165621657216582165921660216612166221663216642166521666216672166821669216702167121672216732167421675216762167721678216792168021681216822168321684216852168621687216882168921690216912169221693216942169521696216972169821699217002170121702217032170421705217062170721708217092171021711217122171321714217152171621717217182171921720217212172221723217242172521726217272172821729217302173121732217332173421735217362173721738217392174021741217422174321744217452174621747217482174921750217512175221753217542175521756217572175821759217602176121762217632176421765217662176721768217692177021771217722177321774217752177621777217782177921780217812178221783217842178521786217872178821789217902179121792217932179421795217962179721798217992180021801218022180321804218052180621807218082180921810218112181221813218142181521816218172181821819218202182121822218232182421825218262182721828218292183021831218322183321834218352183621837218382183921840218412184221843218442184521846218472184821849218502185121852218532185421855218562185721858218592186021861218622186321864218652186621867218682186921870218712187221873218742187521876218772187821879218802188121882218832188421885218862188721888218892189021891218922189321894218952189621897218982189921900219012190221903219042190521906219072190821909219102191121912219132191421915219162191721918219192192021921219222192321924219252192621927219282192921930219312193221933219342193521936219372193821939219402194121942219432194421945219462194721948219492195021951219522195321954219552195621957219582195921960219612196221963219642196521966219672196821969219702197121972219732197421975219762197721978219792198021981219822198321984219852198621987219882198921990219912199221993219942199521996219972199821999220002200122002220032200422005220062200722008220092201022011220122201322014220152201622017220182201922020220212202222023220242202522026220272202822029220302203122032220332203422035220362203722038220392204022041220422204322044220452204622047220482204922050220512205222053220542205522056220572205822059220602206122062220632206422065220662206722068220692207022071220722207322074220752207622077220782207922080220812208222083220842208522086220872208822089220902209122092220932209422095220962209722098220992210022101221022210322104221052210622107221082210922110221112211222113221142211522116221172211822119221202212122122221232212422125221262212722128221292
213022131221322213322134221352213622137221382213922140221412214222143221442214522146221472214822149221502215122152221532215422155221562215722158221592216022161221622216322164221652216622167221682216922170221712217222173221742217522176221772217822179221802218122182221832218422185221862218722188221892219022191221922219322194221952219622197221982219922200222012220222203222042220522206222072220822209222102221122212222132221422215222162221722218222192222022221222222222322224222252222622227222282222922230222312223222233222342223522236222372223822239222402224122242222432224422245222462224722248222492225022251222522225322254222552225622257222582225922260222612226222263222642226522266222672226822269222702227122272222732227422275222762227722278222792228022281222822228322284222852228622287222882228922290222912229222293222942229522296222972229822299223002230122302223032230422305223062230722308223092231022311223122231322314223152231622317223182231922320223212232222323223242232522326223272232822329223302233122332223332233422335223362233722338223392234022341223422234322344223452234622347223482234922350223512235222353223542235522356223572235822359223602236122362223632236422365223662236722368223692237022371223722237322374223752237622377223782237922380223812238222383223842238522386223872238822389223902239122392223932239422395223962239722398223992240022401224022240322404224052240622407224082240922410224112241222413224142241522416224172241822419224202242122422224232242422425224262242722428224292243022431224322243322434224352243622437224382243922440224412244222443224442244522446224472244822449224502245122452224532245422455224562245722458224592246022461224622246322464224652246622467224682246922470224712247222473224742247522476224772247822479224802248122482224832248422485224862248722488224892249022491224922249322494224952249622497224982249922500225012250222503225042250522506225072250822509225102251122512225132251422515225162251722518225192252022521225222252322524225252252622527225282252922530225312253222533225342253522536225372253822539225402254122542225432254422545225462254722548225492255022551225522255322554225552255622557225582255922560225612256222563225642256522566225672256822569225702257122572225732257422575225762257722578225792258022581225822258322584225852258622587225882258922590225912259222593225942259522596225972259822599226002260122602226032260422605226062260722608226092261022611226122261322614226152261622617226182261922620226212262222623226242262522626226272262822629226302263122632226332263422635226362263722638226392264022641226422264322644226452264622647226482264922650226512265222653226542265522656226572265822659226602266122662226632266422665226662266722668226692267022671226722267322674226752267622677226782267922680226812268222683226842268522686226872268822689226902269122692226932269422695226962269722698226992270022701227022270322704227052270622707227082270922710227112271222713227142271522716227172271822719227202272122722227232272422725227262272722728227292273022731227322273322734227352273622737227382273922740227412274222743227442274522746227472274822749227502275122752227532275422755227562275722758227592276022761227622276322764227652276622767227682276922770227712277222773227742277522776227772277822779227802278122782227832278422785227862278722788227892279022791227922279322794227952279622797227982279922800228012280222803228042280522806228072280822809228102281122812228132281422815228162281722818228192282022821228222282322824228252282622827228282282922830228312283222833228342283522836228372283822839228402
284122842228432284422845228462284722848228492285022851228522285322854228552285622857228582285922860228612286222863228642286522866228672286822869228702287122872228732287422875228762287722878228792288022881228822288322884228852288622887228882288922890228912289222893228942289522896228972289822899229002290122902229032290422905229062290722908229092291022911229122291322914229152291622917229182291922920229212292222923229242292522926229272292822929229302293122932229332293422935229362293722938229392294022941229422294322944229452294622947229482294922950229512295222953229542295522956229572295822959229602296122962229632296422965229662296722968229692297022971229722297322974229752297622977229782297922980229812298222983229842298522986229872298822989229902299122992229932299422995229962299722998229992300023001230022300323004230052300623007230082300923010230112301223013230142301523016230172301823019230202302123022230232302423025230262302723028230292303023031230322303323034230352303623037230382303923040230412304223043230442304523046230472304823049230502305123052230532305423055230562305723058230592306023061230622306323064230652306623067230682306923070230712307223073230742307523076230772307823079230802308123082230832308423085230862308723088230892309023091230922309323094230952309623097230982309923100231012310223103231042310523106231072310823109231102311123112231132311423115231162311723118231192312023121231222312323124231252312623127231282312923130231312313223133231342313523136231372313823139231402314123142231432314423145231462314723148231492315023151231522315323154231552315623157231582315923160231612316223163231642316523166231672316823169231702317123172231732317423175231762317723178231792318023181231822318323184231852318623187231882318923190231912319223193231942319523196231972319823199232002320123202232032320423205232062320723208232092321023211232122321323214232152321623217232182321923220232212322223223232242322523226232272322823229232302323123232232332323423235232362323723238232392324023241232422324323244232452324623247232482324923250232512325223253232542325523256232572325823259232602326123262232632326423265232662326723268232692327023271232722327323274232752327623277232782327923280232812328223283232842328523286232872328823289232902329123292232932329423295232962329723298232992330023301233022330323304233052330623307233082330923310233112331223313233142331523316233172331823319233202332123322233232332423325233262332723328233292333023331233322333323334233352333623337233382333923340233412334223343233442334523346233472334823349233502335123352233532335423355233562335723358233592336023361233622336323364233652336623367233682336923370233712337223373233742337523376233772337823379233802338123382233832338423385233862338723388233892339023391233922339323394233952339623397233982339923400234012340223403234042340523406234072340823409234102341123412234132341423415234162341723418234192342023421234222342323424234252342623427234282342923430234312343223433234342343523436234372343823439234402344123442234432344423445234462344723448234492345023451234522345323454234552345623457234582345923460234612346223463234642346523466234672346823469234702347123472234732347423475234762347723478234792348023481234822348323484234852348623487234882348923490234912349223493234942349523496234972349823499235002350123502235032350423505235062350723508235092351023511235122351323514235152351623517235182351923520235212352223523235242352523526235272352823529235302353123532235332353423535235362353723538235392354023541235422354323544235452354623547235482354923550235512
355223553235542355523556235572355823559235602356123562235632356423565235662356723568235692357023571235722357323574235752357623577235782357923580235812358223583235842358523586235872358823589235902359123592235932359423595235962359723598235992360023601236022360323604236052360623607236082360923610236112361223613236142361523616236172361823619236202362123622236232362423625236262362723628236292363023631236322363323634236352363623637236382363923640236412364223643236442364523646236472364823649236502365123652236532365423655236562365723658236592366023661236622366323664236652366623667236682366923670236712367223673236742367523676236772367823679236802368123682236832368423685236862368723688236892369023691236922369323694236952369623697236982369923700237012370223703237042370523706237072370823709237102371123712237132371423715237162371723718237192372023721237222372323724237252372623727237282372923730237312373223733237342373523736237372373823739237402374123742237432374423745237462374723748237492375023751237522375323754237552375623757237582375923760237612376223763237642376523766237672376823769237702377123772237732377423775237762377723778237792378023781237822378323784237852378623787237882378923790237912379223793237942379523796237972379823799238002380123802238032380423805238062380723808238092381023811238122381323814238152381623817238182381923820238212382223823238242382523826238272382823829238302383123832238332383423835238362383723838238392384023841238422384323844238452384623847238482384923850238512385223853238542385523856238572385823859238602386123862238632386423865238662386723868238692387023871238722387323874238752387623877238782387923880238812388223883238842388523886238872388823889238902389123892238932389423895238962389723898238992390023901239022390323904239052390623907239082390923910239112391223913239142391523916239172391823919239202392123922239232392423925239262392723928239292393023931239322393323934239352393623937239382393923940239412394223943239442394523946239472394823949239502395123952239532395423955239562395723958239592396023961239622396323964239652396623967239682396923970239712397223973239742397523976239772397823979239802398123982239832398423985239862398723988239892399023991239922399323994239952399623997239982399924000240012400224003240042400524006240072400824009240102401124012240132401424015240162401724018240192402024021240222402324024240252402624027240282402924030240312403224033240342403524036240372403824039240402404124042240432404424045240462404724048240492405024051240522405324054240552405624057240582405924060240612406224063240642406524066240672406824069240702407124072240732407424075240762407724078240792408024081240822408324084240852408624087240882408924090240912409224093240942409524096240972409824099241002410124102241032410424105241062410724108241092411024111241122411324114241152411624117241182411924120241212412224123241242412524126241272412824129241302413124132241332413424135241362413724138241392414024141241422414324144241452414624147241482414924150241512415224153241542415524156241572415824159241602416124162241632416424165241662416724168241692417024171241722417324174241752417624177241782417924180241812418224183241842418524186241872418824189241902419124192241932419424195241962419724198241992420024201242022420324204242052420624207242082420924210242112421224213242142421524216242172421824219242202422124222242232422424225242262422724228242292423024231242322423324234242352423624237242382423924240242412424224243242442424524246242472424824249242502425124252242532425424255242562425724258242592426024261242622
  1. diff -Nur linux-4.8.15.orig/arch/arm/include/asm/switch_to.h linux-4.8.15/arch/arm/include/asm/switch_to.h
  2. --- linux-4.8.15.orig/arch/arm/include/asm/switch_to.h 2016-12-15 17:50:48.000000000 +0100
  3. +++ linux-4.8.15/arch/arm/include/asm/switch_to.h 2017-01-01 17:07:11.543137886 +0100
  4. @@ -3,6 +3,13 @@
  5. #include <linux/thread_info.h>
  6. +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
  7. +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
  8. +#else
  9. +static inline void
  10. +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
  11. +#endif
  12. +
  13. /*
  14. * For v7 SMP cores running a preemptible kernel we may be pre-empted
  15. * during a TLB maintenance operation, so execute an inner-shareable dsb
  16. @@ -25,6 +32,7 @@
  17. #define switch_to(prev,next,last) \
  18. do { \
  19. __complete_pending_tlbi(); \
  20. + switch_kmaps(prev, next); \
  21. last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \
  22. } while (0)
  23. diff -Nur linux-4.8.15.orig/arch/arm/include/asm/thread_info.h linux-4.8.15/arch/arm/include/asm/thread_info.h
  24. --- linux-4.8.15.orig/arch/arm/include/asm/thread_info.h 2016-12-15 17:50:48.000000000 +0100
  25. +++ linux-4.8.15/arch/arm/include/asm/thread_info.h 2017-01-01 17:07:11.547138137 +0100
  26. @@ -49,6 +49,7 @@
  27. struct thread_info {
  28. unsigned long flags; /* low level flags */
  29. int preempt_count; /* 0 => preemptable, <0 => bug */
  30. + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
  31. mm_segment_t addr_limit; /* address limit */
  32. struct task_struct *task; /* main task structure */
  33. __u32 cpu; /* cpu */
  34. @@ -142,7 +143,8 @@
  35. #define TIF_SYSCALL_TRACE 4 /* syscall trace active */
  36. #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
  37. #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
  38. -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */
  39. +#define TIF_SECCOMP 8 /* seccomp syscall filtering active */
  40. +#define TIF_NEED_RESCHED_LAZY 7
  41. #define TIF_NOHZ 12 /* in adaptive nohz mode */
  42. #define TIF_USING_IWMMXT 17
  43. @@ -152,6 +154,7 @@
  44. #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
  45. #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
  46. #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
  47. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  48. #define _TIF_UPROBE (1 << TIF_UPROBE)
  49. #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
  50. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  51. @@ -167,7 +170,8 @@
  52. * Change these and you break ASM code in entry-common.S
  53. */
  54. #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
  55. - _TIF_NOTIFY_RESUME | _TIF_UPROBE)
  56. + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
  57. + _TIF_NEED_RESCHED_LAZY)
  58. #endif /* __KERNEL__ */
  59. #endif /* __ASM_ARM_THREAD_INFO_H */
  60. diff -Nur linux-4.8.15.orig/arch/arm/Kconfig linux-4.8.15/arch/arm/Kconfig
  61. --- linux-4.8.15.orig/arch/arm/Kconfig 2016-12-15 17:50:48.000000000 +0100
  62. +++ linux-4.8.15/arch/arm/Kconfig 2017-01-01 17:07:11.487134269 +0100
  63. @@ -36,7 +36,7 @@
  64. select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
  65. select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
  66. select HAVE_ARCH_HARDENED_USERCOPY
  67. - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
  68. + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT_BASE
  69. select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
  70. select HAVE_ARCH_MMAP_RND_BITS if MMU
  71. select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
  72. @@ -75,6 +75,7 @@
  73. select HAVE_PERF_EVENTS
  74. select HAVE_PERF_REGS
  75. select HAVE_PERF_USER_STACK_DUMP
  76. + select HAVE_PREEMPT_LAZY
  77. select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
  78. select HAVE_REGS_AND_STACK_ACCESS_API
  79. select HAVE_SYSCALL_TRACEPOINTS
  80. diff -Nur linux-4.8.15.orig/arch/arm/kernel/asm-offsets.c linux-4.8.15/arch/arm/kernel/asm-offsets.c
  81. --- linux-4.8.15.orig/arch/arm/kernel/asm-offsets.c 2016-12-15 17:50:48.000000000 +0100
  82. +++ linux-4.8.15/arch/arm/kernel/asm-offsets.c 2017-01-01 17:07:11.587140711 +0100
  83. @@ -65,6 +65,7 @@
  84. BLANK();
  85. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  86. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  87. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  88. DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
  89. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  90. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  91. diff -Nur linux-4.8.15.orig/arch/arm/kernel/entry-armv.S linux-4.8.15/arch/arm/kernel/entry-armv.S
  92. --- linux-4.8.15.orig/arch/arm/kernel/entry-armv.S 2016-12-15 17:50:48.000000000 +0100
  93. +++ linux-4.8.15/arch/arm/kernel/entry-armv.S 2017-01-01 17:07:11.611142259 +0100
  94. @@ -220,11 +220,18 @@
  95. #ifdef CONFIG_PREEMPT
  96. ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
  97. - ldr r0, [tsk, #TI_FLAGS] @ get flags
  98. teq r8, #0 @ if preempt count != 0
  99. + bne 1f @ return from exception
  100. + ldr r0, [tsk, #TI_FLAGS] @ get flags
  101. + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
  102. + blne svc_preempt @ preempt!
  103. +
  104. + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
  105. + teq r8, #0 @ if preempt lazy count != 0
  106. movne r0, #0 @ force flags to 0
  107. - tst r0, #_TIF_NEED_RESCHED
  108. + tst r0, #_TIF_NEED_RESCHED_LAZY
  109. blne svc_preempt
  110. +1:
  111. #endif
  112. svc_exit r5, irq = 1 @ return from exception
  113. @@ -239,8 +246,14 @@
  114. 1: bl preempt_schedule_irq @ irq en/disable is done inside
  115. ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
  116. tst r0, #_TIF_NEED_RESCHED
  117. + bne 1b
  118. + tst r0, #_TIF_NEED_RESCHED_LAZY
  119. reteq r8 @ go again
  120. - b 1b
  121. + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
  122. + teq r0, #0 @ if preempt lazy count != 0
  123. + beq 1b
  124. + ret r8 @ go again
  125. +
  126. #endif
  127. __und_fault:
  128. diff -Nur linux-4.8.15.orig/arch/arm/kernel/entry-common.S linux-4.8.15/arch/arm/kernel/entry-common.S
  129. --- linux-4.8.15.orig/arch/arm/kernel/entry-common.S 2016-12-15 17:50:48.000000000 +0100
  130. +++ linux-4.8.15/arch/arm/kernel/entry-common.S 2017-01-01 17:07:11.615142513 +0100
  131. @@ -36,7 +36,9 @@
  132. UNWIND(.cantunwind )
  133. disable_irq_notrace @ disable interrupts
  134. ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
  135. - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
  136. + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
  137. + bne fast_work_pending
  138. + tst r1, #_TIF_SECCOMP
  139. bne fast_work_pending
  140. /* perform architecture specific actions before user return */
  141. @@ -62,8 +64,11 @@
  142. str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
  143. disable_irq_notrace @ disable interrupts
  144. ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
  145. - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
  146. + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
  147. + bne do_slower_path
  148. + tst r1, #_TIF_SECCOMP
  149. beq no_work_pending
  150. +do_slower_path:
  151. UNWIND(.fnend )
  152. ENDPROC(ret_fast_syscall)
  153. diff -Nur linux-4.8.15.orig/arch/arm/kernel/patch.c linux-4.8.15/arch/arm/kernel/patch.c
  154. --- linux-4.8.15.orig/arch/arm/kernel/patch.c 2016-12-15 17:50:48.000000000 +0100
  155. +++ linux-4.8.15/arch/arm/kernel/patch.c 2017-01-01 17:07:11.623143039 +0100
  156. @@ -15,7 +15,7 @@
  157. unsigned int insn;
  158. };
  159. -static DEFINE_SPINLOCK(patch_lock);
  160. +static DEFINE_RAW_SPINLOCK(patch_lock);
  161. static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
  162. __acquires(&patch_lock)
  163. @@ -32,7 +32,7 @@
  164. return addr;
  165. if (flags)
  166. - spin_lock_irqsave(&patch_lock, *flags);
  167. + raw_spin_lock_irqsave(&patch_lock, *flags);
  168. else
  169. __acquire(&patch_lock);
  170. @@ -47,7 +47,7 @@
  171. clear_fixmap(fixmap);
  172. if (flags)
  173. - spin_unlock_irqrestore(&patch_lock, *flags);
  174. + raw_spin_unlock_irqrestore(&patch_lock, *flags);
  175. else
  176. __release(&patch_lock);
  177. }
  178. diff -Nur linux-4.8.15.orig/arch/arm/kernel/process.c linux-4.8.15/arch/arm/kernel/process.c
  179. --- linux-4.8.15.orig/arch/arm/kernel/process.c 2016-12-15 17:50:48.000000000 +0100
  180. +++ linux-4.8.15/arch/arm/kernel/process.c 2017-01-01 17:07:11.631143548 +0100
  181. @@ -323,6 +323,30 @@
  182. }
  183. #ifdef CONFIG_MMU
  184. +/*
  185. + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not
  186. + * initialized by pgtable_page_ctor() then a coredump of the vector page will
  187. + * fail.
  188. + */
  189. +static int __init vectors_user_mapping_init_page(void)
  190. +{
  191. + struct page *page;
  192. + unsigned long addr = 0xffff0000;
  193. + pgd_t *pgd;
  194. + pud_t *pud;
  195. + pmd_t *pmd;
  196. +
  197. + pgd = pgd_offset_k(addr);
  198. + pud = pud_offset(pgd, addr);
  199. + pmd = pmd_offset(pud, addr);
  200. + page = pmd_page(*(pmd));
  201. +
  202. + pgtable_page_ctor(page);
  203. +
  204. + return 0;
  205. +}
  206. +late_initcall(vectors_user_mapping_init_page);
  207. +
  208. #ifdef CONFIG_KUSER_HELPERS
  209. /*
  210. * The vectors page is always readable from user space for the
  211. diff -Nur linux-4.8.15.orig/arch/arm/kernel/signal.c linux-4.8.15/arch/arm/kernel/signal.c
  212. --- linux-4.8.15.orig/arch/arm/kernel/signal.c 2016-12-15 17:50:48.000000000 +0100
  213. +++ linux-4.8.15/arch/arm/kernel/signal.c 2017-01-01 17:07:11.659145353 +0100
  214. @@ -572,7 +572,8 @@
  215. */
  216. trace_hardirqs_off();
  217. do {
  218. - if (likely(thread_flags & _TIF_NEED_RESCHED)) {
  219. + if (likely(thread_flags & (_TIF_NEED_RESCHED |
  220. + _TIF_NEED_RESCHED_LAZY))) {
  221. schedule();
  222. } else {
  223. if (unlikely(!user_mode(regs)))
  224. diff -Nur linux-4.8.15.orig/arch/arm/kernel/smp.c linux-4.8.15/arch/arm/kernel/smp.c
  225. --- linux-4.8.15.orig/arch/arm/kernel/smp.c 2016-12-15 17:50:48.000000000 +0100
  226. +++ linux-4.8.15/arch/arm/kernel/smp.c 2017-01-01 17:07:11.671146131 +0100
  227. @@ -234,8 +234,6 @@
  228. flush_cache_louis();
  229. local_flush_tlb_all();
  230. - clear_tasks_mm_cpumask(cpu);
  231. -
  232. return 0;
  233. }
  234. @@ -251,6 +249,9 @@
  235. pr_err("CPU%u: cpu didn't die\n", cpu);
  236. return;
  237. }
  238. +
  239. + clear_tasks_mm_cpumask(cpu);
  240. +
  241. pr_notice("CPU%u: shutdown\n", cpu);
  242. /*
  243. diff -Nur linux-4.8.15.orig/arch/arm/kernel/unwind.c linux-4.8.15/arch/arm/kernel/unwind.c
  244. --- linux-4.8.15.orig/arch/arm/kernel/unwind.c 2016-12-15 17:50:48.000000000 +0100
  245. +++ linux-4.8.15/arch/arm/kernel/unwind.c 2017-01-01 17:07:11.671146131 +0100
  246. @@ -93,7 +93,7 @@
  247. static const struct unwind_idx *__origin_unwind_idx;
  248. extern const struct unwind_idx __stop_unwind_idx[];
  249. -static DEFINE_SPINLOCK(unwind_lock);
  250. +static DEFINE_RAW_SPINLOCK(unwind_lock);
  251. static LIST_HEAD(unwind_tables);
  252. /* Convert a prel31 symbol to an absolute address */
  253. @@ -201,7 +201,7 @@
  254. /* module unwind tables */
  255. struct unwind_table *table;
  256. - spin_lock_irqsave(&unwind_lock, flags);
  257. + raw_spin_lock_irqsave(&unwind_lock, flags);
  258. list_for_each_entry(table, &unwind_tables, list) {
  259. if (addr >= table->begin_addr &&
  260. addr < table->end_addr) {
  261. @@ -213,7 +213,7 @@
  262. break;
  263. }
  264. }
  265. - spin_unlock_irqrestore(&unwind_lock, flags);
  266. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  267. }
  268. pr_debug("%s: idx = %p\n", __func__, idx);
  269. @@ -529,9 +529,9 @@
  270. tab->begin_addr = text_addr;
  271. tab->end_addr = text_addr + text_size;
  272. - spin_lock_irqsave(&unwind_lock, flags);
  273. + raw_spin_lock_irqsave(&unwind_lock, flags);
  274. list_add_tail(&tab->list, &unwind_tables);
  275. - spin_unlock_irqrestore(&unwind_lock, flags);
  276. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  277. return tab;
  278. }
  279. @@ -543,9 +543,9 @@
  280. if (!tab)
  281. return;
  282. - spin_lock_irqsave(&unwind_lock, flags);
  283. + raw_spin_lock_irqsave(&unwind_lock, flags);
  284. list_del(&tab->list);
  285. - spin_unlock_irqrestore(&unwind_lock, flags);
  286. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  287. kfree(tab);
  288. }
  289. diff -Nur linux-4.8.15.orig/arch/arm/kvm/arm.c linux-4.8.15/arch/arm/kvm/arm.c
  290. --- linux-4.8.15.orig/arch/arm/kvm/arm.c 2016-12-15 17:50:48.000000000 +0100
  291. +++ linux-4.8.15/arch/arm/kvm/arm.c 2017-01-01 17:07:11.675146381 +0100
  292. @@ -584,7 +584,7 @@
  293. * involves poking the GIC, which must be done in a
  294. * non-preemptible context.
  295. */
  296. - preempt_disable();
  297. + migrate_disable();
  298. kvm_pmu_flush_hwstate(vcpu);
  299. kvm_timer_flush_hwstate(vcpu);
  300. kvm_vgic_flush_hwstate(vcpu);
  301. @@ -605,7 +605,7 @@
  302. kvm_pmu_sync_hwstate(vcpu);
  303. kvm_timer_sync_hwstate(vcpu);
  304. kvm_vgic_sync_hwstate(vcpu);
  305. - preempt_enable();
  306. + migrate_enable();
  307. continue;
  308. }
  309. @@ -661,7 +661,7 @@
  310. kvm_vgic_sync_hwstate(vcpu);
  311. - preempt_enable();
  312. + migrate_enable();
  313. ret = handle_exit(vcpu, run, ret);
  314. }
  315. diff -Nur linux-4.8.15.orig/arch/arm/mach-exynos/platsmp.c linux-4.8.15/arch/arm/mach-exynos/platsmp.c
  316. --- linux-4.8.15.orig/arch/arm/mach-exynos/platsmp.c 2016-12-15 17:50:48.000000000 +0100
  317. +++ linux-4.8.15/arch/arm/mach-exynos/platsmp.c 2017-01-01 17:07:11.707148443 +0100
  318. @@ -229,7 +229,7 @@
  319. return (void __iomem *)(S5P_VA_SCU);
  320. }
  321. -static DEFINE_SPINLOCK(boot_lock);
  322. +static DEFINE_RAW_SPINLOCK(boot_lock);
  323. static void exynos_secondary_init(unsigned int cpu)
  324. {
  325. @@ -242,8 +242,8 @@
  326. /*
  327. * Synchronise with the boot thread.
  328. */
  329. - spin_lock(&boot_lock);
  330. - spin_unlock(&boot_lock);
  331. + raw_spin_lock(&boot_lock);
  332. + raw_spin_unlock(&boot_lock);
  333. }
  334. int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr)
  335. @@ -307,7 +307,7 @@
  336. * Set synchronisation state between this boot processor
  337. * and the secondary one
  338. */
  339. - spin_lock(&boot_lock);
  340. + raw_spin_lock(&boot_lock);
  341. /*
  342. * The secondary processor is waiting to be released from
  343. @@ -334,7 +334,7 @@
  344. if (timeout == 0) {
  345. printk(KERN_ERR "cpu1 power enable failed");
  346. - spin_unlock(&boot_lock);
  347. + raw_spin_unlock(&boot_lock);
  348. return -ETIMEDOUT;
  349. }
  350. }
  351. @@ -380,7 +380,7 @@
  352. * calibrations, then wait for it to finish
  353. */
  354. fail:
  355. - spin_unlock(&boot_lock);
  356. + raw_spin_unlock(&boot_lock);
  357. return pen_release != -1 ? ret : 0;
  358. }
  359. diff -Nur linux-4.8.15.orig/arch/arm/mach-hisi/platmcpm.c linux-4.8.15/arch/arm/mach-hisi/platmcpm.c
  360. --- linux-4.8.15.orig/arch/arm/mach-hisi/platmcpm.c 2016-12-15 17:50:48.000000000 +0100
  361. +++ linux-4.8.15/arch/arm/mach-hisi/platmcpm.c 2017-01-01 17:07:11.747151017 +0100
  362. @@ -61,7 +61,7 @@
  363. static void __iomem *sysctrl, *fabric;
  364. static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
  365. -static DEFINE_SPINLOCK(boot_lock);
  366. +static DEFINE_RAW_SPINLOCK(boot_lock);
  367. static u32 fabric_phys_addr;
  368. /*
  369. * [0]: bootwrapper physical address
  370. @@ -113,7 +113,7 @@
  371. if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
  372. return -EINVAL;
  373. - spin_lock_irq(&boot_lock);
  374. + raw_spin_lock_irq(&boot_lock);
  375. if (hip04_cpu_table[cluster][cpu])
  376. goto out;
  377. @@ -147,7 +147,7 @@
  378. out:
  379. hip04_cpu_table[cluster][cpu]++;
  380. - spin_unlock_irq(&boot_lock);
  381. + raw_spin_unlock_irq(&boot_lock);
  382. return 0;
  383. }
  384. @@ -162,11 +162,11 @@
  385. cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
  386. cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
  387. - spin_lock(&boot_lock);
  388. + raw_spin_lock(&boot_lock);
  389. hip04_cpu_table[cluster][cpu]--;
  390. if (hip04_cpu_table[cluster][cpu] == 1) {
  391. /* A power_up request went ahead of us. */
  392. - spin_unlock(&boot_lock);
  393. + raw_spin_unlock(&boot_lock);
  394. return;
  395. } else if (hip04_cpu_table[cluster][cpu] > 1) {
  396. pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu);
  397. @@ -174,7 +174,7 @@
  398. }
  399. last_man = hip04_cluster_is_down(cluster);
  400. - spin_unlock(&boot_lock);
  401. + raw_spin_unlock(&boot_lock);
  402. if (last_man) {
  403. /* Since it's Cortex A15, disable L2 prefetching. */
  404. asm volatile(
  405. @@ -203,7 +203,7 @@
  406. cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
  407. count = TIMEOUT_MSEC / POLL_MSEC;
  408. - spin_lock_irq(&boot_lock);
  409. + raw_spin_lock_irq(&boot_lock);
  410. for (tries = 0; tries < count; tries++) {
  411. if (hip04_cpu_table[cluster][cpu])
  412. goto err;
  413. @@ -211,10 +211,10 @@
  414. data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
  415. if (data & CORE_WFI_STATUS(cpu))
  416. break;
  417. - spin_unlock_irq(&boot_lock);
  418. + raw_spin_unlock_irq(&boot_lock);
  419. /* Wait for clean L2 when the whole cluster is down. */
  420. msleep(POLL_MSEC);
  421. - spin_lock_irq(&boot_lock);
  422. + raw_spin_lock_irq(&boot_lock);
  423. }
  424. if (tries >= count)
  425. goto err;
  426. @@ -231,10 +231,10 @@
  427. goto err;
  428. if (hip04_cluster_is_down(cluster))
  429. hip04_set_snoop_filter(cluster, 0);
  430. - spin_unlock_irq(&boot_lock);
  431. + raw_spin_unlock_irq(&boot_lock);
  432. return 1;
  433. err:
  434. - spin_unlock_irq(&boot_lock);
  435. + raw_spin_unlock_irq(&boot_lock);
  436. return 0;
  437. }
  438. #endif
  439. diff -Nur linux-4.8.15.orig/arch/arm/mach-omap2/omap-smp.c linux-4.8.15/arch/arm/mach-omap2/omap-smp.c
  440. --- linux-4.8.15.orig/arch/arm/mach-omap2/omap-smp.c 2016-12-15 17:50:48.000000000 +0100
  441. +++ linux-4.8.15/arch/arm/mach-omap2/omap-smp.c 2017-01-01 17:07:11.763152049 +0100
  442. @@ -64,7 +64,7 @@
  443. .startup_addr = omap5_secondary_startup,
  444. };
  445. -static DEFINE_SPINLOCK(boot_lock);
  446. +static DEFINE_RAW_SPINLOCK(boot_lock);
  447. void __iomem *omap4_get_scu_base(void)
  448. {
  449. @@ -131,8 +131,8 @@
  450. /*
  451. * Synchronise with the boot thread.
  452. */
  453. - spin_lock(&boot_lock);
  454. - spin_unlock(&boot_lock);
  455. + raw_spin_lock(&boot_lock);
  456. + raw_spin_unlock(&boot_lock);
  457. }
  458. static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
  459. @@ -146,7 +146,7 @@
  460. * Set synchronisation state between this boot processor
  461. * and the secondary one
  462. */
  463. - spin_lock(&boot_lock);
  464. + raw_spin_lock(&boot_lock);
  465. /*
  466. * Update the AuxCoreBoot0 with boot state for secondary core.
  467. @@ -223,7 +223,7 @@
  468. * Now the secondary core is starting up let it run its
  469. * calibrations, then wait for it to finish
  470. */
  471. - spin_unlock(&boot_lock);
  472. + raw_spin_unlock(&boot_lock);
  473. return 0;
  474. }
  475. diff -Nur linux-4.8.15.orig/arch/arm/mach-prima2/platsmp.c linux-4.8.15/arch/arm/mach-prima2/platsmp.c
  476. --- linux-4.8.15.orig/arch/arm/mach-prima2/platsmp.c 2016-12-15 17:50:48.000000000 +0100
  477. +++ linux-4.8.15/arch/arm/mach-prima2/platsmp.c 2017-01-01 17:07:11.795154111 +0100
  478. @@ -22,7 +22,7 @@
  479. static void __iomem *clk_base;
  480. -static DEFINE_SPINLOCK(boot_lock);
  481. +static DEFINE_RAW_SPINLOCK(boot_lock);
  482. static void sirfsoc_secondary_init(unsigned int cpu)
  483. {
  484. @@ -36,8 +36,8 @@
  485. /*
  486. * Synchronise with the boot thread.
  487. */
  488. - spin_lock(&boot_lock);
  489. - spin_unlock(&boot_lock);
  490. + raw_spin_lock(&boot_lock);
  491. + raw_spin_unlock(&boot_lock);
  492. }
  493. static const struct of_device_id clk_ids[] = {
  494. @@ -75,7 +75,7 @@
  495. /* make sure write buffer is drained */
  496. mb();
  497. - spin_lock(&boot_lock);
  498. + raw_spin_lock(&boot_lock);
  499. /*
  500. * The secondary processor is waiting to be released from
  501. @@ -107,7 +107,7 @@
  502. * now the secondary core is starting up let it run its
  503. * calibrations, then wait for it to finish
  504. */
  505. - spin_unlock(&boot_lock);
  506. + raw_spin_unlock(&boot_lock);
  507. return pen_release != -1 ? -ENOSYS : 0;
  508. }
  509. diff -Nur linux-4.8.15.orig/arch/arm/mach-qcom/platsmp.c linux-4.8.15/arch/arm/mach-qcom/platsmp.c
  510. --- linux-4.8.15.orig/arch/arm/mach-qcom/platsmp.c 2016-12-15 17:50:48.000000000 +0100
  511. +++ linux-4.8.15/arch/arm/mach-qcom/platsmp.c 2017-01-01 17:07:11.803154626 +0100
  512. @@ -46,7 +46,7 @@
  513. extern void secondary_startup_arm(void);
  514. -static DEFINE_SPINLOCK(boot_lock);
  515. +static DEFINE_RAW_SPINLOCK(boot_lock);
  516. #ifdef CONFIG_HOTPLUG_CPU
  517. static void qcom_cpu_die(unsigned int cpu)
  518. @@ -60,8 +60,8 @@
  519. /*
  520. * Synchronise with the boot thread.
  521. */
  522. - spin_lock(&boot_lock);
  523. - spin_unlock(&boot_lock);
  524. + raw_spin_lock(&boot_lock);
  525. + raw_spin_unlock(&boot_lock);
  526. }
  527. static int scss_release_secondary(unsigned int cpu)
  528. @@ -284,7 +284,7 @@
  529. * set synchronisation state between this boot processor
  530. * and the secondary one
  531. */
  532. - spin_lock(&boot_lock);
  533. + raw_spin_lock(&boot_lock);
  534. /*
  535. * Send the secondary CPU a soft interrupt, thereby causing
  536. @@ -297,7 +297,7 @@
  537. * now the secondary core is starting up let it run its
  538. * calibrations, then wait for it to finish
  539. */
  540. - spin_unlock(&boot_lock);
  541. + raw_spin_unlock(&boot_lock);
  542. return ret;
  543. }
  544. diff -Nur linux-4.8.15.orig/arch/arm/mach-spear/platsmp.c linux-4.8.15/arch/arm/mach-spear/platsmp.c
  545. --- linux-4.8.15.orig/arch/arm/mach-spear/platsmp.c 2016-12-15 17:50:48.000000000 +0100
  546. +++ linux-4.8.15/arch/arm/mach-spear/platsmp.c 2017-01-01 17:07:11.847157467 +0100
  547. @@ -32,7 +32,7 @@
  548. sync_cache_w(&pen_release);
  549. }
  550. -static DEFINE_SPINLOCK(boot_lock);
  551. +static DEFINE_RAW_SPINLOCK(boot_lock);
  552. static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
  553. @@ -47,8 +47,8 @@
  554. /*
  555. * Synchronise with the boot thread.
  556. */
  557. - spin_lock(&boot_lock);
  558. - spin_unlock(&boot_lock);
  559. + raw_spin_lock(&boot_lock);
  560. + raw_spin_unlock(&boot_lock);
  561. }
  562. static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
  563. @@ -59,7 +59,7 @@
  564. * set synchronisation state between this boot processor
  565. * and the secondary one
  566. */
  567. - spin_lock(&boot_lock);
  568. + raw_spin_lock(&boot_lock);
  569. /*
  570. * The secondary processor is waiting to be released from
  571. @@ -84,7 +84,7 @@
  572. * now the secondary core is starting up let it run its
  573. * calibrations, then wait for it to finish
  574. */
  575. - spin_unlock(&boot_lock);
  576. + raw_spin_unlock(&boot_lock);
  577. return pen_release != -1 ? -ENOSYS : 0;
  578. }
  579. diff -Nur linux-4.8.15.orig/arch/arm/mach-sti/platsmp.c linux-4.8.15/arch/arm/mach-sti/platsmp.c
  580. --- linux-4.8.15.orig/arch/arm/mach-sti/platsmp.c 2016-12-15 17:50:48.000000000 +0100
  581. +++ linux-4.8.15/arch/arm/mach-sti/platsmp.c 2017-01-01 17:07:11.851157720 +0100
  582. @@ -35,7 +35,7 @@
  583. sync_cache_w(&pen_release);
  584. }
  585. -static DEFINE_SPINLOCK(boot_lock);
  586. +static DEFINE_RAW_SPINLOCK(boot_lock);
  587. static void sti_secondary_init(unsigned int cpu)
  588. {
  589. @@ -48,8 +48,8 @@
  590. /*
  591. * Synchronise with the boot thread.
  592. */
  593. - spin_lock(&boot_lock);
  594. - spin_unlock(&boot_lock);
  595. + raw_spin_lock(&boot_lock);
  596. + raw_spin_unlock(&boot_lock);
  597. }
  598. static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
  599. @@ -60,7 +60,7 @@
  600. * set synchronisation state between this boot processor
  601. * and the secondary one
  602. */
  603. - spin_lock(&boot_lock);
  604. + raw_spin_lock(&boot_lock);
  605. /*
  606. * The secondary processor is waiting to be released from
  607. @@ -91,7 +91,7 @@
  608. * now the secondary core is starting up let it run its
  609. * calibrations, then wait for it to finish
  610. */
  611. - spin_unlock(&boot_lock);
  612. + raw_spin_unlock(&boot_lock);
  613. return pen_release != -1 ? -ENOSYS : 0;
  614. }
  615. diff -Nur linux-4.8.15.orig/arch/arm/mm/fault.c linux-4.8.15/arch/arm/mm/fault.c
  616. --- linux-4.8.15.orig/arch/arm/mm/fault.c 2016-12-15 17:50:48.000000000 +0100
  617. +++ linux-4.8.15/arch/arm/mm/fault.c 2017-01-01 17:07:11.879159524 +0100
  618. @@ -430,6 +430,9 @@
  619. if (addr < TASK_SIZE)
  620. return do_page_fault(addr, fsr, regs);
  621. + if (interrupts_enabled(regs))
  622. + local_irq_enable();
  623. +
  624. if (user_mode(regs))
  625. goto bad_area;
  626. @@ -497,6 +500,9 @@
  627. static int
  628. do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
  629. {
  630. + if (interrupts_enabled(regs))
  631. + local_irq_enable();
  632. +
  633. do_bad_area(addr, fsr, regs);
  634. return 0;
  635. }
  636. diff -Nur linux-4.8.15.orig/arch/arm/mm/highmem.c linux-4.8.15/arch/arm/mm/highmem.c
  637. --- linux-4.8.15.orig/arch/arm/mm/highmem.c 2016-12-15 17:50:48.000000000 +0100
  638. +++ linux-4.8.15/arch/arm/mm/highmem.c 2017-01-01 17:07:11.879159524 +0100
  639. @@ -34,6 +34,11 @@
  640. return *ptep;
  641. }
  642. +static unsigned int fixmap_idx(int type)
  643. +{
  644. + return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  645. +}
  646. +
  647. void *kmap(struct page *page)
  648. {
  649. might_sleep();
  650. @@ -54,12 +59,13 @@
  651. void *kmap_atomic(struct page *page)
  652. {
  653. + pte_t pte = mk_pte(page, kmap_prot);
  654. unsigned int idx;
  655. unsigned long vaddr;
  656. void *kmap;
  657. int type;
  658. - preempt_disable();
  659. + preempt_disable_nort();
  660. pagefault_disable();
  661. if (!PageHighMem(page))
  662. return page_address(page);
  663. @@ -79,7 +85,7 @@
  664. type = kmap_atomic_idx_push();
  665. - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  666. + idx = fixmap_idx(type);
  667. vaddr = __fix_to_virt(idx);
  668. #ifdef CONFIG_DEBUG_HIGHMEM
  669. /*
  670. @@ -93,7 +99,10 @@
  671. * in place, so the contained TLB flush ensures the TLB is updated
  672. * with the new mapping.
  673. */
  674. - set_fixmap_pte(idx, mk_pte(page, kmap_prot));
  675. +#ifdef CONFIG_PREEMPT_RT_FULL
  676. + current->kmap_pte[type] = pte;
  677. +#endif
  678. + set_fixmap_pte(idx, pte);
  679. return (void *)vaddr;
  680. }
  681. @@ -106,44 +115,75 @@
  682. if (kvaddr >= (void *)FIXADDR_START) {
  683. type = kmap_atomic_idx();
  684. - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  685. + idx = fixmap_idx(type);
  686. if (cache_is_vivt())
  687. __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
  688. +#ifdef CONFIG_PREEMPT_RT_FULL
  689. + current->kmap_pte[type] = __pte(0);
  690. +#endif
  691. #ifdef CONFIG_DEBUG_HIGHMEM
  692. BUG_ON(vaddr != __fix_to_virt(idx));
  693. - set_fixmap_pte(idx, __pte(0));
  694. #else
  695. (void) idx; /* to kill a warning */
  696. #endif
  697. + set_fixmap_pte(idx, __pte(0));
  698. kmap_atomic_idx_pop();
  699. } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
  700. /* this address was obtained through kmap_high_get() */
  701. kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
  702. }
  703. pagefault_enable();
  704. - preempt_enable();
  705. + preempt_enable_nort();
  706. }
  707. EXPORT_SYMBOL(__kunmap_atomic);
  708. void *kmap_atomic_pfn(unsigned long pfn)
  709. {
  710. + pte_t pte = pfn_pte(pfn, kmap_prot);
  711. unsigned long vaddr;
  712. int idx, type;
  713. struct page *page = pfn_to_page(pfn);
  714. - preempt_disable();
  715. + preempt_disable_nort();
  716. pagefault_disable();
  717. if (!PageHighMem(page))
  718. return page_address(page);
  719. type = kmap_atomic_idx_push();
  720. - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  721. + idx = fixmap_idx(type);
  722. vaddr = __fix_to_virt(idx);
  723. #ifdef CONFIG_DEBUG_HIGHMEM
  724. BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
  725. #endif
  726. - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
  727. +#ifdef CONFIG_PREEMPT_RT_FULL
  728. + current->kmap_pte[type] = pte;
  729. +#endif
  730. + set_fixmap_pte(idx, pte);
  731. return (void *)vaddr;
  732. }
  733. +#if defined CONFIG_PREEMPT_RT_FULL
  734. +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
  735. +{
  736. + int i;
  737. +
  738. + /*
  739. + * Clear @prev's kmap_atomic mappings
  740. + */
  741. + for (i = 0; i < prev_p->kmap_idx; i++) {
  742. + int idx = fixmap_idx(i);
  743. +
  744. + set_fixmap_pte(idx, __pte(0));
  745. + }
  746. + /*
  747. + * Restore @next_p's kmap_atomic mappings
  748. + */
  749. + for (i = 0; i < next_p->kmap_idx; i++) {
  750. + int idx = fixmap_idx(i);
  751. +
  752. + if (!pte_none(next_p->kmap_pte[i]))
  753. + set_fixmap_pte(idx, next_p->kmap_pte[i]);
  754. + }
  755. +}
  756. +#endif
  757. diff -Nur linux-4.8.15.orig/arch/arm/plat-versatile/platsmp.c linux-4.8.15/arch/arm/plat-versatile/platsmp.c
  758. --- linux-4.8.15.orig/arch/arm/plat-versatile/platsmp.c 2016-12-15 17:50:48.000000000 +0100
  759. +++ linux-4.8.15/arch/arm/plat-versatile/platsmp.c 2017-01-01 17:07:11.939163389 +0100
  760. @@ -32,7 +32,7 @@
  761. sync_cache_w(&pen_release);
  762. }
  763. -static DEFINE_SPINLOCK(boot_lock);
  764. +static DEFINE_RAW_SPINLOCK(boot_lock);
  765. void versatile_secondary_init(unsigned int cpu)
  766. {
  767. @@ -45,8 +45,8 @@
  768. /*
  769. * Synchronise with the boot thread.
  770. */
  771. - spin_lock(&boot_lock);
  772. - spin_unlock(&boot_lock);
  773. + raw_spin_lock(&boot_lock);
  774. + raw_spin_unlock(&boot_lock);
  775. }
  776. int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
  777. @@ -57,7 +57,7 @@
  778. * Set synchronisation state between this boot processor
  779. * and the secondary one
  780. */
  781. - spin_lock(&boot_lock);
  782. + raw_spin_lock(&boot_lock);
  783. /*
  784. * This is really belt and braces; we hold unintended secondary
  785. @@ -87,7 +87,7 @@
  786. * now the secondary core is starting up let it run its
  787. * calibrations, then wait for it to finish
  788. */
  789. - spin_unlock(&boot_lock);
  790. + raw_spin_unlock(&boot_lock);
  791. return pen_release != -1 ? -ENOSYS : 0;
  792. }
  793. diff -Nur linux-4.8.15.orig/arch/arm64/include/asm/thread_info.h linux-4.8.15/arch/arm64/include/asm/thread_info.h
  794. --- linux-4.8.15.orig/arch/arm64/include/asm/thread_info.h 2016-12-15 17:50:48.000000000 +0100
  795. +++ linux-4.8.15/arch/arm64/include/asm/thread_info.h 2017-01-01 17:07:12.027169062 +0100
  796. @@ -49,6 +49,7 @@
  797. mm_segment_t addr_limit; /* address limit */
  798. struct task_struct *task; /* main task structure */
  799. int preempt_count; /* 0 => preemptable, <0 => bug */
  800. + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
  801. int cpu; /* cpu */
  802. };
  803. @@ -109,6 +110,7 @@
  804. #define TIF_NEED_RESCHED 1
  805. #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
  806. #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
  807. +#define TIF_NEED_RESCHED_LAZY 4
  808. #define TIF_NOHZ 7
  809. #define TIF_SYSCALL_TRACE 8
  810. #define TIF_SYSCALL_AUDIT 9
  811. @@ -124,6 +126,7 @@
  812. #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
  813. #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
  814. #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
  815. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  816. #define _TIF_NOHZ (1 << TIF_NOHZ)
  817. #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
  818. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  819. @@ -132,7 +135,8 @@
  820. #define _TIF_32BIT (1 << TIF_32BIT)
  821. #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
  822. - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
  823. + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
  824. + _TIF_NEED_RESCHED_LAZY)
  825. #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
  826. _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
  827. diff -Nur linux-4.8.15.orig/arch/arm64/Kconfig linux-4.8.15/arch/arm64/Kconfig
  828. --- linux-4.8.15.orig/arch/arm64/Kconfig 2016-12-15 17:50:48.000000000 +0100
  829. +++ linux-4.8.15/arch/arm64/Kconfig 2017-01-01 17:07:11.979165970 +0100
  830. @@ -90,6 +90,7 @@
  831. select HAVE_PERF_EVENTS
  832. select HAVE_PERF_REGS
  833. select HAVE_PERF_USER_STACK_DUMP
  834. + select HAVE_PREEMPT_LAZY
  835. select HAVE_REGS_AND_STACK_ACCESS_API
  836. select HAVE_RCU_TABLE_FREE
  837. select HAVE_SYSCALL_TRACEPOINTS
  838. @@ -689,7 +690,7 @@
  839. config XEN
  840. bool "Xen guest support on ARM64"
  841. - depends on ARM64 && OF
  842. + depends on ARM64 && OF && !PREEMPT_RT_FULL
  843. select SWIOTLB_XEN
  844. select PARAVIRT
  845. help
  846. diff -Nur linux-4.8.15.orig/arch/arm64/kernel/asm-offsets.c linux-4.8.15/arch/arm64/kernel/asm-offsets.c
  847. --- linux-4.8.15.orig/arch/arm64/kernel/asm-offsets.c 2016-12-15 17:50:48.000000000 +0100
  848. +++ linux-4.8.15/arch/arm64/kernel/asm-offsets.c 2017-01-01 17:07:12.079172403 +0100
  849. @@ -37,6 +37,7 @@
  850. BLANK();
  851. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  852. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  853. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  854. DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
  855. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  856. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  857. diff -Nur linux-4.8.15.orig/arch/arm64/kernel/entry.S linux-4.8.15/arch/arm64/kernel/entry.S
  858. --- linux-4.8.15.orig/arch/arm64/kernel/entry.S 2016-12-15 17:50:48.000000000 +0100
  859. +++ linux-4.8.15/arch/arm64/kernel/entry.S 2017-01-01 17:07:12.083172658 +0100
  860. @@ -434,11 +434,16 @@
  861. #ifdef CONFIG_PREEMPT
  862. ldr w24, [tsk, #TI_PREEMPT] // get preempt count
  863. - cbnz w24, 1f // preempt count != 0
  864. + cbnz w24, 2f // preempt count != 0
  865. ldr x0, [tsk, #TI_FLAGS] // get flags
  866. - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
  867. - bl el1_preempt
  868. + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
  869. +
  870. + ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count
  871. + cbnz w24, 2f // preempt lazy count != 0
  872. + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling?
  873. 1:
  874. + bl el1_preempt
  875. +2:
  876. #endif
  877. #ifdef CONFIG_TRACE_IRQFLAGS
  878. bl trace_hardirqs_on
  879. @@ -452,6 +457,7 @@
  880. 1: bl preempt_schedule_irq // irq en/disable is done inside
  881. ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
  882. tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
  883. + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling?
  884. ret x24
  885. #endif
  886. @@ -708,6 +714,7 @@
  887. */
  888. work_pending:
  889. tbnz x1, #TIF_NEED_RESCHED, work_resched
  890. + tbnz x1, #TIF_NEED_RESCHED_LAZY, work_resched
  891. /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
  892. mov x0, sp // 'regs'
  893. enable_irq // enable interrupts for do_notify_resume()
  894. diff -Nur linux-4.8.15.orig/arch/Kconfig linux-4.8.15/arch/Kconfig
  895. --- linux-4.8.15.orig/arch/Kconfig 2016-12-15 17:50:48.000000000 +0100
  896. +++ linux-4.8.15/arch/Kconfig 2017-01-01 17:07:11.431130672 +0100
  897. @@ -9,6 +9,7 @@
  898. tristate "OProfile system profiling"
  899. depends on PROFILING
  900. depends on HAVE_OPROFILE
  901. + depends on !PREEMPT_RT_FULL
  902. select RING_BUFFER
  903. select RING_BUFFER_ALLOW_SWAP
  904. help
  905. @@ -52,6 +53,7 @@
  906. config JUMP_LABEL
  907. bool "Optimize very unlikely/likely branches"
  908. depends on HAVE_ARCH_JUMP_LABEL
  909. + depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST)
  910. help
  911. This option enables a transparent branch optimization that
  912. makes certain almost-always-true or almost-always-false branch
  913. diff -Nur linux-4.8.15.orig/arch/mips/Kconfig linux-4.8.15/arch/mips/Kconfig
  914. --- linux-4.8.15.orig/arch/mips/Kconfig 2016-12-15 17:50:48.000000000 +0100
  915. +++ linux-4.8.15/arch/mips/Kconfig 2017-01-01 17:07:12.275185025 +0100
  916. @@ -2480,7 +2480,7 @@
  917. #
  918. config HIGHMEM
  919. bool "High Memory Support"
  920. - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA
  921. + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL
  922. config CPU_SUPPORTS_HIGHMEM
  923. bool
  924. diff -Nur linux-4.8.15.orig/arch/powerpc/include/asm/thread_info.h linux-4.8.15/arch/powerpc/include/asm/thread_info.h
  925. --- linux-4.8.15.orig/arch/powerpc/include/asm/thread_info.h 2016-12-15 17:50:48.000000000 +0100
  926. +++ linux-4.8.15/arch/powerpc/include/asm/thread_info.h 2017-01-01 17:07:12.499199453 +0100
  927. @@ -43,6 +43,8 @@
  928. int cpu; /* cpu we're on */
  929. int preempt_count; /* 0 => preemptable,
  930. <0 => BUG */
  931. + int preempt_lazy_count; /* 0 => preemptable,
  932. + <0 => BUG */
  933. unsigned long local_flags; /* private flags for thread */
  934. #ifdef CONFIG_LIVEPATCH
  935. unsigned long *livepatch_sp;
  936. @@ -88,8 +90,7 @@
  937. #define TIF_SYSCALL_TRACE 0 /* syscall trace active */
  938. #define TIF_SIGPENDING 1 /* signal pending */
  939. #define TIF_NEED_RESCHED 2 /* rescheduling necessary */
  940. -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
  941. - TIF_NEED_RESCHED */
  942. +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */
  943. #define TIF_32BIT 4 /* 32 bit binary */
  944. #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */
  945. #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
  946. @@ -107,6 +108,8 @@
  947. #if defined(CONFIG_PPC64)
  948. #define TIF_ELF2ABI 18 /* function descriptors must die! */
  949. #endif
  950. +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling
  951. + TIF_NEED_RESCHED */
  952. /* as above, but as bit values */
  953. #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
  954. @@ -125,14 +128,16 @@
  955. #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
  956. #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
  957. #define _TIF_NOHZ (1<<TIF_NOHZ)
  958. +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
  959. #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
  960. _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
  961. _TIF_NOHZ)
  962. #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
  963. _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
  964. - _TIF_RESTORE_TM)
  965. + _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY)
  966. #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
  967. +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  968. /* Bits in local_flags */
  969. /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
  970. diff -Nur linux-4.8.15.orig/arch/powerpc/Kconfig linux-4.8.15/arch/powerpc/Kconfig
  971. --- linux-4.8.15.orig/arch/powerpc/Kconfig 2016-12-15 17:50:48.000000000 +0100
  972. +++ linux-4.8.15/arch/powerpc/Kconfig 2017-01-01 17:07:12.435195331 +0100
  973. @@ -57,10 +57,11 @@
  974. config RWSEM_GENERIC_SPINLOCK
  975. bool
  976. + default y if PREEMPT_RT_FULL
  977. config RWSEM_XCHGADD_ALGORITHM
  978. bool
  979. - default y
  980. + default y if !PREEMPT_RT_FULL
  981. config GENERIC_LOCKBREAK
  982. bool
  983. @@ -140,6 +141,7 @@
  984. select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
  985. select GENERIC_STRNCPY_FROM_USER
  986. select GENERIC_STRNLEN_USER
  987. + select HAVE_PREEMPT_LAZY
  988. select HAVE_MOD_ARCH_SPECIFIC
  989. select MODULES_USE_ELF_RELA
  990. select CLONE_BACKWARDS
  991. @@ -326,7 +328,7 @@
  992. config HIGHMEM
  993. bool "High memory support"
  994. - depends on PPC32
  995. + depends on PPC32 && !PREEMPT_RT_FULL
  996. source kernel/Kconfig.hz
  997. source kernel/Kconfig.preempt
  998. diff -Nur linux-4.8.15.orig/arch/powerpc/kernel/asm-offsets.c linux-4.8.15/arch/powerpc/kernel/asm-offsets.c
  999. --- linux-4.8.15.orig/arch/powerpc/kernel/asm-offsets.c 2016-12-15 17:50:48.000000000 +0100
  1000. +++ linux-4.8.15/arch/powerpc/kernel/asm-offsets.c 2017-01-01 17:07:12.519200744 +0100
  1001. @@ -156,6 +156,7 @@
  1002. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  1003. DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
  1004. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  1005. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  1006. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  1007. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  1008. diff -Nur linux-4.8.15.orig/arch/powerpc/kernel/entry_32.S linux-4.8.15/arch/powerpc/kernel/entry_32.S
  1009. --- linux-4.8.15.orig/arch/powerpc/kernel/entry_32.S 2016-12-15 17:50:48.000000000 +0100
  1010. +++ linux-4.8.15/arch/powerpc/kernel/entry_32.S 2017-01-01 17:07:12.531201514 +0100
  1011. @@ -835,7 +835,14 @@
  1012. cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
  1013. bne restore
  1014. andi. r8,r8,_TIF_NEED_RESCHED
  1015. + bne+ 1f
  1016. + lwz r0,TI_PREEMPT_LAZY(r9)
  1017. + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
  1018. + bne restore
  1019. + lwz r0,TI_FLAGS(r9)
  1020. + andi. r0,r0,_TIF_NEED_RESCHED_LAZY
  1021. beq+ restore
  1022. +1:
  1023. lwz r3,_MSR(r1)
  1024. andi. r0,r3,MSR_EE /* interrupts off? */
  1025. beq restore /* don't schedule if so */
  1026. @@ -846,11 +853,11 @@
  1027. */
  1028. bl trace_hardirqs_off
  1029. #endif
  1030. -1: bl preempt_schedule_irq
  1031. +2: bl preempt_schedule_irq
  1032. CURRENT_THREAD_INFO(r9, r1)
  1033. lwz r3,TI_FLAGS(r9)
  1034. - andi. r0,r3,_TIF_NEED_RESCHED
  1035. - bne- 1b
  1036. + andi. r0,r3,_TIF_NEED_RESCHED_MASK
  1037. + bne- 2b
  1038. #ifdef CONFIG_TRACE_IRQFLAGS
  1039. /* And now, to properly rebalance the above, we tell lockdep they
  1040. * are being turned back on, which will happen when we return
  1041. @@ -1171,7 +1178,7 @@
  1042. #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
  1043. do_work: /* r10 contains MSR_KERNEL here */
  1044. - andi. r0,r9,_TIF_NEED_RESCHED
  1045. + andi. r0,r9,_TIF_NEED_RESCHED_MASK
  1046. beq do_user_signal
  1047. do_resched: /* r10 contains MSR_KERNEL here */
  1048. @@ -1192,7 +1199,7 @@
  1049. MTMSRD(r10) /* disable interrupts */
  1050. CURRENT_THREAD_INFO(r9, r1)
  1051. lwz r9,TI_FLAGS(r9)
  1052. - andi. r0,r9,_TIF_NEED_RESCHED
  1053. + andi. r0,r9,_TIF_NEED_RESCHED_MASK
  1054. bne- do_resched
  1055. andi. r0,r9,_TIF_USER_WORK_MASK
  1056. beq restore_user
  1057. diff -Nur linux-4.8.15.orig/arch/powerpc/kernel/entry_64.S linux-4.8.15/arch/powerpc/kernel/entry_64.S
  1058. --- linux-4.8.15.orig/arch/powerpc/kernel/entry_64.S 2016-12-15 17:50:48.000000000 +0100
  1059. +++ linux-4.8.15/arch/powerpc/kernel/entry_64.S 2017-01-01 17:07:12.535201773 +0100
  1060. @@ -657,7 +657,7 @@
  1061. bl restore_math
  1062. b restore
  1063. #endif
  1064. -1: andi. r0,r4,_TIF_NEED_RESCHED
  1065. +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK
  1066. beq 2f
  1067. bl restore_interrupts
  1068. SCHEDULE_USER
  1069. @@ -719,10 +719,18 @@
  1070. #ifdef CONFIG_PREEMPT
  1071. /* Check if we need to preempt */
  1072. + lwz r8,TI_PREEMPT(r9)
  1073. + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */
  1074. + bne restore
  1075. andi. r0,r4,_TIF_NEED_RESCHED
  1076. + bne+ check_count
  1077. +
  1078. + andi. r0,r4,_TIF_NEED_RESCHED_LAZY
  1079. beq+ restore
  1080. + lwz r8,TI_PREEMPT_LAZY(r9)
  1081. +
  1082. /* Check that preempt_count() == 0 and interrupts are enabled */
  1083. - lwz r8,TI_PREEMPT(r9)
  1084. +check_count:
  1085. cmpwi cr1,r8,0
  1086. ld r0,SOFTE(r1)
  1087. cmpdi r0,0
  1088. @@ -739,7 +747,7 @@
  1089. /* Re-test flags and eventually loop */
  1090. CURRENT_THREAD_INFO(r9, r1)
  1091. ld r4,TI_FLAGS(r9)
  1092. - andi. r0,r4,_TIF_NEED_RESCHED
  1093. + andi. r0,r4,_TIF_NEED_RESCHED_MASK
  1094. bne 1b
  1095. /*
  1096. diff -Nur linux-4.8.15.orig/arch/powerpc/kernel/irq.c linux-4.8.15/arch/powerpc/kernel/irq.c
  1097. --- linux-4.8.15.orig/arch/powerpc/kernel/irq.c 2016-12-15 17:50:48.000000000 +0100
  1098. +++ linux-4.8.15/arch/powerpc/kernel/irq.c 2017-01-01 17:07:12.539202039 +0100
  1099. @@ -633,6 +633,7 @@
  1100. }
  1101. }
  1102. +#ifndef CONFIG_PREEMPT_RT_FULL
  1103. void do_softirq_own_stack(void)
  1104. {
  1105. struct thread_info *curtp, *irqtp;
  1106. @@ -650,6 +651,7 @@
  1107. if (irqtp->flags)
  1108. set_bits(irqtp->flags, &curtp->flags);
  1109. }
  1110. +#endif
  1111. irq_hw_number_t virq_to_hw(unsigned int virq)
  1112. {
  1113. diff -Nur linux-4.8.15.orig/arch/powerpc/kernel/misc_32.S linux-4.8.15/arch/powerpc/kernel/misc_32.S
  1114. --- linux-4.8.15.orig/arch/powerpc/kernel/misc_32.S 2016-12-15 17:50:48.000000000 +0100
  1115. +++ linux-4.8.15/arch/powerpc/kernel/misc_32.S 2017-01-01 17:07:12.543202293 +0100
  1116. @@ -40,6 +40,7 @@
  1117. * We store the saved ksp_limit in the unused part
  1118. * of the STACK_FRAME_OVERHEAD
  1119. */
  1120. +#ifndef CONFIG_PREEMPT_RT_FULL
  1121. _GLOBAL(call_do_softirq)
  1122. mflr r0
  1123. stw r0,4(r1)
  1124. @@ -56,6 +57,7 @@
  1125. stw r10,THREAD+KSP_LIMIT(r2)
  1126. mtlr r0
  1127. blr
  1128. +#endif
  1129. /*
  1130. * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
  1131. diff -Nur linux-4.8.15.orig/arch/powerpc/kernel/misc_64.S linux-4.8.15/arch/powerpc/kernel/misc_64.S
  1132. --- linux-4.8.15.orig/arch/powerpc/kernel/misc_64.S 2016-12-15 17:50:48.000000000 +0100
  1133. +++ linux-4.8.15/arch/powerpc/kernel/misc_64.S 2017-01-01 17:07:12.543202293 +0100
  1134. @@ -30,6 +30,7 @@
  1135. .text
  1136. +#ifndef CONFIG_PREEMPT_RT_FULL
  1137. _GLOBAL(call_do_softirq)
  1138. mflr r0
  1139. std r0,16(r1)
  1140. @@ -40,6 +41,7 @@
  1141. ld r0,16(r1)
  1142. mtlr r0
  1143. blr
  1144. +#endif
  1145. _GLOBAL(call_do_irq)
  1146. mflr r0
  1147. diff -Nur linux-4.8.15.orig/arch/powerpc/kvm/Kconfig linux-4.8.15/arch/powerpc/kvm/Kconfig
  1148. --- linux-4.8.15.orig/arch/powerpc/kvm/Kconfig 2016-12-15 17:50:48.000000000 +0100
  1149. +++ linux-4.8.15/arch/powerpc/kvm/Kconfig 2017-01-01 17:07:12.587205132 +0100
  1150. @@ -172,6 +172,7 @@
  1151. config KVM_MPIC
  1152. bool "KVM in-kernel MPIC emulation"
  1153. depends on KVM && E500
  1154. + depends on !PREEMPT_RT_FULL
  1155. select HAVE_KVM_IRQCHIP
  1156. select HAVE_KVM_IRQFD
  1157. select HAVE_KVM_IRQ_ROUTING
  1158. diff -Nur linux-4.8.15.orig/arch/powerpc/platforms/ps3/device-init.c linux-4.8.15/arch/powerpc/platforms/ps3/device-init.c
  1159. --- linux-4.8.15.orig/arch/powerpc/platforms/ps3/device-init.c 2016-12-15 17:50:48.000000000 +0100
  1160. +++ linux-4.8.15/arch/powerpc/platforms/ps3/device-init.c 2017-01-01 17:07:12.623207443 +0100
  1161. @@ -752,7 +752,7 @@
  1162. }
  1163. pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op);
  1164. - res = wait_event_interruptible(dev->done.wait,
  1165. + res = swait_event_interruptible(dev->done.wait,
  1166. dev->done.done || kthread_should_stop());
  1167. if (kthread_should_stop())
  1168. res = -EINTR;
  1169. diff -Nur linux-4.8.15.orig/arch/sh/kernel/irq.c linux-4.8.15/arch/sh/kernel/irq.c
  1170. --- linux-4.8.15.orig/arch/sh/kernel/irq.c 2016-12-15 17:50:48.000000000 +0100
  1171. +++ linux-4.8.15/arch/sh/kernel/irq.c 2017-01-01 17:07:12.687211566 +0100
  1172. @@ -147,6 +147,7 @@
  1173. hardirq_ctx[cpu] = NULL;
  1174. }
  1175. +#ifndef CONFIG_PREEMPT_RT_FULL
  1176. void do_softirq_own_stack(void)
  1177. {
  1178. struct thread_info *curctx;
  1179. @@ -174,6 +175,7 @@
  1180. "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
  1181. );
  1182. }
  1183. +#endif
  1184. #else
  1185. static inline void handle_one_irq(unsigned int irq)
  1186. {
  1187. diff -Nur linux-4.8.15.orig/arch/sparc/Kconfig linux-4.8.15/arch/sparc/Kconfig
  1188. --- linux-4.8.15.orig/arch/sparc/Kconfig 2016-12-15 17:50:48.000000000 +0100
  1189. +++ linux-4.8.15/arch/sparc/Kconfig 2017-01-01 17:07:12.723213881 +0100
  1190. @@ -187,12 +187,10 @@
  1191. source kernel/Kconfig.hz
  1192. config RWSEM_GENERIC_SPINLOCK
  1193. - bool
  1194. - default y if SPARC32
  1195. + def_bool PREEMPT_RT_FULL
  1196. config RWSEM_XCHGADD_ALGORITHM
  1197. - bool
  1198. - default y if SPARC64
  1199. + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
  1200. config GENERIC_HWEIGHT
  1201. bool
  1202. diff -Nur linux-4.8.15.orig/arch/sparc/kernel/irq_64.c linux-4.8.15/arch/sparc/kernel/irq_64.c
  1203. --- linux-4.8.15.orig/arch/sparc/kernel/irq_64.c 2016-12-15 17:50:48.000000000 +0100
  1204. +++ linux-4.8.15/arch/sparc/kernel/irq_64.c 2017-01-01 17:07:12.763216468 +0100
  1205. @@ -854,6 +854,7 @@
  1206. set_irq_regs(old_regs);
  1207. }
  1208. +#ifndef CONFIG_PREEMPT_RT_FULL
  1209. void do_softirq_own_stack(void)
  1210. {
  1211. void *orig_sp, *sp = softirq_stack[smp_processor_id()];
  1212. @@ -868,6 +869,7 @@
  1213. __asm__ __volatile__("mov %0, %%sp"
  1214. : : "r" (orig_sp));
  1215. }
  1216. +#endif
  1217. #ifdef CONFIG_HOTPLUG_CPU
  1218. void fixup_irqs(void)
  1219. diff -Nur linux-4.8.15.orig/arch/x86/crypto/aesni-intel_glue.c linux-4.8.15/arch/x86/crypto/aesni-intel_glue.c
  1220. --- linux-4.8.15.orig/arch/x86/crypto/aesni-intel_glue.c 2016-12-15 17:50:48.000000000 +0100
  1221. +++ linux-4.8.15/arch/x86/crypto/aesni-intel_glue.c 2017-01-01 17:07:12.971229856 +0100
  1222. @@ -372,14 +372,14 @@
  1223. err = blkcipher_walk_virt(desc, &walk);
  1224. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1225. - kernel_fpu_begin();
  1226. while ((nbytes = walk.nbytes)) {
  1227. + kernel_fpu_begin();
  1228. aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1229. - nbytes & AES_BLOCK_MASK);
  1230. + nbytes & AES_BLOCK_MASK);
  1231. + kernel_fpu_end();
  1232. nbytes &= AES_BLOCK_SIZE - 1;
  1233. err = blkcipher_walk_done(desc, &walk, nbytes);
  1234. }
  1235. - kernel_fpu_end();
  1236. return err;
  1237. }
  1238. @@ -396,14 +396,14 @@
  1239. err = blkcipher_walk_virt(desc, &walk);
  1240. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1241. - kernel_fpu_begin();
  1242. while ((nbytes = walk.nbytes)) {
  1243. + kernel_fpu_begin();
  1244. aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1245. nbytes & AES_BLOCK_MASK);
  1246. + kernel_fpu_end();
  1247. nbytes &= AES_BLOCK_SIZE - 1;
  1248. err = blkcipher_walk_done(desc, &walk, nbytes);
  1249. }
  1250. - kernel_fpu_end();
  1251. return err;
  1252. }
  1253. @@ -420,14 +420,14 @@
  1254. err = blkcipher_walk_virt(desc, &walk);
  1255. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1256. - kernel_fpu_begin();
  1257. while ((nbytes = walk.nbytes)) {
  1258. + kernel_fpu_begin();
  1259. aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1260. nbytes & AES_BLOCK_MASK, walk.iv);
  1261. + kernel_fpu_end();
  1262. nbytes &= AES_BLOCK_SIZE - 1;
  1263. err = blkcipher_walk_done(desc, &walk, nbytes);
  1264. }
  1265. - kernel_fpu_end();
  1266. return err;
  1267. }
  1268. @@ -444,14 +444,14 @@
  1269. err = blkcipher_walk_virt(desc, &walk);
  1270. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1271. - kernel_fpu_begin();
  1272. while ((nbytes = walk.nbytes)) {
  1273. + kernel_fpu_begin();
  1274. aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1275. nbytes & AES_BLOCK_MASK, walk.iv);
  1276. + kernel_fpu_end();
  1277. nbytes &= AES_BLOCK_SIZE - 1;
  1278. err = blkcipher_walk_done(desc, &walk, nbytes);
  1279. }
  1280. - kernel_fpu_end();
  1281. return err;
  1282. }
  1283. @@ -503,18 +503,20 @@
  1284. err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
  1285. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1286. - kernel_fpu_begin();
  1287. while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
  1288. + kernel_fpu_begin();
  1289. aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1290. nbytes & AES_BLOCK_MASK, walk.iv);
  1291. + kernel_fpu_end();
  1292. nbytes &= AES_BLOCK_SIZE - 1;
  1293. err = blkcipher_walk_done(desc, &walk, nbytes);
  1294. }
  1295. if (walk.nbytes) {
  1296. + kernel_fpu_begin();
  1297. ctr_crypt_final(ctx, &walk);
  1298. + kernel_fpu_end();
  1299. err = blkcipher_walk_done(desc, &walk, 0);
  1300. }
  1301. - kernel_fpu_end();
  1302. return err;
  1303. }
  1304. diff -Nur linux-4.8.15.orig/arch/x86/crypto/cast5_avx_glue.c linux-4.8.15/arch/x86/crypto/cast5_avx_glue.c
  1305. --- linux-4.8.15.orig/arch/x86/crypto/cast5_avx_glue.c 2016-12-15 17:50:48.000000000 +0100
  1306. +++ linux-4.8.15/arch/x86/crypto/cast5_avx_glue.c 2017-01-01 17:07:12.979230370 +0100
  1307. @@ -59,7 +59,7 @@
  1308. static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
  1309. bool enc)
  1310. {
  1311. - bool fpu_enabled = false;
  1312. + bool fpu_enabled;
  1313. struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  1314. const unsigned int bsize = CAST5_BLOCK_SIZE;
  1315. unsigned int nbytes;
  1316. @@ -75,7 +75,7 @@
  1317. u8 *wsrc = walk->src.virt.addr;
  1318. u8 *wdst = walk->dst.virt.addr;
  1319. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  1320. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  1321. /* Process multi-block batch */
  1322. if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
  1323. @@ -103,10 +103,9 @@
  1324. } while (nbytes >= bsize);
  1325. done:
  1326. + cast5_fpu_end(fpu_enabled);
  1327. err = blkcipher_walk_done(desc, walk, nbytes);
  1328. }
  1329. -
  1330. - cast5_fpu_end(fpu_enabled);
  1331. return err;
  1332. }
  1333. @@ -227,7 +226,7 @@
  1334. static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  1335. struct scatterlist *src, unsigned int nbytes)
  1336. {
  1337. - bool fpu_enabled = false;
  1338. + bool fpu_enabled;
  1339. struct blkcipher_walk walk;
  1340. int err;
  1341. @@ -236,12 +235,11 @@
  1342. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1343. while ((nbytes = walk.nbytes)) {
  1344. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  1345. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  1346. nbytes = __cbc_decrypt(desc, &walk);
  1347. + cast5_fpu_end(fpu_enabled);
  1348. err = blkcipher_walk_done(desc, &walk, nbytes);
  1349. }
  1350. -
  1351. - cast5_fpu_end(fpu_enabled);
  1352. return err;
  1353. }
  1354. @@ -311,7 +309,7 @@
  1355. static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  1356. struct scatterlist *src, unsigned int nbytes)
  1357. {
  1358. - bool fpu_enabled = false;
  1359. + bool fpu_enabled;
  1360. struct blkcipher_walk walk;
  1361. int err;
  1362. @@ -320,13 +318,12 @@
  1363. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1364. while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
  1365. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  1366. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  1367. nbytes = __ctr_crypt(desc, &walk);
  1368. + cast5_fpu_end(fpu_enabled);
  1369. err = blkcipher_walk_done(desc, &walk, nbytes);
  1370. }
  1371. - cast5_fpu_end(fpu_enabled);
  1372. -
  1373. if (walk.nbytes) {
  1374. ctr_crypt_final(desc, &walk);
  1375. err = blkcipher_walk_done(desc, &walk, 0);
  1376. diff -Nur linux-4.8.15.orig/arch/x86/crypto/glue_helper.c linux-4.8.15/arch/x86/crypto/glue_helper.c
  1377. --- linux-4.8.15.orig/arch/x86/crypto/glue_helper.c 2016-12-15 17:50:48.000000000 +0100
  1378. +++ linux-4.8.15/arch/x86/crypto/glue_helper.c 2017-01-01 17:07:12.983230644 +0100
  1379. @@ -39,7 +39,7 @@
  1380. void *ctx = crypto_blkcipher_ctx(desc->tfm);
  1381. const unsigned int bsize = 128 / 8;
  1382. unsigned int nbytes, i, func_bytes;
  1383. - bool fpu_enabled = false;
  1384. + bool fpu_enabled;
  1385. int err;
  1386. err = blkcipher_walk_virt(desc, walk);
  1387. @@ -49,7 +49,7 @@
  1388. u8 *wdst = walk->dst.virt.addr;
  1389. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1390. - desc, fpu_enabled, nbytes);
  1391. + desc, false, nbytes);
  1392. for (i = 0; i < gctx->num_funcs; i++) {
  1393. func_bytes = bsize * gctx->funcs[i].num_blocks;
  1394. @@ -71,10 +71,10 @@
  1395. }
  1396. done:
  1397. + glue_fpu_end(fpu_enabled);
  1398. err = blkcipher_walk_done(desc, walk, nbytes);
  1399. }
  1400. - glue_fpu_end(fpu_enabled);
  1401. return err;
  1402. }
  1403. @@ -194,7 +194,7 @@
  1404. struct scatterlist *src, unsigned int nbytes)
  1405. {
  1406. const unsigned int bsize = 128 / 8;
  1407. - bool fpu_enabled = false;
  1408. + bool fpu_enabled;
  1409. struct blkcipher_walk walk;
  1410. int err;
  1411. @@ -203,12 +203,12 @@
  1412. while ((nbytes = walk.nbytes)) {
  1413. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1414. - desc, fpu_enabled, nbytes);
  1415. + desc, false, nbytes);
  1416. nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
  1417. + glue_fpu_end(fpu_enabled);
  1418. err = blkcipher_walk_done(desc, &walk, nbytes);
  1419. }
  1420. - glue_fpu_end(fpu_enabled);
  1421. return err;
  1422. }
  1423. EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
  1424. @@ -277,7 +277,7 @@
  1425. struct scatterlist *src, unsigned int nbytes)
  1426. {
  1427. const unsigned int bsize = 128 / 8;
  1428. - bool fpu_enabled = false;
  1429. + bool fpu_enabled;
  1430. struct blkcipher_walk walk;
  1431. int err;
  1432. @@ -286,13 +286,12 @@
  1433. while ((nbytes = walk.nbytes) >= bsize) {
  1434. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1435. - desc, fpu_enabled, nbytes);
  1436. + desc, false, nbytes);
  1437. nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
  1438. + glue_fpu_end(fpu_enabled);
  1439. err = blkcipher_walk_done(desc, &walk, nbytes);
  1440. }
  1441. - glue_fpu_end(fpu_enabled);
  1442. -
  1443. if (walk.nbytes) {
  1444. glue_ctr_crypt_final_128bit(
  1445. gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
  1446. @@ -347,7 +346,7 @@
  1447. void *tweak_ctx, void *crypt_ctx)
  1448. {
  1449. const unsigned int bsize = 128 / 8;
  1450. - bool fpu_enabled = false;
  1451. + bool fpu_enabled;
  1452. struct blkcipher_walk walk;
  1453. int err;
  1454. @@ -360,21 +359,21 @@
  1455. /* set minimum length to bsize, for tweak_fn */
  1456. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1457. - desc, fpu_enabled,
  1458. + desc, false,
  1459. nbytes < bsize ? bsize : nbytes);
  1460. -
  1461. /* calculate first value of T */
  1462. tweak_fn(tweak_ctx, walk.iv, walk.iv);
  1463. + glue_fpu_end(fpu_enabled);
  1464. while (nbytes) {
  1465. + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1466. + desc, false, nbytes);
  1467. nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
  1468. + glue_fpu_end(fpu_enabled);
  1469. err = blkcipher_walk_done(desc, &walk, nbytes);
  1470. nbytes = walk.nbytes;
  1471. }
  1472. -
  1473. - glue_fpu_end(fpu_enabled);
  1474. -
  1475. return err;
  1476. }
  1477. EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
  1478. diff -Nur linux-4.8.15.orig/arch/x86/entry/common.c linux-4.8.15/arch/x86/entry/common.c
  1479. --- linux-4.8.15.orig/arch/x86/entry/common.c 2016-12-15 17:50:48.000000000 +0100
  1480. +++ linux-4.8.15/arch/x86/entry/common.c 2017-01-01 17:07:13.071236305 +0100
  1481. @@ -136,7 +136,7 @@
  1482. #define EXIT_TO_USERMODE_LOOP_FLAGS \
  1483. (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
  1484. - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)
  1485. + _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY)
  1486. static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
  1487. {
  1488. @@ -152,9 +152,16 @@
  1489. /* We have work to do. */
  1490. local_irq_enable();
  1491. - if (cached_flags & _TIF_NEED_RESCHED)
  1492. + if (cached_flags & _TIF_NEED_RESCHED_MASK)
  1493. schedule();
  1494. +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
  1495. + if (unlikely(current->forced_info.si_signo)) {
  1496. + struct task_struct *t = current;
  1497. + force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
  1498. + t->forced_info.si_signo = 0;
  1499. + }
  1500. +#endif
  1501. if (cached_flags & _TIF_UPROBE)
  1502. uprobe_notify_resume(regs);
  1503. diff -Nur linux-4.8.15.orig/arch/x86/entry/entry_32.S linux-4.8.15/arch/x86/entry/entry_32.S
  1504. --- linux-4.8.15.orig/arch/x86/entry/entry_32.S 2016-12-15 17:50:48.000000000 +0100
  1505. +++ linux-4.8.15/arch/x86/entry/entry_32.S 2017-01-01 17:07:13.071236305 +0100
  1506. @@ -271,8 +271,25 @@
  1507. ENTRY(resume_kernel)
  1508. DISABLE_INTERRUPTS(CLBR_ANY)
  1509. need_resched:
  1510. + # preempt count == 0 + NEED_RS set?
  1511. cmpl $0, PER_CPU_VAR(__preempt_count)
  1512. +#ifndef CONFIG_PREEMPT_LAZY
  1513. jnz restore_all
  1514. +#else
  1515. + jz test_int_off
  1516. +
  1517. + # atleast preempt count == 0 ?
  1518. + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
  1519. + jne restore_all
  1520. +
  1521. + GET_THREAD_INFO(%ebp)
  1522. + cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ?
  1523. + jnz restore_all
  1524. +
  1525. + testl $_TIF_NEED_RESCHED_LAZY, TI_flags(%ebp)
  1526. + jz restore_all
  1527. +test_int_off:
  1528. +#endif
  1529. testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
  1530. jz restore_all
  1531. call preempt_schedule_irq
  1532. diff -Nur linux-4.8.15.orig/arch/x86/entry/entry_64.S linux-4.8.15/arch/x86/entry/entry_64.S
  1533. --- linux-4.8.15.orig/arch/x86/entry/entry_64.S 2016-12-15 17:50:48.000000000 +0100
  1534. +++ linux-4.8.15/arch/x86/entry/entry_64.S 2017-01-01 17:07:13.071236305 +0100
  1535. @@ -512,7 +512,23 @@
  1536. bt $9, EFLAGS(%rsp) /* were interrupts off? */
  1537. jnc 1f
  1538. 0: cmpl $0, PER_CPU_VAR(__preempt_count)
  1539. +#ifndef CONFIG_PREEMPT_LAZY
  1540. jnz 1f
  1541. +#else
  1542. + jz do_preempt_schedule_irq
  1543. +
  1544. + # atleast preempt count == 0 ?
  1545. + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
  1546. + jnz 1f
  1547. +
  1548. + GET_THREAD_INFO(%rcx)
  1549. + cmpl $0, TI_preempt_lazy_count(%rcx)
  1550. + jnz 1f
  1551. +
  1552. + bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx)
  1553. + jnc 1f
  1554. +do_preempt_schedule_irq:
  1555. +#endif
  1556. call preempt_schedule_irq
  1557. jmp 0b
  1558. 1:
  1559. @@ -817,6 +833,7 @@
  1560. jmp 2b
  1561. .previous
  1562. +#ifndef CONFIG_PREEMPT_RT_FULL
  1563. /* Call softirq on interrupt stack. Interrupts are off. */
  1564. ENTRY(do_softirq_own_stack)
  1565. pushq %rbp
  1566. @@ -829,6 +846,7 @@
  1567. decl PER_CPU_VAR(irq_count)
  1568. ret
  1569. END(do_softirq_own_stack)
  1570. +#endif
  1571. #ifdef CONFIG_XEN
  1572. idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
  1573. diff -Nur linux-4.8.15.orig/arch/x86/include/asm/preempt.h linux-4.8.15/arch/x86/include/asm/preempt.h
  1574. --- linux-4.8.15.orig/arch/x86/include/asm/preempt.h 2016-12-15 17:50:48.000000000 +0100
  1575. +++ linux-4.8.15/arch/x86/include/asm/preempt.h 2017-01-01 17:07:13.123239646 +0100
  1576. @@ -79,17 +79,46 @@
  1577. * a decrement which hits zero means we have no preempt_count and should
  1578. * reschedule.
  1579. */
  1580. -static __always_inline bool __preempt_count_dec_and_test(void)
  1581. +static __always_inline bool ____preempt_count_dec_and_test(void)
  1582. {
  1583. GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e);
  1584. }
  1585. +static __always_inline bool __preempt_count_dec_and_test(void)
  1586. +{
  1587. + if (____preempt_count_dec_and_test())
  1588. + return true;
  1589. +#ifdef CONFIG_PREEMPT_LAZY
  1590. + if (current_thread_info()->preempt_lazy_count)
  1591. + return false;
  1592. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  1593. +#else
  1594. + return false;
  1595. +#endif
  1596. +}
  1597. +
  1598. /*
  1599. * Returns true when we need to resched and can (barring IRQ state).
  1600. */
  1601. static __always_inline bool should_resched(int preempt_offset)
  1602. {
  1603. +#ifdef CONFIG_PREEMPT_LAZY
  1604. + u32 tmp;
  1605. +
  1606. + tmp = raw_cpu_read_4(__preempt_count);
  1607. + if (tmp == preempt_offset)
  1608. + return true;
  1609. +
  1610. + /* preempt count == 0 ? */
  1611. + tmp &= ~PREEMPT_NEED_RESCHED;
  1612. + if (tmp)
  1613. + return false;
  1614. + if (current_thread_info()->preempt_lazy_count)
  1615. + return false;
  1616. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  1617. +#else
  1618. return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
  1619. +#endif
  1620. }
  1621. #ifdef CONFIG_PREEMPT
  1622. diff -Nur linux-4.8.15.orig/arch/x86/include/asm/signal.h linux-4.8.15/arch/x86/include/asm/signal.h
  1623. --- linux-4.8.15.orig/arch/x86/include/asm/signal.h 2016-12-15 17:50:48.000000000 +0100
  1624. +++ linux-4.8.15/arch/x86/include/asm/signal.h 2017-01-01 17:07:13.123239646 +0100
  1625. @@ -23,6 +23,19 @@
  1626. unsigned long sig[_NSIG_WORDS];
  1627. } sigset_t;
  1628. +/*
  1629. + * Because some traps use the IST stack, we must keep preemption
  1630. + * disabled while calling do_trap(), but do_trap() may call
  1631. + * force_sig_info() which will grab the signal spin_locks for the
  1632. + * task, which in PREEMPT_RT_FULL are mutexes. By defining
  1633. + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
  1634. + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
  1635. + * trap.
  1636. + */
  1637. +#if defined(CONFIG_PREEMPT_RT_FULL)
  1638. +#define ARCH_RT_DELAYS_SIGNAL_SEND
  1639. +#endif
  1640. +
  1641. #ifndef CONFIG_COMPAT
  1642. typedef sigset_t compat_sigset_t;
  1643. #endif
  1644. diff -Nur linux-4.8.15.orig/arch/x86/include/asm/stackprotector.h linux-4.8.15/arch/x86/include/asm/stackprotector.h
  1645. --- linux-4.8.15.orig/arch/x86/include/asm/stackprotector.h 2016-12-15 17:50:48.000000000 +0100
  1646. +++ linux-4.8.15/arch/x86/include/asm/stackprotector.h 2017-01-01 17:07:13.127239912 +0100
  1647. @@ -59,7 +59,7 @@
  1648. */
  1649. static __always_inline void boot_init_stack_canary(void)
  1650. {
  1651. - u64 canary;
  1652. + u64 uninitialized_var(canary);
  1653. u64 tsc;
  1654. #ifdef CONFIG_X86_64
  1655. @@ -70,8 +70,15 @@
  1656. * of randomness. The TSC only matters for very early init,
  1657. * there it already has some randomness on most systems. Later
  1658. * on during the bootup the random pool has true entropy too.
  1659. + *
  1660. + * For preempt-rt we need to weaken the randomness a bit, as
  1661. + * we can't call into the random generator from atomic context
  1662. + * due to locking constraints. We just leave canary
  1663. + * uninitialized and use the TSC based randomness on top of it.
  1664. */
  1665. +#ifndef CONFIG_PREEMPT_RT_FULL
  1666. get_random_bytes(&canary, sizeof(canary));
  1667. +#endif
  1668. tsc = rdtsc();
  1669. canary += tsc + (tsc << 32UL);
  1670. diff -Nur linux-4.8.15.orig/arch/x86/include/asm/thread_info.h linux-4.8.15/arch/x86/include/asm/thread_info.h
  1671. --- linux-4.8.15.orig/arch/x86/include/asm/thread_info.h 2016-12-15 17:50:48.000000000 +0100
  1672. +++ linux-4.8.15/arch/x86/include/asm/thread_info.h 2017-01-01 17:07:13.127239912 +0100
  1673. @@ -57,6 +57,8 @@
  1674. __u32 flags; /* low level flags */
  1675. __u32 status; /* thread synchronous flags */
  1676. __u32 cpu; /* current CPU */
  1677. + int preempt_lazy_count; /* 0 => lazy preemptable
  1678. + <0 => BUG */
  1679. };
  1680. #define INIT_THREAD_INFO(tsk) \
  1681. @@ -73,6 +75,10 @@
  1682. #include <asm/asm-offsets.h>
  1683. +#define GET_THREAD_INFO(reg) \
  1684. + _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \
  1685. + _ASM_SUB $(THREAD_SIZE),reg ;
  1686. +
  1687. #endif
  1688. /*
  1689. @@ -91,6 +97,7 @@
  1690. #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
  1691. #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
  1692. #define TIF_SECCOMP 8 /* secure computing */
  1693. +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
  1694. #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
  1695. #define TIF_UPROBE 12 /* breakpointed or singlestepping */
  1696. #define TIF_NOTSC 16 /* TSC is not accessible in userland */
  1697. @@ -115,6 +122,7 @@
  1698. #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
  1699. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  1700. #define _TIF_SECCOMP (1 << TIF_SECCOMP)
  1701. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  1702. #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
  1703. #define _TIF_UPROBE (1 << TIF_UPROBE)
  1704. #define _TIF_NOTSC (1 << TIF_NOTSC)
  1705. @@ -151,6 +159,8 @@
  1706. #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
  1707. #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
  1708. +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  1709. +
  1710. #define STACK_WARN (THREAD_SIZE/8)
  1711. /*
  1712. diff -Nur linux-4.8.15.orig/arch/x86/include/asm/uv/uv_bau.h linux-4.8.15/arch/x86/include/asm/uv/uv_bau.h
  1713. --- linux-4.8.15.orig/arch/x86/include/asm/uv/uv_bau.h 2016-12-15 17:50:48.000000000 +0100
  1714. +++ linux-4.8.15/arch/x86/include/asm/uv/uv_bau.h 2017-01-01 17:07:13.127239912 +0100
  1715. @@ -615,9 +615,9 @@
  1716. cycles_t send_message;
  1717. cycles_t period_end;
  1718. cycles_t period_time;
  1719. - spinlock_t uvhub_lock;
  1720. - spinlock_t queue_lock;
  1721. - spinlock_t disable_lock;
  1722. + raw_spinlock_t uvhub_lock;
  1723. + raw_spinlock_t queue_lock;
  1724. + raw_spinlock_t disable_lock;
  1725. /* tunables */
  1726. int max_concurr;
  1727. int max_concurr_const;
  1728. @@ -776,15 +776,15 @@
  1729. * to be lowered below the current 'v'. atomic_add_unless can only stop
  1730. * on equal.
  1731. */
  1732. -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
  1733. +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u)
  1734. {
  1735. - spin_lock(lock);
  1736. + raw_spin_lock(lock);
  1737. if (atomic_read(v) >= u) {
  1738. - spin_unlock(lock);
  1739. + raw_spin_unlock(lock);
  1740. return 0;
  1741. }
  1742. atomic_inc(v);
  1743. - spin_unlock(lock);
  1744. + raw_spin_unlock(lock);
  1745. return 1;
  1746. }
  1747. diff -Nur linux-4.8.15.orig/arch/x86/Kconfig linux-4.8.15/arch/x86/Kconfig
  1748. --- linux-4.8.15.orig/arch/x86/Kconfig 2016-12-15 17:50:48.000000000 +0100
  1749. +++ linux-4.8.15/arch/x86/Kconfig 2017-01-01 17:07:12.915226253 +0100
  1750. @@ -17,6 +17,7 @@
  1751. ### Arch settings
  1752. config X86
  1753. def_bool y
  1754. + select HAVE_PREEMPT_LAZY
  1755. select ACPI_LEGACY_TABLES_LOOKUP if ACPI
  1756. select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
  1757. select ANON_INODES
  1758. @@ -231,8 +232,11 @@
  1759. def_bool y
  1760. depends on ISA_DMA_API
  1761. +config RWSEM_GENERIC_SPINLOCK
  1762. + def_bool PREEMPT_RT_FULL
  1763. +
  1764. config RWSEM_XCHGADD_ALGORITHM
  1765. - def_bool y
  1766. + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
  1767. config GENERIC_CALIBRATE_DELAY
  1768. def_bool y
  1769. @@ -885,7 +889,7 @@
  1770. config MAXSMP
  1771. bool "Enable Maximum number of SMP Processors and NUMA Nodes"
  1772. depends on X86_64 && SMP && DEBUG_KERNEL
  1773. - select CPUMASK_OFFSTACK
  1774. + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
  1775. ---help---
  1776. Enable maximum number of CPUS and NUMA Nodes for this architecture.
  1777. If unsure, say N.
  1778. diff -Nur linux-4.8.15.orig/arch/x86/kernel/acpi/boot.c linux-4.8.15/arch/x86/kernel/acpi/boot.c
  1779. --- linux-4.8.15.orig/arch/x86/kernel/acpi/boot.c 2016-12-15 17:50:48.000000000 +0100
  1780. +++ linux-4.8.15/arch/x86/kernel/acpi/boot.c 2017-01-01 17:07:13.171242740 +0100
  1781. @@ -87,7 +87,9 @@
  1782. * ->ioapic_mutex
  1783. * ->ioapic_lock
  1784. */
  1785. +#ifdef CONFIG_X86_IO_APIC
  1786. static DEFINE_MUTEX(acpi_ioapic_lock);
  1787. +#endif
  1788. /* --------------------------------------------------------------------------
  1789. Boot-time Configuration
  1790. diff -Nur linux-4.8.15.orig/arch/x86/kernel/apic/io_apic.c linux-4.8.15/arch/x86/kernel/apic/io_apic.c
  1791. --- linux-4.8.15.orig/arch/x86/kernel/apic/io_apic.c 2016-12-15 17:50:48.000000000 +0100
  1792. +++ linux-4.8.15/arch/x86/kernel/apic/io_apic.c 2017-01-01 17:07:13.263248666 +0100
  1793. @@ -1712,7 +1712,8 @@
  1794. static inline bool ioapic_irqd_mask(struct irq_data *data)
  1795. {
  1796. /* If we are moving the irq we need to mask it */
  1797. - if (unlikely(irqd_is_setaffinity_pending(data))) {
  1798. + if (unlikely(irqd_is_setaffinity_pending(data) &&
  1799. + !irqd_irq_inprogress(data))) {
  1800. mask_ioapic_irq(data);
  1801. return true;
  1802. }
  1803. diff -Nur linux-4.8.15.orig/arch/x86/kernel/asm-offsets.c linux-4.8.15/arch/x86/kernel/asm-offsets.c
  1804. --- linux-4.8.15.orig/arch/x86/kernel/asm-offsets.c 2016-12-15 17:50:48.000000000 +0100
  1805. +++ linux-4.8.15/arch/x86/kernel/asm-offsets.c 2017-01-01 17:07:13.279249693 +0100
  1806. @@ -31,6 +31,7 @@
  1807. BLANK();
  1808. OFFSET(TI_flags, thread_info, flags);
  1809. OFFSET(TI_status, thread_info, status);
  1810. + OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count);
  1811. BLANK();
  1812. OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
  1813. @@ -88,4 +89,5 @@
  1814. BLANK();
  1815. DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
  1816. + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED);
  1817. }
  1818. diff -Nur linux-4.8.15.orig/arch/x86/kernel/cpu/mcheck/mce.c linux-4.8.15/arch/x86/kernel/cpu/mcheck/mce.c
  1819. --- linux-4.8.15.orig/arch/x86/kernel/cpu/mcheck/mce.c 2016-12-15 17:50:48.000000000 +0100
  1820. +++ linux-4.8.15/arch/x86/kernel/cpu/mcheck/mce.c 2017-01-01 17:07:13.335253297 +0100
  1821. @@ -41,6 +41,8 @@
  1822. #include <linux/debugfs.h>
  1823. #include <linux/irq_work.h>
  1824. #include <linux/export.h>
  1825. +#include <linux/jiffies.h>
  1826. +#include <linux/swork.h>
  1827. #include <asm/processor.h>
  1828. #include <asm/traps.h>
  1829. @@ -1291,7 +1293,7 @@
  1830. static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
  1831. static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
  1832. -static DEFINE_PER_CPU(struct timer_list, mce_timer);
  1833. +static DEFINE_PER_CPU(struct hrtimer, mce_timer);
  1834. static unsigned long mce_adjust_timer_default(unsigned long interval)
  1835. {
  1836. @@ -1300,32 +1302,18 @@
  1837. static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
  1838. -static void __restart_timer(struct timer_list *t, unsigned long interval)
  1839. +static enum hrtimer_restart __restart_timer(struct hrtimer *timer, unsigned long interval)
  1840. {
  1841. - unsigned long when = jiffies + interval;
  1842. - unsigned long flags;
  1843. -
  1844. - local_irq_save(flags);
  1845. -
  1846. - if (timer_pending(t)) {
  1847. - if (time_before(when, t->expires))
  1848. - mod_timer(t, when);
  1849. - } else {
  1850. - t->expires = round_jiffies(when);
  1851. - add_timer_on(t, smp_processor_id());
  1852. - }
  1853. -
  1854. - local_irq_restore(flags);
  1855. + if (!interval)
  1856. + return HRTIMER_NORESTART;
  1857. + hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(interval)));
  1858. + return HRTIMER_RESTART;
  1859. }
  1860. -static void mce_timer_fn(unsigned long data)
  1861. +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)
  1862. {
  1863. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  1864. - int cpu = smp_processor_id();
  1865. unsigned long iv;
  1866. - WARN_ON(cpu != data);
  1867. -
  1868. iv = __this_cpu_read(mce_next_interval);
  1869. if (mce_available(this_cpu_ptr(&cpu_info))) {
  1870. @@ -1348,7 +1336,7 @@
  1871. done:
  1872. __this_cpu_write(mce_next_interval, iv);
  1873. - __restart_timer(t, iv);
  1874. + return __restart_timer(timer, iv);
  1875. }
  1876. /*
  1877. @@ -1356,7 +1344,7 @@
  1878. */
  1879. void mce_timer_kick(unsigned long interval)
  1880. {
  1881. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  1882. + struct hrtimer *t = this_cpu_ptr(&mce_timer);
  1883. unsigned long iv = __this_cpu_read(mce_next_interval);
  1884. __restart_timer(t, interval);
  1885. @@ -1371,7 +1359,7 @@
  1886. int cpu;
  1887. for_each_online_cpu(cpu)
  1888. - del_timer_sync(&per_cpu(mce_timer, cpu));
  1889. + hrtimer_cancel(&per_cpu(mce_timer, cpu));
  1890. }
  1891. static void mce_do_trigger(struct work_struct *work)
  1892. @@ -1381,6 +1369,56 @@
  1893. static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
  1894. +static void __mce_notify_work(struct swork_event *event)
  1895. +{
  1896. + /* Not more than two messages every minute */
  1897. + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
  1898. +
  1899. + /* wake processes polling /dev/mcelog */
  1900. + wake_up_interruptible(&mce_chrdev_wait);
  1901. +
  1902. + /*
  1903. + * There is no risk of missing notifications because
  1904. + * work_pending is always cleared before the function is
  1905. + * executed.
  1906. + */
  1907. + if (mce_helper[0] && !work_pending(&mce_trigger_work))
  1908. + schedule_work(&mce_trigger_work);
  1909. +
  1910. + if (__ratelimit(&ratelimit))
  1911. + pr_info(HW_ERR "Machine check events logged\n");
  1912. +}
  1913. +
  1914. +#ifdef CONFIG_PREEMPT_RT_FULL
  1915. +static bool notify_work_ready __read_mostly;
  1916. +static struct swork_event notify_work;
  1917. +
  1918. +static int mce_notify_work_init(void)
  1919. +{
  1920. + int err;
  1921. +
  1922. + err = swork_get();
  1923. + if (err)
  1924. + return err;
  1925. +
  1926. + INIT_SWORK(&notify_work, __mce_notify_work);
  1927. + notify_work_ready = true;
  1928. + return 0;
  1929. +}
  1930. +
  1931. +static void mce_notify_work(void)
  1932. +{
  1933. + if (notify_work_ready)
  1934. + swork_queue(&notify_work);
  1935. +}
  1936. +#else
  1937. +static void mce_notify_work(void)
  1938. +{
  1939. + __mce_notify_work(NULL);
  1940. +}
  1941. +static inline int mce_notify_work_init(void) { return 0; }
  1942. +#endif
  1943. +
  1944. /*
  1945. * Notify the user(s) about new machine check events.
  1946. * Can be called from interrupt context, but not from machine check/NMI
  1947. @@ -1388,19 +1426,8 @@
  1948. */
  1949. int mce_notify_irq(void)
  1950. {
  1951. - /* Not more than two messages every minute */
  1952. - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
  1953. -
  1954. if (test_and_clear_bit(0, &mce_need_notify)) {
  1955. - /* wake processes polling /dev/mcelog */
  1956. - wake_up_interruptible(&mce_chrdev_wait);
  1957. -
  1958. - if (mce_helper[0])
  1959. - schedule_work(&mce_trigger_work);
  1960. -
  1961. - if (__ratelimit(&ratelimit))
  1962. - pr_info(HW_ERR "Machine check events logged\n");
  1963. -
  1964. + mce_notify_work();
  1965. return 1;
  1966. }
  1967. return 0;
  1968. @@ -1717,7 +1744,7 @@
  1969. }
  1970. }
  1971. -static void mce_start_timer(unsigned int cpu, struct timer_list *t)
  1972. +static void mce_start_timer(unsigned int cpu, struct hrtimer *t)
  1973. {
  1974. unsigned long iv = check_interval * HZ;
  1975. @@ -1726,16 +1753,17 @@
  1976. per_cpu(mce_next_interval, cpu) = iv;
  1977. - t->expires = round_jiffies(jiffies + iv);
  1978. - add_timer_on(t, cpu);
  1979. + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL),
  1980. + 0, HRTIMER_MODE_REL_PINNED);
  1981. }
  1982. static void __mcheck_cpu_init_timer(void)
  1983. {
  1984. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  1985. + struct hrtimer *t = this_cpu_ptr(&mce_timer);
  1986. unsigned int cpu = smp_processor_id();
  1987. - setup_pinned_timer(t, mce_timer_fn, cpu);
  1988. + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  1989. + t->function = mce_timer_fn;
  1990. mce_start_timer(cpu, t);
  1991. }
  1992. @@ -2459,6 +2487,8 @@
  1993. if (!mce_available(raw_cpu_ptr(&cpu_info)))
  1994. return;
  1995. + hrtimer_cancel(this_cpu_ptr(&mce_timer));
  1996. +
  1997. if (!(action & CPU_TASKS_FROZEN))
  1998. cmci_clear();
  1999. @@ -2481,6 +2511,7 @@
  2000. if (b->init)
  2001. wrmsrl(msr_ops.ctl(i), b->ctl);
  2002. }
  2003. + __mcheck_cpu_init_timer();
  2004. }
  2005. /* Get notified when a cpu comes on/off. Be hotplug friendly. */
  2006. @@ -2488,7 +2519,6 @@
  2007. mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
  2008. {
  2009. unsigned int cpu = (unsigned long)hcpu;
  2010. - struct timer_list *t = &per_cpu(mce_timer, cpu);
  2011. switch (action & ~CPU_TASKS_FROZEN) {
  2012. case CPU_ONLINE:
  2013. @@ -2508,11 +2538,9 @@
  2014. break;
  2015. case CPU_DOWN_PREPARE:
  2016. smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
  2017. - del_timer_sync(t);
  2018. break;
  2019. case CPU_DOWN_FAILED:
  2020. smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
  2021. - mce_start_timer(cpu, t);
  2022. break;
  2023. }
  2024. @@ -2551,6 +2579,10 @@
  2025. goto err_out;
  2026. }
  2027. + err = mce_notify_work_init();
  2028. + if (err)
  2029. + goto err_out;
  2030. +
  2031. if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) {
  2032. err = -ENOMEM;
  2033. goto err_out;
  2034. diff -Nur linux-4.8.15.orig/arch/x86/kernel/dumpstack_32.c linux-4.8.15/arch/x86/kernel/dumpstack_32.c
  2035. --- linux-4.8.15.orig/arch/x86/kernel/dumpstack_32.c 2016-12-15 17:50:48.000000000 +0100
  2036. +++ linux-4.8.15/arch/x86/kernel/dumpstack_32.c 2017-01-01 17:07:13.351254342 +0100
  2037. @@ -42,7 +42,7 @@
  2038. unsigned long *stack, unsigned long bp,
  2039. const struct stacktrace_ops *ops, void *data)
  2040. {
  2041. - const unsigned cpu = get_cpu();
  2042. + const unsigned cpu = get_cpu_light();
  2043. int graph = 0;
  2044. u32 *prev_esp;
  2045. @@ -84,7 +84,7 @@
  2046. break;
  2047. touch_nmi_watchdog();
  2048. }
  2049. - put_cpu();
  2050. + put_cpu_light();
  2051. }
  2052. EXPORT_SYMBOL(dump_trace);
  2053. diff -Nur linux-4.8.15.orig/arch/x86/kernel/dumpstack_64.c linux-4.8.15/arch/x86/kernel/dumpstack_64.c
  2054. --- linux-4.8.15.orig/arch/x86/kernel/dumpstack_64.c 2016-12-15 17:50:48.000000000 +0100
  2055. +++ linux-4.8.15/arch/x86/kernel/dumpstack_64.c 2017-01-01 17:07:13.351254342 +0100
  2056. @@ -152,7 +152,7 @@
  2057. unsigned long *stack, unsigned long bp,
  2058. const struct stacktrace_ops *ops, void *data)
  2059. {
  2060. - const unsigned cpu = get_cpu();
  2061. + const unsigned cpu = get_cpu_light();
  2062. unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
  2063. unsigned long dummy;
  2064. unsigned used = 0;
  2065. @@ -239,7 +239,7 @@
  2066. * This handles the process stack:
  2067. */
  2068. bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph);
  2069. - put_cpu();
  2070. + put_cpu_light();
  2071. }
  2072. EXPORT_SYMBOL(dump_trace);
  2073. @@ -253,7 +253,7 @@
  2074. int cpu;
  2075. int i;
  2076. - preempt_disable();
  2077. + migrate_disable();
  2078. cpu = smp_processor_id();
  2079. irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
  2080. @@ -299,7 +299,7 @@
  2081. stack++;
  2082. touch_nmi_watchdog();
  2083. }
  2084. - preempt_enable();
  2085. + migrate_enable();
  2086. pr_cont("\n");
  2087. show_trace_log_lvl(task, regs, sp, bp, log_lvl);
  2088. diff -Nur linux-4.8.15.orig/arch/x86/kernel/irq_32.c linux-4.8.15/arch/x86/kernel/irq_32.c
  2089. --- linux-4.8.15.orig/arch/x86/kernel/irq_32.c 2016-12-15 17:50:48.000000000 +0100
  2090. +++ linux-4.8.15/arch/x86/kernel/irq_32.c 2017-01-01 17:07:13.351254342 +0100
  2091. @@ -127,6 +127,7 @@
  2092. cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
  2093. }
  2094. +#ifndef CONFIG_PREEMPT_RT_FULL
  2095. void do_softirq_own_stack(void)
  2096. {
  2097. struct irq_stack *irqstk;
  2098. @@ -143,6 +144,7 @@
  2099. call_on_stack(__do_softirq, isp);
  2100. }
  2101. +#endif
  2102. bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
  2103. {
  2104. diff -Nur linux-4.8.15.orig/arch/x86/kernel/process_32.c linux-4.8.15/arch/x86/kernel/process_32.c
  2105. --- linux-4.8.15.orig/arch/x86/kernel/process_32.c 2016-12-15 17:50:48.000000000 +0100
  2106. +++ linux-4.8.15/arch/x86/kernel/process_32.c 2017-01-01 17:07:13.359254847 +0100
  2107. @@ -35,6 +35,7 @@
  2108. #include <linux/uaccess.h>
  2109. #include <linux/io.h>
  2110. #include <linux/kdebug.h>
  2111. +#include <linux/highmem.h>
  2112. #include <asm/pgtable.h>
  2113. #include <asm/ldt.h>
  2114. @@ -210,6 +211,35 @@
  2115. }
  2116. EXPORT_SYMBOL_GPL(start_thread);
  2117. +#ifdef CONFIG_PREEMPT_RT_FULL
  2118. +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
  2119. +{
  2120. + int i;
  2121. +
  2122. + /*
  2123. + * Clear @prev's kmap_atomic mappings
  2124. + */
  2125. + for (i = 0; i < prev_p->kmap_idx; i++) {
  2126. + int idx = i + KM_TYPE_NR * smp_processor_id();
  2127. + pte_t *ptep = kmap_pte - idx;
  2128. +
  2129. + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
  2130. + }
  2131. + /*
  2132. + * Restore @next_p's kmap_atomic mappings
  2133. + */
  2134. + for (i = 0; i < next_p->kmap_idx; i++) {
  2135. + int idx = i + KM_TYPE_NR * smp_processor_id();
  2136. +
  2137. + if (!pte_none(next_p->kmap_pte[i]))
  2138. + set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
  2139. + }
  2140. +}
  2141. +#else
  2142. +static inline void
  2143. +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
  2144. +#endif
  2145. +
  2146. /*
  2147. * switch_to(x,y) should switch tasks from x to y.
  2148. @@ -286,6 +316,8 @@
  2149. task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
  2150. __switch_to_xtra(prev_p, next_p, tss);
  2151. + switch_kmaps(prev_p, next_p);
  2152. +
  2153. /*
  2154. * Leave lazy mode, flushing any hypercalls made here.
  2155. * This must be done before restoring TLS segments so
  2156. diff -Nur linux-4.8.15.orig/arch/x86/kvm/lapic.c linux-4.8.15/arch/x86/kvm/lapic.c
  2157. --- linux-4.8.15.orig/arch/x86/kvm/lapic.c 2016-12-15 17:50:48.000000000 +0100
  2158. +++ linux-4.8.15/arch/x86/kvm/lapic.c 2017-01-01 17:07:13.379256138 +0100
  2159. @@ -1938,6 +1938,7 @@
  2160. hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
  2161. HRTIMER_MODE_ABS_PINNED);
  2162. apic->lapic_timer.timer.function = apic_timer_fn;
  2163. + apic->lapic_timer.timer.irqsafe = 1;
  2164. /*
  2165. * APIC is created enabled. This will prevent kvm_lapic_set_base from
  2166. diff -Nur linux-4.8.15.orig/arch/x86/kvm/x86.c linux-4.8.15/arch/x86/kvm/x86.c
  2167. --- linux-4.8.15.orig/arch/x86/kvm/x86.c 2016-12-15 17:50:48.000000000 +0100
  2168. +++ linux-4.8.15/arch/x86/kvm/x86.c 2017-01-01 17:07:13.415258465 +0100
  2169. @@ -5877,6 +5877,13 @@
  2170. goto out;
  2171. }
  2172. +#ifdef CONFIG_PREEMPT_RT_FULL
  2173. + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
  2174. + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
  2175. + return -EOPNOTSUPP;
  2176. + }
  2177. +#endif
  2178. +
  2179. r = kvm_mmu_module_init();
  2180. if (r)
  2181. goto out_free_percpu;
  2182. diff -Nur linux-4.8.15.orig/arch/x86/mm/highmem_32.c linux-4.8.15/arch/x86/mm/highmem_32.c
  2183. --- linux-4.8.15.orig/arch/x86/mm/highmem_32.c 2016-12-15 17:50:48.000000000 +0100
  2184. +++ linux-4.8.15/arch/x86/mm/highmem_32.c 2017-01-01 17:07:13.443260261 +0100
  2185. @@ -32,10 +32,11 @@
  2186. */
  2187. void *kmap_atomic_prot(struct page *page, pgprot_t prot)
  2188. {
  2189. + pte_t pte = mk_pte(page, prot);
  2190. unsigned long vaddr;
  2191. int idx, type;
  2192. - preempt_disable();
  2193. + preempt_disable_nort();
  2194. pagefault_disable();
  2195. if (!PageHighMem(page))
  2196. @@ -45,7 +46,10 @@
  2197. idx = type + KM_TYPE_NR*smp_processor_id();
  2198. vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
  2199. BUG_ON(!pte_none(*(kmap_pte-idx)));
  2200. - set_pte(kmap_pte-idx, mk_pte(page, prot));
  2201. +#ifdef CONFIG_PREEMPT_RT_FULL
  2202. + current->kmap_pte[type] = pte;
  2203. +#endif
  2204. + set_pte(kmap_pte-idx, pte);
  2205. arch_flush_lazy_mmu_mode();
  2206. return (void *)vaddr;
  2207. @@ -88,6 +92,9 @@
  2208. * is a bad idea also, in case the page changes cacheability
  2209. * attributes or becomes a protected page in a hypervisor.
  2210. */
  2211. +#ifdef CONFIG_PREEMPT_RT_FULL
  2212. + current->kmap_pte[type] = __pte(0);
  2213. +#endif
  2214. kpte_clear_flush(kmap_pte-idx, vaddr);
  2215. kmap_atomic_idx_pop();
  2216. arch_flush_lazy_mmu_mode();
  2217. @@ -100,7 +107,7 @@
  2218. #endif
  2219. pagefault_enable();
  2220. - preempt_enable();
  2221. + preempt_enable_nort();
  2222. }
  2223. EXPORT_SYMBOL(__kunmap_atomic);
  2224. diff -Nur linux-4.8.15.orig/arch/x86/mm/iomap_32.c linux-4.8.15/arch/x86/mm/iomap_32.c
  2225. --- linux-4.8.15.orig/arch/x86/mm/iomap_32.c 2016-12-15 17:50:48.000000000 +0100
  2226. +++ linux-4.8.15/arch/x86/mm/iomap_32.c 2017-01-01 17:07:13.471262063 +0100
  2227. @@ -56,6 +56,7 @@
  2228. void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
  2229. {
  2230. + pte_t pte = pfn_pte(pfn, prot);
  2231. unsigned long vaddr;
  2232. int idx, type;
  2233. @@ -65,7 +66,12 @@
  2234. type = kmap_atomic_idx_push();
  2235. idx = type + KM_TYPE_NR * smp_processor_id();
  2236. vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
  2237. - set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
  2238. + WARN_ON(!pte_none(*(kmap_pte - idx)));
  2239. +
  2240. +#ifdef CONFIG_PREEMPT_RT_FULL
  2241. + current->kmap_pte[type] = pte;
  2242. +#endif
  2243. + set_pte(kmap_pte - idx, pte);
  2244. arch_flush_lazy_mmu_mode();
  2245. return (void *)vaddr;
  2246. @@ -113,6 +119,9 @@
  2247. * is a bad idea also, in case the page changes cacheability
  2248. * attributes or becomes a protected page in a hypervisor.
  2249. */
  2250. +#ifdef CONFIG_PREEMPT_RT_FULL
  2251. + current->kmap_pte[type] = __pte(0);
  2252. +#endif
  2253. kpte_clear_flush(kmap_pte-idx, vaddr);
  2254. kmap_atomic_idx_pop();
  2255. }
  2256. diff -Nur linux-4.8.15.orig/arch/x86/platform/uv/tlb_uv.c linux-4.8.15/arch/x86/platform/uv/tlb_uv.c
  2257. --- linux-4.8.15.orig/arch/x86/platform/uv/tlb_uv.c 2016-12-15 17:50:48.000000000 +0100
  2258. +++ linux-4.8.15/arch/x86/platform/uv/tlb_uv.c 2017-01-01 17:07:13.575268756 +0100
  2259. @@ -729,9 +729,9 @@
  2260. quiesce_local_uvhub(hmaster);
  2261. - spin_lock(&hmaster->queue_lock);
  2262. + raw_spin_lock(&hmaster->queue_lock);
  2263. reset_with_ipi(&bau_desc->distribution, bcp);
  2264. - spin_unlock(&hmaster->queue_lock);
  2265. + raw_spin_unlock(&hmaster->queue_lock);
  2266. end_uvhub_quiesce(hmaster);
  2267. @@ -751,9 +751,9 @@
  2268. quiesce_local_uvhub(hmaster);
  2269. - spin_lock(&hmaster->queue_lock);
  2270. + raw_spin_lock(&hmaster->queue_lock);
  2271. reset_with_ipi(&bau_desc->distribution, bcp);
  2272. - spin_unlock(&hmaster->queue_lock);
  2273. + raw_spin_unlock(&hmaster->queue_lock);
  2274. end_uvhub_quiesce(hmaster);
  2275. @@ -774,7 +774,7 @@
  2276. cycles_t tm1;
  2277. hmaster = bcp->uvhub_master;
  2278. - spin_lock(&hmaster->disable_lock);
  2279. + raw_spin_lock(&hmaster->disable_lock);
  2280. if (!bcp->baudisabled) {
  2281. stat->s_bau_disabled++;
  2282. tm1 = get_cycles();
  2283. @@ -787,7 +787,7 @@
  2284. }
  2285. }
  2286. }
  2287. - spin_unlock(&hmaster->disable_lock);
  2288. + raw_spin_unlock(&hmaster->disable_lock);
  2289. }
  2290. static void count_max_concurr(int stat, struct bau_control *bcp,
  2291. @@ -850,7 +850,7 @@
  2292. */
  2293. static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
  2294. {
  2295. - spinlock_t *lock = &hmaster->uvhub_lock;
  2296. + raw_spinlock_t *lock = &hmaster->uvhub_lock;
  2297. atomic_t *v;
  2298. v = &hmaster->active_descriptor_count;
  2299. @@ -983,7 +983,7 @@
  2300. struct bau_control *hmaster;
  2301. hmaster = bcp->uvhub_master;
  2302. - spin_lock(&hmaster->disable_lock);
  2303. + raw_spin_lock(&hmaster->disable_lock);
  2304. if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
  2305. stat->s_bau_reenabled++;
  2306. for_each_present_cpu(tcpu) {
  2307. @@ -995,10 +995,10 @@
  2308. tbcp->period_giveups = 0;
  2309. }
  2310. }
  2311. - spin_unlock(&hmaster->disable_lock);
  2312. + raw_spin_unlock(&hmaster->disable_lock);
  2313. return 0;
  2314. }
  2315. - spin_unlock(&hmaster->disable_lock);
  2316. + raw_spin_unlock(&hmaster->disable_lock);
  2317. return -1;
  2318. }
  2319. @@ -1916,9 +1916,9 @@
  2320. bcp->cong_reps = congested_reps;
  2321. bcp->disabled_period = sec_2_cycles(disabled_period);
  2322. bcp->giveup_limit = giveup_limit;
  2323. - spin_lock_init(&bcp->queue_lock);
  2324. - spin_lock_init(&bcp->uvhub_lock);
  2325. - spin_lock_init(&bcp->disable_lock);
  2326. + raw_spin_lock_init(&bcp->queue_lock);
  2327. + raw_spin_lock_init(&bcp->uvhub_lock);
  2328. + raw_spin_lock_init(&bcp->disable_lock);
  2329. }
  2330. }
  2331. diff -Nur linux-4.8.15.orig/arch/x86/platform/uv/uv_time.c linux-4.8.15/arch/x86/platform/uv/uv_time.c
  2332. --- linux-4.8.15.orig/arch/x86/platform/uv/uv_time.c 2016-12-15 17:50:48.000000000 +0100
  2333. +++ linux-4.8.15/arch/x86/platform/uv/uv_time.c 2017-01-01 17:07:13.575268756 +0100
  2334. @@ -57,7 +57,7 @@
  2335. /* There is one of these allocated per node */
  2336. struct uv_rtc_timer_head {
  2337. - spinlock_t lock;
  2338. + raw_spinlock_t lock;
  2339. /* next cpu waiting for timer, local node relative: */
  2340. int next_cpu;
  2341. /* number of cpus on this node: */
  2342. @@ -177,7 +177,7 @@
  2343. uv_rtc_deallocate_timers();
  2344. return -ENOMEM;
  2345. }
  2346. - spin_lock_init(&head->lock);
  2347. + raw_spin_lock_init(&head->lock);
  2348. head->ncpus = uv_blade_nr_possible_cpus(bid);
  2349. head->next_cpu = -1;
  2350. blade_info[bid] = head;
  2351. @@ -231,7 +231,7 @@
  2352. unsigned long flags;
  2353. int next_cpu;
  2354. - spin_lock_irqsave(&head->lock, flags);
  2355. + raw_spin_lock_irqsave(&head->lock, flags);
  2356. next_cpu = head->next_cpu;
  2357. *t = expires;
  2358. @@ -243,12 +243,12 @@
  2359. if (uv_setup_intr(cpu, expires)) {
  2360. *t = ULLONG_MAX;
  2361. uv_rtc_find_next_timer(head, pnode);
  2362. - spin_unlock_irqrestore(&head->lock, flags);
  2363. + raw_spin_unlock_irqrestore(&head->lock, flags);
  2364. return -ETIME;
  2365. }
  2366. }
  2367. - spin_unlock_irqrestore(&head->lock, flags);
  2368. + raw_spin_unlock_irqrestore(&head->lock, flags);
  2369. return 0;
  2370. }
  2371. @@ -267,7 +267,7 @@
  2372. unsigned long flags;
  2373. int rc = 0;
  2374. - spin_lock_irqsave(&head->lock, flags);
  2375. + raw_spin_lock_irqsave(&head->lock, flags);
  2376. if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
  2377. rc = 1;
  2378. @@ -279,7 +279,7 @@
  2379. uv_rtc_find_next_timer(head, pnode);
  2380. }
  2381. - spin_unlock_irqrestore(&head->lock, flags);
  2382. + raw_spin_unlock_irqrestore(&head->lock, flags);
  2383. return rc;
  2384. }
  2385. @@ -299,13 +299,18 @@
  2386. static cycle_t uv_read_rtc(struct clocksource *cs)
  2387. {
  2388. unsigned long offset;
  2389. + cycle_t cycles;
  2390. + preempt_disable();
  2391. if (uv_get_min_hub_revision_id() == 1)
  2392. offset = 0;
  2393. else
  2394. offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
  2395. - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
  2396. + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
  2397. + preempt_enable();
  2398. +
  2399. + return cycles;
  2400. }
  2401. /*
  2402. diff -Nur linux-4.8.15.orig/block/blk-core.c linux-4.8.15/block/blk-core.c
  2403. --- linux-4.8.15.orig/block/blk-core.c 2016-12-15 17:50:48.000000000 +0100
  2404. +++ linux-4.8.15/block/blk-core.c 2017-01-01 17:07:13.619271599 +0100
  2405. @@ -125,6 +125,9 @@
  2406. INIT_LIST_HEAD(&rq->queuelist);
  2407. INIT_LIST_HEAD(&rq->timeout_list);
  2408. +#ifdef CONFIG_PREEMPT_RT_FULL
  2409. + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
  2410. +#endif
  2411. rq->cpu = -1;
  2412. rq->q = q;
  2413. rq->__sector = (sector_t) -1;
  2414. @@ -233,7 +236,7 @@
  2415. **/
  2416. void blk_start_queue(struct request_queue *q)
  2417. {
  2418. - WARN_ON(!irqs_disabled());
  2419. + WARN_ON_NONRT(!irqs_disabled());
  2420. queue_flag_clear(QUEUE_FLAG_STOPPED, q);
  2421. __blk_run_queue(q);
  2422. @@ -659,7 +662,7 @@
  2423. if (nowait)
  2424. return -EBUSY;
  2425. - ret = wait_event_interruptible(q->mq_freeze_wq,
  2426. + ret = swait_event_interruptible(q->mq_freeze_wq,
  2427. !atomic_read(&q->mq_freeze_depth) ||
  2428. blk_queue_dying(q));
  2429. if (blk_queue_dying(q))
  2430. @@ -679,7 +682,7 @@
  2431. struct request_queue *q =
  2432. container_of(ref, struct request_queue, q_usage_counter);
  2433. - wake_up_all(&q->mq_freeze_wq);
  2434. + swake_up_all(&q->mq_freeze_wq);
  2435. }
  2436. static void blk_rq_timed_out_timer(unsigned long data)
  2437. @@ -748,7 +751,7 @@
  2438. q->bypass_depth = 1;
  2439. __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
  2440. - init_waitqueue_head(&q->mq_freeze_wq);
  2441. + init_swait_queue_head(&q->mq_freeze_wq);
  2442. /*
  2443. * Init percpu_ref in atomic mode so that it's faster to shutdown.
  2444. @@ -3171,7 +3174,7 @@
  2445. blk_run_queue_async(q);
  2446. else
  2447. __blk_run_queue(q);
  2448. - spin_unlock(q->queue_lock);
  2449. + spin_unlock_irq(q->queue_lock);
  2450. }
  2451. static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
  2452. @@ -3219,7 +3222,6 @@
  2453. void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  2454. {
  2455. struct request_queue *q;
  2456. - unsigned long flags;
  2457. struct request *rq;
  2458. LIST_HEAD(list);
  2459. unsigned int depth;
  2460. @@ -3239,11 +3241,6 @@
  2461. q = NULL;
  2462. depth = 0;
  2463. - /*
  2464. - * Save and disable interrupts here, to avoid doing it for every
  2465. - * queue lock we have to take.
  2466. - */
  2467. - local_irq_save(flags);
  2468. while (!list_empty(&list)) {
  2469. rq = list_entry_rq(list.next);
  2470. list_del_init(&rq->queuelist);
  2471. @@ -3256,7 +3253,7 @@
  2472. queue_unplugged(q, depth, from_schedule);
  2473. q = rq->q;
  2474. depth = 0;
  2475. - spin_lock(q->queue_lock);
  2476. + spin_lock_irq(q->queue_lock);
  2477. }
  2478. /*
  2479. @@ -3283,8 +3280,6 @@
  2480. */
  2481. if (q)
  2482. queue_unplugged(q, depth, from_schedule);
  2483. -
  2484. - local_irq_restore(flags);
  2485. }
  2486. void blk_finish_plug(struct blk_plug *plug)
  2487. diff -Nur linux-4.8.15.orig/block/blk-ioc.c linux-4.8.15/block/blk-ioc.c
  2488. --- linux-4.8.15.orig/block/blk-ioc.c 2016-12-15 17:50:48.000000000 +0100
  2489. +++ linux-4.8.15/block/blk-ioc.c 2017-01-01 17:07:13.619271599 +0100
  2490. @@ -7,6 +7,7 @@
  2491. #include <linux/bio.h>
  2492. #include <linux/blkdev.h>
  2493. #include <linux/slab.h>
  2494. +#include <linux/delay.h>
  2495. #include "blk.h"
  2496. @@ -109,7 +110,7 @@
  2497. spin_unlock(q->queue_lock);
  2498. } else {
  2499. spin_unlock_irqrestore(&ioc->lock, flags);
  2500. - cpu_relax();
  2501. + cpu_chill();
  2502. spin_lock_irqsave_nested(&ioc->lock, flags, 1);
  2503. }
  2504. }
  2505. @@ -187,7 +188,7 @@
  2506. spin_unlock(icq->q->queue_lock);
  2507. } else {
  2508. spin_unlock_irqrestore(&ioc->lock, flags);
  2509. - cpu_relax();
  2510. + cpu_chill();
  2511. goto retry;
  2512. }
  2513. }
  2514. diff -Nur linux-4.8.15.orig/block/blk-mq.c linux-4.8.15/block/blk-mq.c
  2515. --- linux-4.8.15.orig/block/blk-mq.c 2016-12-15 17:50:48.000000000 +0100
  2516. +++ linux-4.8.15/block/blk-mq.c 2017-01-01 17:07:13.623271855 +0100
  2517. @@ -92,7 +92,7 @@
  2518. static void blk_mq_freeze_queue_wait(struct request_queue *q)
  2519. {
  2520. - wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
  2521. + swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
  2522. }
  2523. /*
  2524. @@ -130,7 +130,7 @@
  2525. WARN_ON_ONCE(freeze_depth < 0);
  2526. if (!freeze_depth) {
  2527. percpu_ref_reinit(&q->q_usage_counter);
  2528. - wake_up_all(&q->mq_freeze_wq);
  2529. + swake_up_all(&q->mq_freeze_wq);
  2530. }
  2531. }
  2532. EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
  2533. @@ -149,7 +149,7 @@
  2534. * dying, we need to ensure that processes currently waiting on
  2535. * the queue are notified as well.
  2536. */
  2537. - wake_up_all(&q->mq_freeze_wq);
  2538. + swake_up_all(&q->mq_freeze_wq);
  2539. }
  2540. bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
  2541. @@ -197,6 +197,9 @@
  2542. rq->resid_len = 0;
  2543. rq->sense = NULL;
  2544. +#ifdef CONFIG_PREEMPT_RT_FULL
  2545. + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
  2546. +#endif
  2547. INIT_LIST_HEAD(&rq->timeout_list);
  2548. rq->timeout = 0;
  2549. @@ -379,6 +382,17 @@
  2550. }
  2551. EXPORT_SYMBOL(blk_mq_end_request);
  2552. +#ifdef CONFIG_PREEMPT_RT_FULL
  2553. +
  2554. +void __blk_mq_complete_request_remote_work(struct work_struct *work)
  2555. +{
  2556. + struct request *rq = container_of(work, struct request, work);
  2557. +
  2558. + rq->q->softirq_done_fn(rq);
  2559. +}
  2560. +
  2561. +#else
  2562. +
  2563. static void __blk_mq_complete_request_remote(void *data)
  2564. {
  2565. struct request *rq = data;
  2566. @@ -386,6 +400,8 @@
  2567. rq->q->softirq_done_fn(rq);
  2568. }
  2569. +#endif
  2570. +
  2571. static void blk_mq_ipi_complete_request(struct request *rq)
  2572. {
  2573. struct blk_mq_ctx *ctx = rq->mq_ctx;
  2574. @@ -397,19 +413,23 @@
  2575. return;
  2576. }
  2577. - cpu = get_cpu();
  2578. + cpu = get_cpu_light();
  2579. if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
  2580. shared = cpus_share_cache(cpu, ctx->cpu);
  2581. if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
  2582. +#ifdef CONFIG_PREEMPT_RT_FULL
  2583. + schedule_work_on(ctx->cpu, &rq->work);
  2584. +#else
  2585. rq->csd.func = __blk_mq_complete_request_remote;
  2586. rq->csd.info = rq;
  2587. rq->csd.flags = 0;
  2588. smp_call_function_single_async(ctx->cpu, &rq->csd);
  2589. +#endif
  2590. } else {
  2591. rq->q->softirq_done_fn(rq);
  2592. }
  2593. - put_cpu();
  2594. + put_cpu_light();
  2595. }
  2596. static void __blk_mq_complete_request(struct request *rq)
  2597. @@ -938,14 +958,14 @@
  2598. return;
  2599. if (!async) {
  2600. - int cpu = get_cpu();
  2601. + int cpu = get_cpu_light();
  2602. if (cpumask_test_cpu(cpu, hctx->cpumask)) {
  2603. __blk_mq_run_hw_queue(hctx);
  2604. - put_cpu();
  2605. + put_cpu_light();
  2606. return;
  2607. }
  2608. - put_cpu();
  2609. + put_cpu_light();
  2610. }
  2611. kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
  2612. @@ -1667,7 +1687,7 @@
  2613. {
  2614. struct blk_mq_hw_ctx *hctx = data;
  2615. - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
  2616. + if (action == CPU_POST_DEAD)
  2617. return blk_mq_hctx_cpu_offline(hctx, cpu);
  2618. /*
  2619. diff -Nur linux-4.8.15.orig/block/blk-mq-cpu.c linux-4.8.15/block/blk-mq-cpu.c
  2620. --- linux-4.8.15.orig/block/blk-mq-cpu.c 2016-12-15 17:50:48.000000000 +0100
  2621. +++ linux-4.8.15/block/blk-mq-cpu.c 2017-01-01 17:07:13.623271855 +0100
  2622. @@ -16,7 +16,7 @@
  2623. #include "blk-mq.h"
  2624. static LIST_HEAD(blk_mq_cpu_notify_list);
  2625. -static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
  2626. +static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);
  2627. static int blk_mq_main_cpu_notify(struct notifier_block *self,
  2628. unsigned long action, void *hcpu)
  2629. @@ -25,7 +25,10 @@
  2630. struct blk_mq_cpu_notifier *notify;
  2631. int ret = NOTIFY_OK;
  2632. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  2633. + if (action != CPU_POST_DEAD)
  2634. + return NOTIFY_OK;
  2635. +
  2636. + spin_lock(&blk_mq_cpu_notify_lock);
  2637. list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) {
  2638. ret = notify->notify(notify->data, action, cpu);
  2639. @@ -33,7 +36,7 @@
  2640. break;
  2641. }
  2642. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  2643. + spin_unlock(&blk_mq_cpu_notify_lock);
  2644. return ret;
  2645. }
  2646. @@ -41,16 +44,16 @@
  2647. {
  2648. BUG_ON(!notifier->notify);
  2649. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  2650. + spin_lock(&blk_mq_cpu_notify_lock);
  2651. list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
  2652. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  2653. + spin_unlock(&blk_mq_cpu_notify_lock);
  2654. }
  2655. void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
  2656. {
  2657. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  2658. + spin_lock(&blk_mq_cpu_notify_lock);
  2659. list_del(&notifier->list);
  2660. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  2661. + spin_unlock(&blk_mq_cpu_notify_lock);
  2662. }
  2663. void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
  2664. diff -Nur linux-4.8.15.orig/block/blk-mq.h linux-4.8.15/block/blk-mq.h
  2665. --- linux-4.8.15.orig/block/blk-mq.h 2016-12-15 17:50:48.000000000 +0100
  2666. +++ linux-4.8.15/block/blk-mq.h 2017-01-01 17:07:13.627272112 +0100
  2667. @@ -86,12 +86,12 @@
  2668. */
  2669. static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
  2670. {
  2671. - return __blk_mq_get_ctx(q, get_cpu());
  2672. + return __blk_mq_get_ctx(q, get_cpu_light());
  2673. }
  2674. static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
  2675. {
  2676. - put_cpu();
  2677. + put_cpu_light();
  2678. }
  2679. struct blk_mq_alloc_data {
  2680. diff -Nur linux-4.8.15.orig/block/blk-softirq.c linux-4.8.15/block/blk-softirq.c
  2681. --- linux-4.8.15.orig/block/blk-softirq.c 2016-12-15 17:50:48.000000000 +0100
  2682. +++ linux-4.8.15/block/blk-softirq.c 2017-01-01 17:07:13.627272112 +0100
  2683. @@ -51,6 +51,7 @@
  2684. raise_softirq_irqoff(BLOCK_SOFTIRQ);
  2685. local_irq_restore(flags);
  2686. + preempt_check_resched_rt();
  2687. }
  2688. /*
  2689. @@ -93,6 +94,7 @@
  2690. this_cpu_ptr(&blk_cpu_done));
  2691. raise_softirq_irqoff(BLOCK_SOFTIRQ);
  2692. local_irq_enable();
  2693. + preempt_check_resched_rt();
  2694. }
  2695. return NOTIFY_OK;
  2696. @@ -150,6 +152,7 @@
  2697. goto do_local;
  2698. local_irq_restore(flags);
  2699. + preempt_check_resched_rt();
  2700. }
  2701. /**
  2702. diff -Nur linux-4.8.15.orig/block/bounce.c linux-4.8.15/block/bounce.c
  2703. --- linux-4.8.15.orig/block/bounce.c 2016-12-15 17:50:48.000000000 +0100
  2704. +++ linux-4.8.15/block/bounce.c 2017-01-01 17:07:13.627272112 +0100
  2705. @@ -55,11 +55,11 @@
  2706. unsigned long flags;
  2707. unsigned char *vto;
  2708. - local_irq_save(flags);
  2709. + local_irq_save_nort(flags);
  2710. vto = kmap_atomic(to->bv_page);
  2711. memcpy(vto + to->bv_offset, vfrom, to->bv_len);
  2712. kunmap_atomic(vto);
  2713. - local_irq_restore(flags);
  2714. + local_irq_restore_nort(flags);
  2715. }
  2716. #else /* CONFIG_HIGHMEM */
  2717. diff -Nur linux-4.8.15.orig/crypto/algapi.c linux-4.8.15/crypto/algapi.c
  2718. --- linux-4.8.15.orig/crypto/algapi.c 2016-12-15 17:50:48.000000000 +0100
  2719. +++ linux-4.8.15/crypto/algapi.c 2017-01-01 17:07:13.671274944 +0100
  2720. @@ -718,13 +718,13 @@
  2721. int crypto_register_notifier(struct notifier_block *nb)
  2722. {
  2723. - return blocking_notifier_chain_register(&crypto_chain, nb);
  2724. + return srcu_notifier_chain_register(&crypto_chain, nb);
  2725. }
  2726. EXPORT_SYMBOL_GPL(crypto_register_notifier);
  2727. int crypto_unregister_notifier(struct notifier_block *nb)
  2728. {
  2729. - return blocking_notifier_chain_unregister(&crypto_chain, nb);
  2730. + return srcu_notifier_chain_unregister(&crypto_chain, nb);
  2731. }
  2732. EXPORT_SYMBOL_GPL(crypto_unregister_notifier);
  2733. diff -Nur linux-4.8.15.orig/crypto/api.c linux-4.8.15/crypto/api.c
  2734. --- linux-4.8.15.orig/crypto/api.c 2016-12-15 17:50:48.000000000 +0100
  2735. +++ linux-4.8.15/crypto/api.c 2017-01-01 17:07:13.671274944 +0100
  2736. @@ -31,7 +31,7 @@
  2737. DECLARE_RWSEM(crypto_alg_sem);
  2738. EXPORT_SYMBOL_GPL(crypto_alg_sem);
  2739. -BLOCKING_NOTIFIER_HEAD(crypto_chain);
  2740. +SRCU_NOTIFIER_HEAD(crypto_chain);
  2741. EXPORT_SYMBOL_GPL(crypto_chain);
  2742. static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg);
  2743. @@ -236,10 +236,10 @@
  2744. {
  2745. int ok;
  2746. - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
  2747. + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
  2748. if (ok == NOTIFY_DONE) {
  2749. request_module("cryptomgr");
  2750. - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
  2751. + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
  2752. }
  2753. return ok;
  2754. diff -Nur linux-4.8.15.orig/crypto/internal.h linux-4.8.15/crypto/internal.h
  2755. --- linux-4.8.15.orig/crypto/internal.h 2016-12-15 17:50:48.000000000 +0100
  2756. +++ linux-4.8.15/crypto/internal.h 2017-01-01 17:07:13.675275210 +0100
  2757. @@ -47,7 +47,7 @@
  2758. extern struct list_head crypto_alg_list;
  2759. extern struct rw_semaphore crypto_alg_sem;
  2760. -extern struct blocking_notifier_head crypto_chain;
  2761. +extern struct srcu_notifier_head crypto_chain;
  2762. #ifdef CONFIG_PROC_FS
  2763. void __init crypto_init_proc(void);
  2764. @@ -146,7 +146,7 @@
  2765. static inline void crypto_notify(unsigned long val, void *v)
  2766. {
  2767. - blocking_notifier_call_chain(&crypto_chain, val, v);
  2768. + srcu_notifier_call_chain(&crypto_chain, val, v);
  2769. }
  2770. #endif /* _CRYPTO_INTERNAL_H */
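
The three crypto hunks above replace a blocking notifier chain with an SRCU notifier chain, so the chain can be walked without taking a rw_semaphore. A minimal sketch of the same pattern follows; "demo_chain", "demo_nb" and the callback are illustrative names, not part of this patch.

    /* Sketch of the SRCU notifier pattern applied above; names are
     * illustrative. Readers traverse the chain under SRCU instead of a
     * rw_semaphore, which is what the crypto hunks switch to. */
    #include <linux/module.h>
    #include <linux/notifier.h>

    SRCU_NOTIFIER_HEAD(demo_chain);     /* instead of BLOCKING_NOTIFIER_HEAD */

    static int demo_notifier_cb(struct notifier_block *nb,
                                unsigned long event, void *data)
    {
            pr_info("demo_chain event %lu\n", event);
            return NOTIFY_OK;
    }

    static struct notifier_block demo_nb = {
            .notifier_call = demo_notifier_cb,
    };

    static int __init demo_init(void)
    {
            srcu_notifier_chain_register(&demo_chain, &demo_nb);
            srcu_notifier_call_chain(&demo_chain, 1, NULL);
            return 0;
    }

    static void __exit demo_exit(void)
    {
            srcu_notifier_chain_unregister(&demo_chain, &demo_nb);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
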
  2771. diff -Nur linux-4.8.15.orig/Documentation/hwlat_detector.txt linux-4.8.15/Documentation/hwlat_detector.txt
  2772. --- linux-4.8.15.orig/Documentation/hwlat_detector.txt 1970-01-01 01:00:00.000000000 +0100
  2773. +++ linux-4.8.15/Documentation/hwlat_detector.txt 2017-01-01 17:07:11.319123458 +0100
  2774. @@ -0,0 +1,64 @@
  2775. +Introduction:
  2776. +-------------
  2777. +
  2778. +The module hwlat_detector is a special purpose kernel module that is used to
  2779. +detect large system latencies induced by the behavior of certain underlying
  2780. +hardware or firmware, independent of Linux itself. The code was developed
2781. +originally to detect SMIs (System Management Interrupts) on x86 systems;
2782. +however, there is nothing x86-specific about this patchset. It was
  2783. +originally written for use by the "RT" patch since the Real Time
  2784. +kernel is highly latency sensitive.
  2785. +
  2786. +SMIs are usually not serviced by the Linux kernel, which typically does not
2787. +even know that they are occurring. SMIs are instead set up by BIOS code
  2788. +and are serviced by BIOS code, usually for "critical" events such as
  2789. +management of thermal sensors and fans. Sometimes though, SMIs are used for
  2790. +other tasks and those tasks can spend an inordinate amount of time in the
  2791. +handler (sometimes measured in milliseconds). Obviously this is a problem if
  2792. +you are trying to keep event service latencies down in the microsecond range.
  2793. +
  2794. +The hardware latency detector works by hogging all of the cpus for configurable
  2795. +amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter
  2796. +for some period, then looking for gaps in the TSC data. Any gap indicates a
2797. +time when the polling was interrupted, and since the machine is stopped and
2798. +interrupts are turned off, the only thing that could do that would be an SMI.
  2799. +
  2800. +Note that the SMI detector should *NEVER* be used in a production environment.
  2801. +It is intended to be run manually to determine if the hardware platform has a
  2802. +problem with long system firmware service routines.
  2803. +
  2804. +Usage:
  2805. +------
  2806. +
2807. +Loading the module hwlat_detector with the parameter "enabled=1" (or toggling
2808. +on the "enable" entry in the "hwlat_detector" debugfs directory) is the only
  2809. +step required to start the hwlat_detector. It is possible to redefine the
  2810. +threshold in microseconds (us) above which latency spikes will be taken
  2811. +into account (parameter "threshold=").
  2812. +
  2813. +Example:
  2814. +
  2815. + # modprobe hwlat_detector enabled=1 threshold=100
  2816. +
  2817. +After the module is loaded, it creates a directory named "hwlat_detector" under
2818. +the debugfs mountpoint, referred to as "/debug/hwlat_detector" in this text. It
2819. +is necessary to have debugfs mounted, which might be on /sys/debug on your system.
  2820. +
  2821. +The /debug/hwlat_detector interface contains the following files:
  2822. +
  2823. +count - number of latency spikes observed since last reset
  2824. +enable - a global enable/disable toggle (0/1), resets count
  2825. +max - maximum hardware latency actually observed (usecs)
  2826. +sample - a pipe from which to read current raw sample data
  2827. + in the format <timestamp> <latency observed usecs>
  2828. + (can be opened O_NONBLOCK for a single sample)
  2829. +threshold - minimum latency value to be considered (usecs)
  2830. +width - time period to sample with CPUs held (usecs)
  2831. + must be less than the total window size (enforced)
  2832. +window - total period of sampling, width being inside (usecs)
  2833. +
  2834. +By default we will set width to 500,000 and window to 1,000,000, meaning that
  2835. +we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we
  2836. +observe any latencies that exceed the threshold (initially 100 usecs),
  2837. +then we write to a global sample ring buffer of 8K samples, which is
  2838. +consumed by reading from the "sample" (pipe) debugfs file interface.
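
Since the text above notes that "sample" can be opened O_NONBLOCK to obtain a single sample, here is a small userspace sketch of that read. The path follows the layout described above and may need adjusting to your debugfs mount point; error handling is minimal.

    /* Sketch: read one raw sample from the hwlat_detector "sample" pipe,
     * assuming the debugfs layout described above. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[128];
            ssize_t n;
            /* adjust the prefix to wherever debugfs is mounted */
            int fd = open("/sys/kernel/debug/hwlat_detector/sample",
                          O_RDONLY | O_NONBLOCK);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            n = read(fd, buf, sizeof(buf) - 1);
            if (n > 0) {
                    buf[n] = '\0';
                    /* format: <timestamp> <latency observed usecs> */
                    printf("%s", buf);
            }
            close(fd);
            return 0;
    }
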
  2839. diff -Nur linux-4.8.15.orig/Documentation/sysrq.txt linux-4.8.15/Documentation/sysrq.txt
  2840. --- linux-4.8.15.orig/Documentation/sysrq.txt 2016-12-15 17:50:48.000000000 +0100
  2841. +++ linux-4.8.15/Documentation/sysrq.txt 2017-01-01 17:07:11.331124221 +0100
  2842. @@ -59,10 +59,17 @@
  2843. On other - If you know of the key combos for other architectures, please
  2844. let me know so I can add them to this section.
  2845. -On all - write a character to /proc/sysrq-trigger. e.g.:
  2846. -
  2847. +On all - write a character to /proc/sysrq-trigger, e.g.:
  2848. echo t > /proc/sysrq-trigger
  2849. +On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g.
  2850. + echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq
  2851. + Send an ICMP echo request with this pattern plus the particular
  2852. + SysRq command key. Example:
  2853. + # ping -c1 -s57 -p0102030468
  2854. + will trigger the SysRq-H (help) command.
  2855. +
  2856. +
  2857. * What are the 'command' keys?
  2858. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  2859. 'b' - Will immediately reboot the system without syncing or unmounting
  2860. diff -Nur linux-4.8.15.orig/Documentation/trace/histograms.txt linux-4.8.15/Documentation/trace/histograms.txt
  2861. --- linux-4.8.15.orig/Documentation/trace/histograms.txt 1970-01-01 01:00:00.000000000 +0100
  2862. +++ linux-4.8.15/Documentation/trace/histograms.txt 2017-01-01 17:07:11.399128611 +0100
  2863. @@ -0,0 +1,186 @@
  2864. + Using the Linux Kernel Latency Histograms
  2865. +
  2866. +
2867. +This document gives a short explanation of how to enable, configure and use
  2868. +latency histograms. Latency histograms are primarily relevant in the
  2869. +context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT)
  2870. +and are used in the quality management of the Linux real-time
  2871. +capabilities.
  2872. +
  2873. +
  2874. +* Purpose of latency histograms
  2875. +
  2876. +A latency histogram continuously accumulates the frequencies of latency
2877. +data. There are two types of histograms:
  2878. +- potential sources of latencies
  2879. +- effective latencies
  2880. +
  2881. +
  2882. +* Potential sources of latencies
  2883. +
  2884. +Potential sources of latencies are code segments where interrupts,
  2885. +preemption or both are disabled (aka critical sections). To create
  2886. +histograms of potential sources of latency, the kernel stores the time
  2887. +stamp at the start of a critical section, determines the time elapsed
  2888. +when the end of the section is reached, and increments the frequency
  2889. +counter of that latency value - irrespective of whether any concurrently
  2890. +running process is affected by latency or not.
  2891. +- Configuration items (in the Kernel hacking/Tracers submenu)
  2892. + CONFIG_INTERRUPT_OFF_LATENCY
  2893. + CONFIG_PREEMPT_OFF_LATENCY
  2894. +
  2895. +
  2896. +* Effective latencies
  2897. +
2898. +Effective latencies are those actually occurring during wakeup of a process. To
  2899. +determine effective latencies, the kernel stores the time stamp when a
  2900. +process is scheduled to be woken up, and determines the duration of the
  2901. +wakeup time shortly before control is passed over to this process. Note
  2902. +that the apparent latency in user space may be somewhat longer, since the
  2903. +process may be interrupted after control is passed over to it but before
  2904. +the execution in user space takes place. Simply measuring the interval
2905. +between enqueuing and wakeup may also not be appropriate in cases when a
  2906. +process is scheduled as a result of a timer expiration. The timer may have
  2907. +missed its deadline, e.g. due to disabled interrupts, but this latency
  2908. +would not be registered. Therefore, the offsets of missed timers are
  2909. +recorded in a separate histogram. If both wakeup latency and missed timer
  2910. +offsets are configured and enabled, a third histogram may be enabled that
  2911. +records the overall latency as a sum of the timer latency, if any, and the
  2912. +wakeup latency. This histogram is called "timerandwakeup".
  2913. +- Configuration items (in the Kernel hacking/Tracers submenu)
  2914. + CONFIG_WAKEUP_LATENCY
2915. + CONFIG_MISSED_TIMER_OFFSETS
  2916. +
  2917. +
  2918. +* Usage
  2919. +
  2920. +The interface to the administration of the latency histograms is located
  2921. +in the debugfs file system. To mount it, either enter
  2922. +
  2923. +mount -t sysfs nodev /sys
  2924. +mount -t debugfs nodev /sys/kernel/debug
  2925. +
  2926. +from shell command line level, or add
  2927. +
  2928. +nodev /sys sysfs defaults 0 0
  2929. +nodev /sys/kernel/debug debugfs defaults 0 0
  2930. +
  2931. +to the file /etc/fstab. All latency histogram related files are then
  2932. +available in the directory /sys/kernel/debug/tracing/latency_hist. A
  2933. +particular histogram type is enabled by writing non-zero to the related
  2934. +variable in the /sys/kernel/debug/tracing/latency_hist/enable directory.
  2935. +Select "preemptirqsoff" for the histograms of potential sources of
  2936. +latencies and "wakeup" for histograms of effective latencies etc. The
  2937. +histogram data - one per CPU - are available in the files
  2938. +
  2939. +/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx
  2940. +/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx
  2941. +/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx
  2942. +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx
  2943. +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx
  2944. +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx
  2945. +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx
  2946. +
  2947. +The histograms are reset by writing non-zero to the file "reset" in a
  2948. +particular latency directory. To reset all latency data, use
  2949. +
  2950. +#!/bin/sh
  2951. +
  2952. +TRACINGDIR=/sys/kernel/debug/tracing
  2953. +HISTDIR=$TRACINGDIR/latency_hist
  2954. +
  2955. +if test -d $HISTDIR
  2956. +then
  2957. + cd $HISTDIR
  2958. + for i in `find . | grep /reset$`
  2959. + do
  2960. + echo 1 >$i
  2961. + done
  2962. +fi
  2963. +
  2964. +
  2965. +* Data format
  2966. +
  2967. +Latency data are stored with a resolution of one microsecond. The
2968. +maximum latency is 10,240 microseconds. The data are only valid if the
  2969. +overflow register is empty. Every output line contains the latency in
2970. +microseconds in the first column and the number of samples in the second
2971. +column. To display only lines with a positive latency count, use, for
  2972. +example,
  2973. +
  2974. +grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0
  2975. +
  2976. +#Minimum latency: 0 microseconds.
  2977. +#Average latency: 0 microseconds.
  2978. +#Maximum latency: 25 microseconds.
  2979. +#Total samples: 3104770694
  2980. +#There are 0 samples greater or equal than 10240 microseconds
  2981. +#usecs samples
  2982. + 0 2984486876
  2983. + 1 49843506
  2984. + 2 58219047
  2985. + 3 5348126
  2986. + 4 2187960
  2987. + 5 3388262
  2988. + 6 959289
  2989. + 7 208294
  2990. + 8 40420
  2991. + 9 4485
  2992. + 10 14918
  2993. + 11 18340
  2994. + 12 25052
  2995. + 13 19455
  2996. + 14 5602
  2997. + 15 969
  2998. + 16 47
  2999. + 17 18
  3000. + 18 14
  3001. + 19 1
  3002. + 20 3
  3003. + 21 2
  3004. + 22 5
  3005. + 23 2
  3006. + 25 1
  3007. +
  3008. +
  3009. +* Wakeup latency of a selected process
  3010. +
  3011. +To only collect wakeup latency data of a particular process, write the
  3012. +PID of the requested process to
  3013. +
  3014. +/sys/kernel/debug/tracing/latency_hist/wakeup/pid
  3015. +
3016. +PIDs are not considered if this variable is set to 0.
  3017. +
  3018. +
  3019. +* Details of the process with the highest wakeup latency so far
  3020. +
  3021. +Selected data of the process that suffered from the highest wakeup
3022. +latency that occurred on a particular CPU are available in the file
  3023. +
  3024. +/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx.
  3025. +
  3026. +In addition, other relevant system data at the time when the
  3027. +latency occurred are given.
  3028. +
  3029. +The format of the data is (all in one line):
  3030. +<PID> <Priority> <Latency> (<Timeroffset>) <Command> \
  3031. +<- <PID> <Priority> <Command> <Timestamp>
  3032. +
  3033. +The value of <Timeroffset> is only relevant in the combined timer
  3034. +and wakeup latency recording. In the wakeup recording, it is
3035. +always 0; in the missed_timer_offsets recording, it is the same
  3036. +as <Latency>.
  3037. +
3038. +When retrospectively searching for the origin of a latency while
  3039. +tracing was not enabled, it may be helpful to know the name and
  3040. +some basic data of the task that (finally) was switching to the
3041. +late real-time task. In addition to the victim's data, the
  3042. +data of the possible culprit are therefore displayed after the
  3043. +"<-" symbol.
  3044. +
  3045. +Finally, the timestamp of the time when the latency occurred
  3046. +in <seconds>.<microseconds> after the most recent system boot
  3047. +is provided.
  3048. +
  3049. +These data are also reset when the wakeup histogram is reset.
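
Because the interface described above consists of plain debugfs files, a histogram can also be enabled and restricted to one PID programmatically. A short sketch under the paths listed in the text above; the PID value is an arbitrary example and error handling is trimmed.

    /* Sketch: enable the wakeup latency histogram and restrict it to one
     * PID, using the debugfs files documented above. */
    #include <stdio.h>

    #define HISTDIR "/sys/kernel/debug/tracing/latency_hist"

    static int write_str(const char *path, const char *val)
    {
            FILE *f = fopen(path, "w");

            if (!f)
                    return -1;
            fputs(val, f);
            fclose(f);
            return 0;
    }

    int main(void)
    {
            /* enable histograms of effective (wakeup) latencies */
            write_str(HISTDIR "/enable/wakeup", "1");
            /* only record wakeup latencies of PID 4711 (0 = all PIDs) */
            write_str(HISTDIR "/wakeup/pid", "4711");
            return 0;
    }
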
  3050. diff -Nur linux-4.8.15.orig/drivers/acpi/acpica/acglobal.h linux-4.8.15/drivers/acpi/acpica/acglobal.h
  3051. --- linux-4.8.15.orig/drivers/acpi/acpica/acglobal.h 2016-12-15 17:50:48.000000000 +0100
  3052. +++ linux-4.8.15/drivers/acpi/acpica/acglobal.h 2017-01-01 17:07:13.707277269 +0100
  3053. @@ -116,7 +116,7 @@
  3054. * interrupt level
  3055. */
  3056. ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */
  3057. -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
  3058. +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
  3059. ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock);
  3060. /* Mutex for _OSI support */
  3061. diff -Nur linux-4.8.15.orig/drivers/acpi/acpica/hwregs.c linux-4.8.15/drivers/acpi/acpica/hwregs.c
  3062. --- linux-4.8.15.orig/drivers/acpi/acpica/hwregs.c 2016-12-15 17:50:48.000000000 +0100
  3063. +++ linux-4.8.15/drivers/acpi/acpica/hwregs.c 2017-01-01 17:07:13.719278046 +0100
  3064. @@ -363,14 +363,14 @@
  3065. ACPI_BITMASK_ALL_FIXED_STATUS,
  3066. ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address)));
  3067. - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
  3068. + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
  3069. /* Clear the fixed events in PM1 A/B */
  3070. status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
  3071. ACPI_BITMASK_ALL_FIXED_STATUS);
  3072. - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
  3073. + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
  3074. if (ACPI_FAILURE(status)) {
  3075. goto exit;
  3076. diff -Nur linux-4.8.15.orig/drivers/acpi/acpica/hwxface.c linux-4.8.15/drivers/acpi/acpica/hwxface.c
  3077. --- linux-4.8.15.orig/drivers/acpi/acpica/hwxface.c 2016-12-15 17:50:48.000000000 +0100
  3078. +++ linux-4.8.15/drivers/acpi/acpica/hwxface.c 2017-01-01 17:07:13.719278046 +0100
  3079. @@ -373,7 +373,7 @@
  3080. return_ACPI_STATUS(AE_BAD_PARAMETER);
  3081. }
  3082. - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
  3083. + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
  3084. /*
  3085. * At this point, we know that the parent register is one of the
  3086. @@ -434,7 +434,7 @@
  3087. unlock_and_exit:
  3088. - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
  3089. + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
  3090. return_ACPI_STATUS(status);
  3091. }
  3092. diff -Nur linux-4.8.15.orig/drivers/acpi/acpica/utmutex.c linux-4.8.15/drivers/acpi/acpica/utmutex.c
  3093. --- linux-4.8.15.orig/drivers/acpi/acpica/utmutex.c 2016-12-15 17:50:48.000000000 +0100
  3094. +++ linux-4.8.15/drivers/acpi/acpica/utmutex.c 2017-01-01 17:07:13.727278551 +0100
  3095. @@ -88,7 +88,7 @@
  3096. return_ACPI_STATUS (status);
  3097. }
  3098. - status = acpi_os_create_lock (&acpi_gbl_hardware_lock);
  3099. + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock);
  3100. if (ACPI_FAILURE (status)) {
  3101. return_ACPI_STATUS (status);
  3102. }
  3103. @@ -145,7 +145,7 @@
  3104. /* Delete the spinlocks */
  3105. acpi_os_delete_lock(acpi_gbl_gpe_lock);
  3106. - acpi_os_delete_lock(acpi_gbl_hardware_lock);
  3107. + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock);
  3108. acpi_os_delete_lock(acpi_gbl_reference_count_lock);
  3109. /* Delete the reader/writer lock */
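
The ACPI hunks above convert acpi_gbl_hardware_lock to a raw spinlock because, on PREEMPT_RT, an ordinary spinlock_t becomes a sleeping lock and may not be taken in truly atomic hardware-access paths. A minimal sketch of the resulting locking pattern; "demo_hw_lock" is an illustrative name, not part of this patch.

    /* Sketch of the raw-spinlock pattern the ACPI hunks switch to.
     * raw_spin_lock_irqsave() stays a non-sleeping lock even on RT. */
    #include <linux/spinlock.h>

    static DEFINE_RAW_SPINLOCK(demo_hw_lock);

    static void demo_poke_hw(void)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&demo_hw_lock, flags);
            /* short, bounded hardware access; must never sleep */
            raw_spin_unlock_irqrestore(&demo_hw_lock, flags);
    }
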
  3110. diff -Nur linux-4.8.15.orig/drivers/ata/libata-sff.c linux-4.8.15/drivers/ata/libata-sff.c
  3111. --- linux-4.8.15.orig/drivers/ata/libata-sff.c 2016-12-15 17:50:48.000000000 +0100
  3112. +++ linux-4.8.15/drivers/ata/libata-sff.c 2017-01-01 17:07:13.759280614 +0100
  3113. @@ -678,9 +678,9 @@
  3114. unsigned long flags;
  3115. unsigned int consumed;
  3116. - local_irq_save(flags);
  3117. + local_irq_save_nort(flags);
  3118. consumed = ata_sff_data_xfer32(dev, buf, buflen, rw);
  3119. - local_irq_restore(flags);
  3120. + local_irq_restore_nort(flags);
  3121. return consumed;
  3122. }
  3123. @@ -719,7 +719,7 @@
  3124. unsigned long flags;
  3125. /* FIXME: use a bounce buffer */
  3126. - local_irq_save(flags);
  3127. + local_irq_save_nort(flags);
  3128. buf = kmap_atomic(page);
  3129. /* do the actual data transfer */
  3130. @@ -727,7 +727,7 @@
  3131. do_write);
  3132. kunmap_atomic(buf);
  3133. - local_irq_restore(flags);
  3134. + local_irq_restore_nort(flags);
  3135. } else {
  3136. buf = page_address(page);
  3137. ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size,
  3138. @@ -864,7 +864,7 @@
  3139. unsigned long flags;
  3140. /* FIXME: use bounce buffer */
  3141. - local_irq_save(flags);
  3142. + local_irq_save_nort(flags);
  3143. buf = kmap_atomic(page);
  3144. /* do the actual data transfer */
  3145. @@ -872,7 +872,7 @@
  3146. count, rw);
  3147. kunmap_atomic(buf);
  3148. - local_irq_restore(flags);
  3149. + local_irq_restore_nort(flags);
  3150. } else {
  3151. buf = page_address(page);
  3152. consumed = ap->ops->sff_data_xfer(dev, buf + offset,
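
The libata hunks above, like many that follow, switch to the local_irq_save_nort()/local_irq_restore_nort() helpers introduced elsewhere in this patch: on a non-RT kernel they behave exactly like local_irq_save()/local_irq_restore(), while on PREEMPT_RT they leave interrupts enabled because the surrounding code no longer runs in hard-IRQ context. A sketch of the call-site pattern; the buffer copy is illustrative, not taken from libata.

    /* Illustrative call-site pattern for the _nort conversions; the
     * local_irq_save_nort()/local_irq_restore_nort() helpers are provided
     * by this patch (no-ops on RT, real IRQ disable otherwise). */
    #include <linux/highmem.h>
    #include <linux/string.h>

    static void demo_copy_with_kmap(struct page *page, void *src, size_t len)
    {
            unsigned long flags;
            void *dst;

            local_irq_save_nort(flags);     /* was: local_irq_save(flags) */
            dst = kmap_atomic(page);
            memcpy(dst, src, len);
            kunmap_atomic(dst);
            local_irq_restore_nort(flags);  /* was: local_irq_restore(flags) */
    }
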
  3153. diff -Nur linux-4.8.15.orig/drivers/block/zram/zcomp.c linux-4.8.15/drivers/block/zram/zcomp.c
  3154. --- linux-4.8.15.orig/drivers/block/zram/zcomp.c 2016-12-15 17:50:48.000000000 +0100
  3155. +++ linux-4.8.15/drivers/block/zram/zcomp.c 2017-01-01 17:07:13.815284222 +0100
  3156. @@ -118,12 +118,19 @@
  3157. struct zcomp_strm *zcomp_stream_get(struct zcomp *comp)
  3158. {
  3159. - return *get_cpu_ptr(comp->stream);
  3160. + struct zcomp_strm *zstrm;
  3161. +
  3162. + zstrm = *this_cpu_ptr(comp->stream);
  3163. + spin_lock(&zstrm->zcomp_lock);
  3164. + return zstrm;
  3165. }
  3166. void zcomp_stream_put(struct zcomp *comp)
  3167. {
  3168. - put_cpu_ptr(comp->stream);
  3169. + struct zcomp_strm *zstrm;
  3170. +
  3171. + zstrm = *this_cpu_ptr(comp->stream);
  3172. + spin_unlock(&zstrm->zcomp_lock);
  3173. }
  3174. int zcomp_compress(struct zcomp_strm *zstrm,
  3175. @@ -174,6 +181,7 @@
  3176. pr_err("Can't allocate a compression stream\n");
  3177. return NOTIFY_BAD;
  3178. }
  3179. + spin_lock_init(&zstrm->zcomp_lock);
  3180. *per_cpu_ptr(comp->stream, cpu) = zstrm;
  3181. break;
  3182. case CPU_DEAD:
  3183. diff -Nur linux-4.8.15.orig/drivers/block/zram/zcomp.h linux-4.8.15/drivers/block/zram/zcomp.h
  3184. --- linux-4.8.15.orig/drivers/block/zram/zcomp.h 2016-12-15 17:50:48.000000000 +0100
  3185. +++ linux-4.8.15/drivers/block/zram/zcomp.h 2017-01-01 17:07:13.823284735 +0100
  3186. @@ -14,6 +14,7 @@
  3187. /* compression/decompression buffer */
  3188. void *buffer;
  3189. struct crypto_comp *tfm;
  3190. + spinlock_t zcomp_lock;
  3191. };
  3192. /* dynamic per-device compression frontend */
  3193. diff -Nur linux-4.8.15.orig/drivers/block/zram/zram_drv.c linux-4.8.15/drivers/block/zram/zram_drv.c
  3194. --- linux-4.8.15.orig/drivers/block/zram/zram_drv.c 2016-12-15 17:50:48.000000000 +0100
  3195. +++ linux-4.8.15/drivers/block/zram/zram_drv.c 2017-01-01 17:07:13.831285255 +0100
  3196. @@ -519,6 +519,8 @@
  3197. goto out_error;
  3198. }
  3199. + zram_meta_init_table_locks(meta, disksize);
  3200. +
  3201. return meta;
  3202. out_error:
  3203. @@ -566,28 +568,28 @@
  3204. struct zram_meta *meta = zram->meta;
  3205. unsigned long handle;
  3206. unsigned int size;
  3207. + struct zcomp_strm *zstrm;
  3208. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3209. + zram_lock_table(&meta->table[index]);
  3210. handle = meta->table[index].handle;
  3211. size = zram_get_obj_size(meta, index);
  3212. if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
  3213. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3214. + zram_unlock_table(&meta->table[index]);
  3215. clear_page(mem);
  3216. return 0;
  3217. }
  3218. + zstrm = zcomp_stream_get(zram->comp);
  3219. cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
  3220. if (size == PAGE_SIZE) {
  3221. copy_page(mem, cmem);
  3222. } else {
  3223. - struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
  3224. -
  3225. ret = zcomp_decompress(zstrm, cmem, size, mem);
  3226. - zcomp_stream_put(zram->comp);
  3227. }
  3228. zs_unmap_object(meta->mem_pool, handle);
  3229. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3230. + zcomp_stream_put(zram->comp);
  3231. + zram_unlock_table(&meta->table[index]);
  3232. /* Should NEVER happen. Return bio error if it does. */
  3233. if (unlikely(ret)) {
  3234. @@ -607,14 +609,14 @@
  3235. struct zram_meta *meta = zram->meta;
  3236. page = bvec->bv_page;
  3237. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3238. + zram_lock_table(&meta->table[index]);
  3239. if (unlikely(!meta->table[index].handle) ||
  3240. zram_test_flag(meta, index, ZRAM_ZERO)) {
  3241. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3242. + zram_unlock_table(&meta->table[index]);
  3243. handle_zero_page(bvec);
  3244. return 0;
  3245. }
  3246. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3247. + zram_unlock_table(&meta->table[index]);
  3248. if (is_partial_io(bvec))
  3249. /* Use a temporary buffer to decompress the page */
  3250. @@ -691,10 +693,10 @@
  3251. if (user_mem)
  3252. kunmap_atomic(user_mem);
  3253. /* Free memory associated with this sector now. */
  3254. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3255. + zram_lock_table(&meta->table[index]);
  3256. zram_free_page(zram, index);
  3257. zram_set_flag(meta, index, ZRAM_ZERO);
  3258. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3259. + zram_unlock_table(&meta->table[index]);
  3260. atomic64_inc(&zram->stats.zero_pages);
  3261. ret = 0;
  3262. @@ -785,12 +787,12 @@
  3263. * Free memory associated with this sector
  3264. * before overwriting unused sectors.
  3265. */
  3266. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3267. + zram_lock_table(&meta->table[index]);
  3268. zram_free_page(zram, index);
  3269. meta->table[index].handle = handle;
  3270. zram_set_obj_size(meta, index, clen);
  3271. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3272. + zram_unlock_table(&meta->table[index]);
  3273. /* Update stats */
  3274. atomic64_add(clen, &zram->stats.compr_data_size);
  3275. @@ -833,9 +835,9 @@
  3276. }
  3277. while (n >= PAGE_SIZE) {
  3278. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3279. + zram_lock_table(&meta->table[index]);
  3280. zram_free_page(zram, index);
  3281. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3282. + zram_unlock_table(&meta->table[index]);
  3283. atomic64_inc(&zram->stats.notify_free);
  3284. index++;
  3285. n -= PAGE_SIZE;
  3286. @@ -964,9 +966,9 @@
  3287. zram = bdev->bd_disk->private_data;
  3288. meta = zram->meta;
  3289. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3290. + zram_lock_table(&meta->table[index]);
  3291. zram_free_page(zram, index);
  3292. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3293. + zram_unlock_table(&meta->table[index]);
  3294. atomic64_inc(&zram->stats.notify_free);
  3295. }
  3296. diff -Nur linux-4.8.15.orig/drivers/block/zram/zram_drv.h linux-4.8.15/drivers/block/zram/zram_drv.h
  3297. --- linux-4.8.15.orig/drivers/block/zram/zram_drv.h 2016-12-15 17:50:48.000000000 +0100
  3298. +++ linux-4.8.15/drivers/block/zram/zram_drv.h 2017-01-01 17:07:13.831285255 +0100
  3299. @@ -73,6 +73,9 @@
  3300. struct zram_table_entry {
  3301. unsigned long handle;
  3302. unsigned long value;
  3303. +#ifdef CONFIG_PREEMPT_RT_BASE
  3304. + spinlock_t lock;
  3305. +#endif
  3306. };
  3307. struct zram_stats {
  3308. @@ -120,4 +123,42 @@
  3309. */
  3310. bool claim; /* Protected by bdev->bd_mutex */
  3311. };
  3312. +
  3313. +#ifndef CONFIG_PREEMPT_RT_BASE
  3314. +static inline void zram_lock_table(struct zram_table_entry *table)
  3315. +{
  3316. + bit_spin_lock(ZRAM_ACCESS, &table->value);
  3317. +}
  3318. +
  3319. +static inline void zram_unlock_table(struct zram_table_entry *table)
  3320. +{
  3321. + bit_spin_unlock(ZRAM_ACCESS, &table->value);
  3322. +}
  3323. +
  3324. +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) { }
  3325. +#else /* CONFIG_PREEMPT_RT_BASE */
  3326. +static inline void zram_lock_table(struct zram_table_entry *table)
  3327. +{
  3328. + spin_lock(&table->lock);
  3329. + __set_bit(ZRAM_ACCESS, &table->value);
  3330. +}
  3331. +
  3332. +static inline void zram_unlock_table(struct zram_table_entry *table)
  3333. +{
  3334. + __clear_bit(ZRAM_ACCESS, &table->value);
  3335. + spin_unlock(&table->lock);
  3336. +}
  3337. +
  3338. +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize)
  3339. +{
  3340. + size_t num_pages = disksize >> PAGE_SHIFT;
  3341. + size_t index;
  3342. +
  3343. + for (index = 0; index < num_pages; index++) {
  3344. + spinlock_t *lock = &meta->table[index].lock;
  3345. + spin_lock_init(lock);
  3346. + }
  3347. +}
  3348. +#endif /* CONFIG_PREEMPT_RT_BASE */
  3349. +
  3350. #endif
  3351. diff -Nur linux-4.8.15.orig/drivers/char/random.c linux-4.8.15/drivers/char/random.c
  3352. --- linux-4.8.15.orig/drivers/char/random.c 2016-12-15 17:50:48.000000000 +0100
  3353. +++ linux-4.8.15/drivers/char/random.c 2017-01-01 17:07:13.863287315 +0100
  3354. @@ -1028,8 +1028,6 @@
  3355. } sample;
  3356. long delta, delta2, delta3;
  3357. - preempt_disable();
  3358. -
  3359. sample.jiffies = jiffies;
  3360. sample.cycles = random_get_entropy();
  3361. sample.num = num;
  3362. @@ -1070,7 +1068,6 @@
  3363. */
  3364. credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
  3365. }
  3366. - preempt_enable();
  3367. }
  3368. void add_input_randomness(unsigned int type, unsigned int code,
  3369. @@ -1123,28 +1120,27 @@
  3370. return *(ptr + f->reg_idx++);
  3371. }
  3372. -void add_interrupt_randomness(int irq, int irq_flags)
  3373. +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip)
  3374. {
  3375. struct entropy_store *r;
  3376. struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
  3377. - struct pt_regs *regs = get_irq_regs();
  3378. unsigned long now = jiffies;
  3379. cycles_t cycles = random_get_entropy();
  3380. __u32 c_high, j_high;
  3381. - __u64 ip;
  3382. unsigned long seed;
  3383. int credit = 0;
  3384. if (cycles == 0)
  3385. - cycles = get_reg(fast_pool, regs);
  3386. + cycles = get_reg(fast_pool, NULL);
  3387. c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
  3388. j_high = (sizeof(now) > 4) ? now >> 32 : 0;
  3389. fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
  3390. fast_pool->pool[1] ^= now ^ c_high;
  3391. - ip = regs ? instruction_pointer(regs) : _RET_IP_;
  3392. + if (!ip)
  3393. + ip = _RET_IP_;
  3394. fast_pool->pool[2] ^= ip;
  3395. fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 :
  3396. - get_reg(fast_pool, regs);
  3397. + get_reg(fast_pool, NULL);
  3398. fast_mix(fast_pool);
  3399. add_interrupt_bench(cycles);
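
With this change, add_interrupt_randomness() no longer fetches the interrupted registers itself; a caller that still runs in hard-IRQ context passes the instruction pointer down, and 0 makes the function fall back to _RET_IP_. The vmbus hunk later in this patch does exactly this; below is a condensed sketch of the caller side, with "demo_handler" being an illustrative name.

    /* Caller-side pattern for the new add_interrupt_randomness(irq, flags, ip). */
    #include <linux/ptrace.h>
    #include <linux/random.h>
    #include <linux/types.h>
    #include <asm/irq_regs.h>

    static void demo_handler(int irq)
    {
            struct pt_regs *regs = get_irq_regs();
            u64 ip = regs ? instruction_pointer(regs) : 0;  /* 0 => _RET_IP_ */

            add_interrupt_randomness(irq, 0, ip);
    }
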
  3400. diff -Nur linux-4.8.15.orig/drivers/clocksource/tcb_clksrc.c linux-4.8.15/drivers/clocksource/tcb_clksrc.c
  3401. --- linux-4.8.15.orig/drivers/clocksource/tcb_clksrc.c 2016-12-15 17:50:48.000000000 +0100
  3402. +++ linux-4.8.15/drivers/clocksource/tcb_clksrc.c 2017-01-01 17:07:13.903289890 +0100
  3403. @@ -23,8 +23,7 @@
  3404. * this 32 bit free-running counter. the second channel is not used.
  3405. *
  3406. * - The third channel may be used to provide a 16-bit clockevent
  3407. - * source, used in either periodic or oneshot mode. This runs
  3408. - * at 32 KiHZ, and can handle delays of up to two seconds.
  3409. + * source, used in either periodic or oneshot mode.
  3410. *
  3411. * A boot clocksource and clockevent source are also currently needed,
  3412. * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
  3413. @@ -74,6 +73,8 @@
  3414. struct tc_clkevt_device {
  3415. struct clock_event_device clkevt;
  3416. struct clk *clk;
  3417. + bool clk_enabled;
  3418. + u32 freq;
  3419. void __iomem *regs;
  3420. };
  3421. @@ -82,15 +83,26 @@
  3422. return container_of(clkevt, struct tc_clkevt_device, clkevt);
  3423. }
  3424. -/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
  3425. - * because using one of the divided clocks would usually mean the
  3426. - * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
  3427. - *
  3428. - * A divided clock could be good for high resolution timers, since
  3429. - * 30.5 usec resolution can seem "low".
  3430. - */
  3431. static u32 timer_clock;
  3432. +static void tc_clk_disable(struct clock_event_device *d)
  3433. +{
  3434. + struct tc_clkevt_device *tcd = to_tc_clkevt(d);
  3435. +
  3436. + clk_disable(tcd->clk);
  3437. + tcd->clk_enabled = false;
  3438. +}
  3439. +
  3440. +static void tc_clk_enable(struct clock_event_device *d)
  3441. +{
  3442. + struct tc_clkevt_device *tcd = to_tc_clkevt(d);
  3443. +
  3444. + if (tcd->clk_enabled)
  3445. + return;
  3446. + clk_enable(tcd->clk);
  3447. + tcd->clk_enabled = true;
  3448. +}
  3449. +
  3450. static int tc_shutdown(struct clock_event_device *d)
  3451. {
  3452. struct tc_clkevt_device *tcd = to_tc_clkevt(d);
  3453. @@ -98,8 +110,14 @@
  3454. __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR));
  3455. __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
  3456. + return 0;
  3457. +}
  3458. +
  3459. +static int tc_shutdown_clk_off(struct clock_event_device *d)
  3460. +{
  3461. + tc_shutdown(d);
  3462. if (!clockevent_state_detached(d))
  3463. - clk_disable(tcd->clk);
  3464. + tc_clk_disable(d);
  3465. return 0;
  3466. }
  3467. @@ -112,9 +130,9 @@
  3468. if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
  3469. tc_shutdown(d);
  3470. - clk_enable(tcd->clk);
  3471. + tc_clk_enable(d);
  3472. - /* slow clock, count up to RC, then irq and stop */
  3473. + /* count up to RC, then irq and stop */
  3474. __raw_writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE |
  3475. ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR));
  3476. __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
  3477. @@ -134,12 +152,12 @@
  3478. /* By not making the gentime core emulate periodic mode on top
  3479. * of oneshot, we get lower overhead and improved accuracy.
  3480. */
  3481. - clk_enable(tcd->clk);
  3482. + tc_clk_enable(d);
  3483. - /* slow clock, count up to RC, then irq and restart */
  3484. + /* count up to RC, then irq and restart */
  3485. __raw_writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
  3486. regs + ATMEL_TC_REG(2, CMR));
  3487. - __raw_writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
  3488. + __raw_writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
  3489. /* Enable clock and interrupts on RC compare */
  3490. __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
  3491. @@ -166,9 +184,13 @@
  3492. .features = CLOCK_EVT_FEAT_PERIODIC |
  3493. CLOCK_EVT_FEAT_ONESHOT,
  3494. /* Should be lower than at91rm9200's system timer */
  3495. +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  3496. .rating = 125,
  3497. +#else
  3498. + .rating = 200,
  3499. +#endif
  3500. .set_next_event = tc_next_event,
  3501. - .set_state_shutdown = tc_shutdown,
  3502. + .set_state_shutdown = tc_shutdown_clk_off,
  3503. .set_state_periodic = tc_set_periodic,
  3504. .set_state_oneshot = tc_set_oneshot,
  3505. },
  3506. @@ -188,8 +210,9 @@
  3507. return IRQ_NONE;
  3508. }
  3509. -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
  3510. +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
  3511. {
  3512. + unsigned divisor = atmel_tc_divisors[divisor_idx];
  3513. int ret;
  3514. struct clk *t2_clk = tc->clk[2];
  3515. int irq = tc->irq[2];
  3516. @@ -210,7 +233,11 @@
  3517. clkevt.regs = tc->regs;
  3518. clkevt.clk = t2_clk;
  3519. - timer_clock = clk32k_divisor_idx;
  3520. + timer_clock = divisor_idx;
  3521. + if (!divisor)
  3522. + clkevt.freq = 32768;
  3523. + else
  3524. + clkevt.freq = clk_get_rate(t2_clk) / divisor;
  3525. clkevt.clkevt.cpumask = cpumask_of(0);
  3526. @@ -221,7 +248,7 @@
  3527. return ret;
  3528. }
  3529. - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff);
  3530. + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff);
  3531. return ret;
  3532. }
  3533. @@ -358,7 +385,11 @@
  3534. goto err_disable_t1;
  3535. /* channel 2: periodic and oneshot timer support */
  3536. +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  3537. ret = setup_clkevents(tc, clk32k_divisor_idx);
  3538. +#else
  3539. + ret = setup_clkevents(tc, best_divisor_idx);
  3540. +#endif
  3541. if (ret)
  3542. goto err_unregister_clksrc;
  3543. diff -Nur linux-4.8.15.orig/drivers/clocksource/timer-atmel-pit.c linux-4.8.15/drivers/clocksource/timer-atmel-pit.c
  3544. --- linux-4.8.15.orig/drivers/clocksource/timer-atmel-pit.c 2016-12-15 17:50:48.000000000 +0100
  3545. +++ linux-4.8.15/drivers/clocksource/timer-atmel-pit.c 2017-01-01 17:07:13.911290405 +0100
  3546. @@ -46,6 +46,7 @@
  3547. u32 cycle;
  3548. u32 cnt;
  3549. unsigned int irq;
  3550. + bool irq_requested;
  3551. struct clk *mck;
  3552. };
  3553. @@ -96,15 +97,29 @@
  3554. /* disable irq, leaving the clocksource active */
  3555. pit_write(data->base, AT91_PIT_MR, (data->cycle - 1) | AT91_PIT_PITEN);
  3556. + if (data->irq_requested) {
  3557. + free_irq(data->irq, data);
  3558. + data->irq_requested = false;
  3559. + }
  3560. return 0;
  3561. }
  3562. +static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id);
  3563. /*
  3564. * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16)
  3565. */
  3566. static int pit_clkevt_set_periodic(struct clock_event_device *dev)
  3567. {
  3568. struct pit_data *data = clkevt_to_pit_data(dev);
  3569. + int ret;
  3570. +
  3571. + ret = request_irq(data->irq, at91sam926x_pit_interrupt,
  3572. + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  3573. + "at91_tick", data);
  3574. + if (ret)
  3575. + panic(pr_fmt("Unable to setup IRQ\n"));
  3576. +
  3577. + data->irq_requested = true;
  3578. /* update clocksource counter */
  3579. data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR));
  3580. @@ -211,15 +226,6 @@
  3581. return ret;
  3582. }
  3583. - /* Set up irq handler */
  3584. - ret = request_irq(data->irq, at91sam926x_pit_interrupt,
  3585. - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  3586. - "at91_tick", data);
  3587. - if (ret) {
  3588. - pr_err("Unable to setup IRQ\n");
  3589. - return ret;
  3590. - }
  3591. -
  3592. /* Set up and register clockevents */
  3593. data->clkevt.name = "pit";
  3594. data->clkevt.features = CLOCK_EVT_FEAT_PERIODIC;
  3595. diff -Nur linux-4.8.15.orig/drivers/clocksource/timer-atmel-st.c linux-4.8.15/drivers/clocksource/timer-atmel-st.c
  3596. --- linux-4.8.15.orig/drivers/clocksource/timer-atmel-st.c 2016-12-15 17:50:48.000000000 +0100
  3597. +++ linux-4.8.15/drivers/clocksource/timer-atmel-st.c 2017-01-01 17:07:13.911290405 +0100
  3598. @@ -115,18 +115,29 @@
  3599. last_crtr = read_CRTR();
  3600. }
  3601. +static int atmel_st_irq;
  3602. +
  3603. static int clkevt32k_shutdown(struct clock_event_device *evt)
  3604. {
  3605. clkdev32k_disable_and_flush_irq();
  3606. irqmask = 0;
  3607. regmap_write(regmap_st, AT91_ST_IER, irqmask);
  3608. + free_irq(atmel_st_irq, regmap_st);
  3609. return 0;
  3610. }
  3611. static int clkevt32k_set_oneshot(struct clock_event_device *dev)
  3612. {
  3613. + int ret;
  3614. +
  3615. clkdev32k_disable_and_flush_irq();
  3616. + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt,
  3617. + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  3618. + "at91_tick", regmap_st);
  3619. + if (ret)
  3620. + panic(pr_fmt("Unable to setup IRQ\n"));
  3621. +
  3622. /*
  3623. * ALM for oneshot irqs, set by next_event()
  3624. * before 32 seconds have passed.
  3625. @@ -139,8 +150,16 @@
  3626. static int clkevt32k_set_periodic(struct clock_event_device *dev)
  3627. {
  3628. + int ret;
  3629. +
  3630. clkdev32k_disable_and_flush_irq();
  3631. + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt,
  3632. + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  3633. + "at91_tick", regmap_st);
  3634. + if (ret)
  3635. + panic(pr_fmt("Unable to setup IRQ\n"));
  3636. +
  3637. /* PIT for periodic irqs; fixed rate of 1/HZ */
  3638. irqmask = AT91_ST_PITS;
  3639. regmap_write(regmap_st, AT91_ST_PIMR, timer_latch);
  3640. @@ -198,7 +217,7 @@
  3641. {
  3642. struct clk *sclk;
  3643. unsigned int sclk_rate, val;
  3644. - int irq, ret;
  3645. + int ret;
  3646. regmap_st = syscon_node_to_regmap(node);
  3647. if (IS_ERR(regmap_st)) {
  3648. @@ -212,21 +231,12 @@
  3649. regmap_read(regmap_st, AT91_ST_SR, &val);
  3650. /* Get the interrupts property */
  3651. - irq = irq_of_parse_and_map(node, 0);
  3652. - if (!irq) {
  3653. + atmel_st_irq = irq_of_parse_and_map(node, 0);
  3654. + if (!atmel_st_irq) {
  3655. pr_err("Unable to get IRQ from DT\n");
  3656. return -EINVAL;
  3657. }
  3658. - /* Make IRQs happen for the system timer */
  3659. - ret = request_irq(irq, at91rm9200_timer_interrupt,
  3660. - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  3661. - "at91_tick", regmap_st);
  3662. - if (ret) {
  3663. - pr_err("Unable to setup IRQ\n");
  3664. - return ret;
  3665. - }
  3666. -
  3667. sclk = of_clk_get(node, 0);
  3668. if (IS_ERR(sclk)) {
  3669. pr_err("Unable to get slow clock\n");
  3670. diff -Nur linux-4.8.15.orig/drivers/connector/cn_proc.c linux-4.8.15/drivers/connector/cn_proc.c
  3671. --- linux-4.8.15.orig/drivers/connector/cn_proc.c 2016-12-15 17:50:48.000000000 +0100
  3672. +++ linux-4.8.15/drivers/connector/cn_proc.c 2017-01-01 17:07:13.951292981 +0100
  3673. @@ -32,6 +32,7 @@
  3674. #include <linux/pid_namespace.h>
  3675. #include <linux/cn_proc.h>
  3676. +#include <linux/locallock.h>
  3677. /*
  3678. * Size of a cn_msg followed by a proc_event structure. Since the
  3679. @@ -54,10 +55,11 @@
  3680. /* proc_event_counts is used as the sequence number of the netlink message */
  3681. static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 };
  3682. +static DEFINE_LOCAL_IRQ_LOCK(send_msg_lock);
  3683. static inline void send_msg(struct cn_msg *msg)
  3684. {
  3685. - preempt_disable();
  3686. + local_lock(send_msg_lock);
  3687. msg->seq = __this_cpu_inc_return(proc_event_counts) - 1;
  3688. ((struct proc_event *)msg->data)->cpu = smp_processor_id();
  3689. @@ -70,7 +72,7 @@
  3690. */
  3691. cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT);
  3692. - preempt_enable();
  3693. + local_unlock(send_msg_lock);
  3694. }
  3695. void proc_fork_connector(struct task_struct *task)
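
The cn_proc hunk above swaps a bare preempt_disable()/preempt_enable() pair for a local lock: on mainline this still just disables preemption, but on PREEMPT_RT it becomes a per-CPU sleeping lock, keeping the section preemptible while still serialising the per-CPU state. A minimal sketch of the pattern; the lock and counter names are illustrative.

    /* Sketch of the local-lock pattern applied above.
     * DEFINE_LOCAL_IRQ_LOCK(), local_lock() and local_unlock() come from
     * linux/locallock.h, which this patch introduces. */
    #include <linux/locallock.h>
    #include <linux/percpu.h>

    static DEFINE_PER_CPU(unsigned int, demo_count);
    static DEFINE_LOCAL_IRQ_LOCK(demo_lock);

    static unsigned int demo_next_seq(void)
    {
            unsigned int seq;

            local_lock(demo_lock);          /* was: preempt_disable() */
            seq = __this_cpu_inc_return(demo_count);
            local_unlock(demo_lock);        /* was: preempt_enable() */
            return seq;
    }
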
  3696. diff -Nur linux-4.8.15.orig/drivers/cpufreq/Kconfig.x86 linux-4.8.15/drivers/cpufreq/Kconfig.x86
  3697. --- linux-4.8.15.orig/drivers/cpufreq/Kconfig.x86 2016-12-15 17:50:48.000000000 +0100
  3698. +++ linux-4.8.15/drivers/cpufreq/Kconfig.x86 2017-01-01 17:07:13.951292981 +0100
  3699. @@ -124,7 +124,7 @@
  3700. config X86_POWERNOW_K8
  3701. tristate "AMD Opteron/Athlon64 PowerNow!"
  3702. - depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ
  3703. + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE
  3704. help
  3705. This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors.
  3706. Support for K10 and newer processors is now in acpi-cpufreq.
  3707. diff -Nur linux-4.8.15.orig/drivers/gpu/drm/i915/i915_gem_execbuffer.c linux-4.8.15/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  3708. --- linux-4.8.15.orig/drivers/gpu/drm/i915/i915_gem_execbuffer.c 2016-12-15 17:50:48.000000000 +0100
  3709. +++ linux-4.8.15/drivers/gpu/drm/i915/i915_gem_execbuffer.c 2017-01-01 17:07:13.983295041 +0100
  3710. @@ -1302,7 +1302,9 @@
  3711. if (ret)
  3712. return ret;
  3713. +#ifndef CONFIG_PREEMPT_RT_BASE
  3714. trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
  3715. +#endif
  3716. i915_gem_execbuffer_move_to_active(vmas, params->request);
  3717. diff -Nur linux-4.8.15.orig/drivers/gpu/drm/i915/i915_gem_shrinker.c linux-4.8.15/drivers/gpu/drm/i915/i915_gem_shrinker.c
  3718. --- linux-4.8.15.orig/drivers/gpu/drm/i915/i915_gem_shrinker.c 2016-12-15 17:50:48.000000000 +0100
  3719. +++ linux-4.8.15/drivers/gpu/drm/i915/i915_gem_shrinker.c 2017-01-01 17:07:13.987295307 +0100
  3720. @@ -40,7 +40,7 @@
  3721. if (!mutex_is_locked(mutex))
  3722. return false;
  3723. -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
  3724. +#if (defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)) && !defined(CONFIG_PREEMPT_RT_BASE)
  3725. return mutex->owner == task;
  3726. #else
  3727. /* Since UP may be pre-empted, we cannot assume that we own the lock */
  3728. diff -Nur linux-4.8.15.orig/drivers/gpu/drm/i915/i915_irq.c linux-4.8.15/drivers/gpu/drm/i915/i915_irq.c
  3729. --- linux-4.8.15.orig/drivers/gpu/drm/i915/i915_irq.c 2016-12-15 17:50:48.000000000 +0100
  3730. +++ linux-4.8.15/drivers/gpu/drm/i915/i915_irq.c 2017-01-01 17:07:14.007296588 +0100
  3731. @@ -812,6 +812,7 @@
  3732. spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
  3733. /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
  3734. + preempt_disable_rt();
  3735. /* Get optional system timestamp before query. */
  3736. if (stime)
  3737. @@ -863,6 +864,7 @@
  3738. *etime = ktime_get();
  3739. /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
  3740. + preempt_enable_rt();
  3741. spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
  3742. diff -Nur linux-4.8.15.orig/drivers/gpu/drm/i915/intel_display.c linux-4.8.15/drivers/gpu/drm/i915/intel_display.c
  3743. --- linux-4.8.15.orig/drivers/gpu/drm/i915/intel_display.c 2016-12-15 17:50:48.000000000 +0100
  3744. +++ linux-4.8.15/drivers/gpu/drm/i915/intel_display.c 2017-01-01 17:07:14.079301234 +0100
  3745. @@ -11670,7 +11670,7 @@
  3746. struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
  3747. struct intel_flip_work *work;
  3748. - WARN_ON(!in_interrupt());
  3749. + WARN_ON_NONRT(!in_interrupt());
  3750. if (crtc == NULL)
  3751. return;
  3752. diff -Nur linux-4.8.15.orig/drivers/gpu/drm/i915/intel_sprite.c linux-4.8.15/drivers/gpu/drm/i915/intel_sprite.c
  3753. --- linux-4.8.15.orig/drivers/gpu/drm/i915/intel_sprite.c 2016-12-15 17:50:48.000000000 +0100
  3754. +++ linux-4.8.15/drivers/gpu/drm/i915/intel_sprite.c 2017-01-01 17:07:14.091301999 +0100
  3755. @@ -38,6 +38,7 @@
  3756. #include "intel_drv.h"
  3757. #include <drm/i915_drm.h>
  3758. #include "i915_drv.h"
  3759. +#include <linux/locallock.h>
  3760. static bool
  3761. format_is_yuv(uint32_t format)
  3762. @@ -64,6 +65,8 @@
  3763. 1000 * adjusted_mode->crtc_htotal);
  3764. }
  3765. +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock);
  3766. +
  3767. /**
  3768. * intel_pipe_update_start() - start update of a set of display registers
  3769. * @crtc: the crtc of which the registers are going to be updated
  3770. @@ -94,7 +97,7 @@
  3771. min = vblank_start - intel_usecs_to_scanlines(adjusted_mode, 100);
  3772. max = vblank_start - 1;
  3773. - local_irq_disable();
  3774. + local_lock_irq(pipe_update_lock);
  3775. if (min <= 0 || max <= 0)
  3776. return;
  3777. @@ -124,11 +127,11 @@
  3778. break;
  3779. }
  3780. - local_irq_enable();
  3781. + local_unlock_irq(pipe_update_lock);
  3782. timeout = schedule_timeout(timeout);
  3783. - local_irq_disable();
  3784. + local_lock_irq(pipe_update_lock);
  3785. }
  3786. finish_wait(wq, &wait);
  3787. @@ -180,7 +183,7 @@
  3788. crtc->base.state->event = NULL;
  3789. }
  3790. - local_irq_enable();
  3791. + local_unlock_irq(pipe_update_lock);
  3792. if (crtc->debug.start_vbl_count &&
  3793. crtc->debug.start_vbl_count != end_vbl_count) {
  3794. diff -Nur linux-4.8.15.orig/drivers/gpu/drm/msm/msm_gem_shrinker.c linux-4.8.15/drivers/gpu/drm/msm/msm_gem_shrinker.c
  3795. --- linux-4.8.15.orig/drivers/gpu/drm/msm/msm_gem_shrinker.c 2016-12-15 17:50:48.000000000 +0100
  3796. +++ linux-4.8.15/drivers/gpu/drm/msm/msm_gem_shrinker.c 2017-01-01 17:07:14.095302264 +0100
  3797. @@ -23,7 +23,7 @@
  3798. if (!mutex_is_locked(mutex))
  3799. return false;
  3800. -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
  3801. +#if (defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)) && !defined(CONFIG_PREEMPT_RT_BASE)
  3802. return mutex->owner == task;
  3803. #else
  3804. /* Since UP may be pre-empted, we cannot assume that we own the lock */
  3805. diff -Nur linux-4.8.15.orig/drivers/gpu/drm/radeon/radeon_display.c linux-4.8.15/drivers/gpu/drm/radeon/radeon_display.c
  3806. --- linux-4.8.15.orig/drivers/gpu/drm/radeon/radeon_display.c 2016-12-15 17:50:48.000000000 +0100
  3807. +++ linux-4.8.15/drivers/gpu/drm/radeon/radeon_display.c 2017-01-01 17:07:14.135304831 +0100
  3808. @@ -1869,6 +1869,7 @@
  3809. struct radeon_device *rdev = dev->dev_private;
  3810. /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
  3811. + preempt_disable_rt();
  3812. /* Get optional system timestamp before query. */
  3813. if (stime)
  3814. @@ -1961,6 +1962,7 @@
  3815. *etime = ktime_get();
  3816. /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
  3817. + preempt_enable_rt();
  3818. /* Decode into vertical and horizontal scanout position. */
  3819. *vpos = position & 0x1fff;
  3820. diff -Nur linux-4.8.15.orig/drivers/hv/vmbus_drv.c linux-4.8.15/drivers/hv/vmbus_drv.c
  3821. --- linux-4.8.15.orig/drivers/hv/vmbus_drv.c 2016-12-15 17:50:48.000000000 +0100
  3822. +++ linux-4.8.15/drivers/hv/vmbus_drv.c 2017-01-01 17:07:14.187308183 +0100
  3823. @@ -761,6 +761,8 @@
  3824. void *page_addr;
  3825. struct hv_message *msg;
  3826. union hv_synic_event_flags *event;
  3827. + struct pt_regs *regs = get_irq_regs();
  3828. + u64 ip = regs ? instruction_pointer(regs) : 0;
  3829. bool handled = false;
  3830. page_addr = hv_context.synic_event_page[cpu];
  3831. @@ -808,7 +810,7 @@
  3832. tasklet_schedule(hv_context.msg_dpc[cpu]);
  3833. }
  3834. - add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
  3835. + add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, ip);
  3836. }
  3837. diff -Nur linux-4.8.15.orig/drivers/ide/alim15x3.c linux-4.8.15/drivers/ide/alim15x3.c
  3838. --- linux-4.8.15.orig/drivers/ide/alim15x3.c 2016-12-15 17:50:48.000000000 +0100
  3839. +++ linux-4.8.15/drivers/ide/alim15x3.c 2017-01-01 17:07:14.227310759 +0100
  3840. @@ -234,7 +234,7 @@
  3841. isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
  3842. - local_irq_save(flags);
  3843. + local_irq_save_nort(flags);
  3844. if (m5229_revision < 0xC2) {
  3845. /*
  3846. @@ -325,7 +325,7 @@
  3847. }
  3848. pci_dev_put(north);
  3849. pci_dev_put(isa_dev);
  3850. - local_irq_restore(flags);
  3851. + local_irq_restore_nort(flags);
  3852. return 0;
  3853. }
  3854. diff -Nur linux-4.8.15.orig/drivers/ide/hpt366.c linux-4.8.15/drivers/ide/hpt366.c
  3855. --- linux-4.8.15.orig/drivers/ide/hpt366.c 2016-12-15 17:50:48.000000000 +0100
  3856. +++ linux-4.8.15/drivers/ide/hpt366.c 2017-01-01 17:07:14.243311792 +0100
  3857. @@ -1236,7 +1236,7 @@
  3858. dma_old = inb(base + 2);
  3859. - local_irq_save(flags);
  3860. + local_irq_save_nort(flags);
  3861. dma_new = dma_old;
  3862. pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma);
  3863. @@ -1247,7 +1247,7 @@
  3864. if (dma_new != dma_old)
  3865. outb(dma_new, base + 2);
  3866. - local_irq_restore(flags);
  3867. + local_irq_restore_nort(flags);
  3868. printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n",
  3869. hwif->name, base, base + 7);
  3870. diff -Nur linux-4.8.15.orig/drivers/ide/ide-io.c linux-4.8.15/drivers/ide/ide-io.c
  3871. --- linux-4.8.15.orig/drivers/ide/ide-io.c 2016-12-15 17:50:48.000000000 +0100
  3872. +++ linux-4.8.15/drivers/ide/ide-io.c 2017-01-01 17:07:14.243311792 +0100
  3873. @@ -659,7 +659,7 @@
  3874. /* disable_irq_nosync ?? */
  3875. disable_irq(hwif->irq);
  3876. /* local CPU only, as if we were handling an interrupt */
  3877. - local_irq_disable();
  3878. + local_irq_disable_nort();
  3879. if (hwif->polling) {
  3880. startstop = handler(drive);
  3881. } else if (drive_is_ready(drive)) {
  3882. diff -Nur linux-4.8.15.orig/drivers/ide/ide-iops.c linux-4.8.15/drivers/ide/ide-iops.c
  3883. --- linux-4.8.15.orig/drivers/ide/ide-iops.c 2016-12-15 17:50:48.000000000 +0100
  3884. +++ linux-4.8.15/drivers/ide/ide-iops.c 2017-01-01 17:07:14.247312048 +0100
  3885. @@ -129,12 +129,12 @@
  3886. if ((stat & ATA_BUSY) == 0)
  3887. break;
  3888. - local_irq_restore(flags);
  3889. + local_irq_restore_nort(flags);
  3890. *rstat = stat;
  3891. return -EBUSY;
  3892. }
  3893. }
  3894. - local_irq_restore(flags);
  3895. + local_irq_restore_nort(flags);
  3896. }
  3897. /*
  3898. * Allow status to settle, then read it again.
  3899. diff -Nur linux-4.8.15.orig/drivers/ide/ide-io-std.c linux-4.8.15/drivers/ide/ide-io-std.c
  3900. --- linux-4.8.15.orig/drivers/ide/ide-io-std.c 2016-12-15 17:50:48.000000000 +0100
  3901. +++ linux-4.8.15/drivers/ide/ide-io-std.c 2017-01-01 17:07:14.243311792 +0100
  3902. @@ -175,7 +175,7 @@
  3903. unsigned long uninitialized_var(flags);
  3904. if ((io_32bit & 2) && !mmio) {
  3905. - local_irq_save(flags);
  3906. + local_irq_save_nort(flags);
  3907. ata_vlb_sync(io_ports->nsect_addr);
  3908. }
  3909. @@ -186,7 +186,7 @@
  3910. insl(data_addr, buf, words);
  3911. if ((io_32bit & 2) && !mmio)
  3912. - local_irq_restore(flags);
  3913. + local_irq_restore_nort(flags);
  3914. if (((len + 1) & 3) < 2)
  3915. return;
  3916. @@ -219,7 +219,7 @@
  3917. unsigned long uninitialized_var(flags);
  3918. if ((io_32bit & 2) && !mmio) {
  3919. - local_irq_save(flags);
  3920. + local_irq_save_nort(flags);
  3921. ata_vlb_sync(io_ports->nsect_addr);
  3922. }
  3923. @@ -230,7 +230,7 @@
  3924. outsl(data_addr, buf, words);
  3925. if ((io_32bit & 2) && !mmio)
  3926. - local_irq_restore(flags);
  3927. + local_irq_restore_nort(flags);
  3928. if (((len + 1) & 3) < 2)
  3929. return;
  3930. diff -Nur linux-4.8.15.orig/drivers/ide/ide-probe.c linux-4.8.15/drivers/ide/ide-probe.c
  3931. --- linux-4.8.15.orig/drivers/ide/ide-probe.c 2016-12-15 17:50:48.000000000 +0100
  3932. +++ linux-4.8.15/drivers/ide/ide-probe.c 2017-01-01 17:07:14.247312048 +0100
  3933. @@ -196,10 +196,10 @@
  3934. int bswap = 1;
  3935. /* local CPU only; some systems need this */
  3936. - local_irq_save(flags);
  3937. + local_irq_save_nort(flags);
  3938. /* read 512 bytes of id info */
  3939. hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE);
  3940. - local_irq_restore(flags);
  3941. + local_irq_restore_nort(flags);
  3942. drive->dev_flags |= IDE_DFLAG_ID_READ;
  3943. #ifdef DEBUG
  3944. diff -Nur linux-4.8.15.orig/drivers/ide/ide-taskfile.c linux-4.8.15/drivers/ide/ide-taskfile.c
  3945. --- linux-4.8.15.orig/drivers/ide/ide-taskfile.c 2016-12-15 17:50:48.000000000 +0100
  3946. +++ linux-4.8.15/drivers/ide/ide-taskfile.c 2017-01-01 17:07:14.255312569 +0100
  3947. @@ -250,7 +250,7 @@
  3948. page_is_high = PageHighMem(page);
  3949. if (page_is_high)
  3950. - local_irq_save(flags);
  3951. + local_irq_save_nort(flags);
  3952. buf = kmap_atomic(page) + offset;
  3953. @@ -271,7 +271,7 @@
  3954. kunmap_atomic(buf);
  3955. if (page_is_high)
  3956. - local_irq_restore(flags);
  3957. + local_irq_restore_nort(flags);
  3958. len -= nr_bytes;
  3959. }
  3960. @@ -414,7 +414,7 @@
  3961. }
  3962. if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
  3963. - local_irq_disable();
  3964. + local_irq_disable_nort();
  3965. ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
  3966. diff -Nur linux-4.8.15.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c linux-4.8.15/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  3967. --- linux-4.8.15.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2016-12-15 17:50:48.000000000 +0100
  3968. +++ linux-4.8.15/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2017-01-01 17:07:14.299315394 +0100
  3969. @@ -899,7 +899,7 @@
  3970. ipoib_dbg_mcast(priv, "restarting multicast task\n");
  3971. - local_irq_save(flags);
  3972. + local_irq_save_nort(flags);
  3973. netif_addr_lock(dev);
  3974. spin_lock(&priv->lock);
  3975. @@ -981,7 +981,7 @@
  3976. spin_unlock(&priv->lock);
  3977. netif_addr_unlock(dev);
  3978. - local_irq_restore(flags);
  3979. + local_irq_restore_nort(flags);
  3980. /*
  3981. * make sure the in-flight joins have finished before we attempt
  3982. diff -Nur linux-4.8.15.orig/drivers/input/gameport/gameport.c linux-4.8.15/drivers/input/gameport/gameport.c
  3983. --- linux-4.8.15.orig/drivers/input/gameport/gameport.c 2016-12-15 17:50:48.000000000 +0100
  3984. +++ linux-4.8.15/drivers/input/gameport/gameport.c 2017-01-01 17:07:14.343318229 +0100
  3985. @@ -91,13 +91,13 @@
  3986. tx = ~0;
  3987. for (i = 0; i < 50; i++) {
  3988. - local_irq_save(flags);
  3989. + local_irq_save_nort(flags);
  3990. t1 = ktime_get_ns();
  3991. for (t = 0; t < 50; t++)
  3992. gameport_read(gameport);
  3993. t2 = ktime_get_ns();
  3994. t3 = ktime_get_ns();
  3995. - local_irq_restore(flags);
  3996. + local_irq_restore_nort(flags);
  3997. udelay(i * 10);
  3998. t = (t2 - t1) - (t3 - t2);
  3999. if (t < tx)
  4000. @@ -124,12 +124,12 @@
  4001. tx = 1 << 30;
  4002. for(i = 0; i < 50; i++) {
  4003. - local_irq_save(flags);
  4004. + local_irq_save_nort(flags);
  4005. GET_TIME(t1);
  4006. for (t = 0; t < 50; t++) gameport_read(gameport);
  4007. GET_TIME(t2);
  4008. GET_TIME(t3);
  4009. - local_irq_restore(flags);
  4010. + local_irq_restore_nort(flags);
  4011. udelay(i * 10);
  4012. if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t;
  4013. }
  4014. @@ -148,11 +148,11 @@
  4015. tx = 1 << 30;
  4016. for(i = 0; i < 50; i++) {
  4017. - local_irq_save(flags);
  4018. + local_irq_save_nort(flags);
  4019. t1 = rdtsc();
  4020. for (t = 0; t < 50; t++) gameport_read(gameport);
  4021. t2 = rdtsc();
  4022. - local_irq_restore(flags);
  4023. + local_irq_restore_nort(flags);
  4024. udelay(i * 10);
  4025. if (t2 - t1 < tx) tx = t2 - t1;
  4026. }
  4027. diff -Nur linux-4.8.15.orig/drivers/iommu/amd_iommu.c linux-4.8.15/drivers/iommu/amd_iommu.c
  4028. --- linux-4.8.15.orig/drivers/iommu/amd_iommu.c 2016-12-15 17:50:48.000000000 +0100
  4029. +++ linux-4.8.15/drivers/iommu/amd_iommu.c 2017-01-01 17:07:14.403322093 +0100
  4030. @@ -1835,10 +1835,10 @@
  4031. int ret;
  4032. /*
  4033. - * Must be called with IRQs disabled. Warn here to detect early
  4034. - * when its not.
  4035. + * Must be called with IRQs disabled on a non-RT kernel. Warn here to
  4036. + * detect early when it's not.
  4037. */
  4038. - WARN_ON(!irqs_disabled());
  4039. + WARN_ON_NONRT(!irqs_disabled());
  4040. /* lock domain */
  4041. spin_lock(&domain->lock);
  4042. @@ -2006,10 +2006,10 @@
  4043. struct protection_domain *domain;
  4044. /*
  4045. - * Must be called with IRQs disabled. Warn here to detect early
  4046. - * when its not.
  4047. + * Must be called with IRQs disabled on a non-RT kernel. Warn here to
  4048. + * detect early when it's not.
  4049. */
  4050. - WARN_ON(!irqs_disabled());
  4051. + WARN_ON_NONRT(!irqs_disabled());
  4052. if (WARN_ON(!dev_data->domain))
  4053. return;
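WARN_ON_NONRT() above (and the BUG_ON_NONRT() used in the dm-rq.c hunk further down) follow the same idea: the assertion is only meaningful when the code path genuinely runs with interrupts disabled, which is not the case on PREEMPT_RT where these sections run in preemptible context under sleeping locks. A minimal sketch of the expected definitions (assumed here; the actual ones are added to bug.h elsewhere in this patch):

#ifdef CONFIG_PREEMPT_RT_BASE
/* RT: the IRQ-off assumption does not hold, so the checks are elided. */
# define BUG_ON_NONRT(c)        do { } while (0)
# define WARN_ON_NONRT(c)       do { } while (0)
#else
/* !RT: identical to the stock assertions. */
# define BUG_ON_NONRT(c)        BUG_ON(c)
# define WARN_ON_NONRT(c)       WARN_ON(c)
#endif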
  4054. diff -Nur linux-4.8.15.orig/drivers/iommu/intel-iommu.c linux-4.8.15/drivers/iommu/intel-iommu.c
  4055. --- linux-4.8.15.orig/drivers/iommu/intel-iommu.c 2016-12-15 17:50:48.000000000 +0100
  4056. +++ linux-4.8.15/drivers/iommu/intel-iommu.c 2017-01-01 17:07:14.443324672 +0100
  4057. @@ -479,7 +479,7 @@
  4058. struct deferred_flush_table *tables;
  4059. };
  4060. -DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
  4061. +static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
  4062. /* bitmap for indexing intel_iommus */
  4063. static int g_num_of_iommus;
  4064. @@ -3649,10 +3649,8 @@
  4065. struct intel_iommu *iommu;
  4066. struct deferred_flush_entry *entry;
  4067. struct deferred_flush_data *flush_data;
  4068. - unsigned int cpuid;
  4069. - cpuid = get_cpu();
  4070. - flush_data = per_cpu_ptr(&deferred_flush, cpuid);
  4071. + flush_data = raw_cpu_ptr(&deferred_flush);
  4072. /* Flush all CPUs' entries to avoid deferring too much. If
  4073. * this becomes a bottleneck, can just flush us, and rely on
  4074. @@ -3685,8 +3683,6 @@
  4075. }
  4076. flush_data->size++;
  4077. spin_unlock_irqrestore(&flush_data->lock, flags);
  4078. -
  4079. - put_cpu();
  4080. }
  4081. static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
  4082. diff -Nur linux-4.8.15.orig/drivers/iommu/iova.c linux-4.8.15/drivers/iommu/iova.c
  4083. --- linux-4.8.15.orig/drivers/iommu/iova.c 2016-12-15 17:50:48.000000000 +0100
  4084. +++ linux-4.8.15/drivers/iommu/iova.c 2017-01-01 17:07:14.455325452 +0100
  4085. @@ -22,6 +22,7 @@
  4086. #include <linux/slab.h>
  4087. #include <linux/smp.h>
  4088. #include <linux/bitops.h>
  4089. +#include <linux/cpu.h>
  4090. static bool iova_rcache_insert(struct iova_domain *iovad,
  4091. unsigned long pfn,
  4092. @@ -420,10 +421,8 @@
  4093. /* Try replenishing IOVAs by flushing rcache. */
  4094. flushed_rcache = true;
  4095. - preempt_disable();
  4096. for_each_online_cpu(cpu)
  4097. free_cpu_cached_iovas(cpu, iovad);
  4098. - preempt_enable();
  4099. goto retry;
  4100. }
  4101. @@ -751,7 +750,7 @@
  4102. bool can_insert = false;
  4103. unsigned long flags;
  4104. - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
  4105. + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
  4106. spin_lock_irqsave(&cpu_rcache->lock, flags);
  4107. if (!iova_magazine_full(cpu_rcache->loaded)) {
  4108. @@ -781,7 +780,6 @@
  4109. iova_magazine_push(cpu_rcache->loaded, iova_pfn);
  4110. spin_unlock_irqrestore(&cpu_rcache->lock, flags);
  4111. - put_cpu_ptr(rcache->cpu_rcaches);
  4112. if (mag_to_free) {
  4113. iova_magazine_free_pfns(mag_to_free, iovad);
  4114. @@ -815,7 +813,7 @@
  4115. bool has_pfn = false;
  4116. unsigned long flags;
  4117. - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
  4118. + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
  4119. spin_lock_irqsave(&cpu_rcache->lock, flags);
  4120. if (!iova_magazine_empty(cpu_rcache->loaded)) {
  4121. @@ -837,7 +835,6 @@
  4122. iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
  4123. spin_unlock_irqrestore(&cpu_rcache->lock, flags);
  4124. - put_cpu_ptr(rcache->cpu_rcaches);
  4125. return iova_pfn;
  4126. }
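The intel-iommu.c and iova.c changes above share one pattern: get_cpu()/get_cpu_ptr() disable preemption, which on PREEMPT_RT_FULL is incompatible with the sleeping spinlock taken immediately afterwards. Because the per-CPU data is already serialized by its own lock (taken with spin_lock_irqsave()), the task does not need to be pinned; if it migrates after raw_cpu_ptr() it simply keeps operating on the previous CPU's structure, which the lock keeps consistent. A small, hypothetical illustration of that pattern (all names invented for this sketch; lock initialization omitted):

#include <linux/percpu.h>
#include <linux/spinlock.h>

struct pcpu_cache {
        spinlock_t lock;
        unsigned long entry[16];
        unsigned int nr;
};

static DEFINE_PER_CPU(struct pcpu_cache, pcpu_cache);

static void pcpu_cache_add(unsigned long val)
{
        /* No get_cpu_ptr(): the lock, not preempt-disable, protects the data. */
        struct pcpu_cache *c = raw_cpu_ptr(&pcpu_cache);
        unsigned long flags;

        spin_lock_irqsave(&c->lock, flags);
        if (c->nr < ARRAY_SIZE(c->entry))
                c->entry[c->nr++] = val;
        spin_unlock_irqrestore(&c->lock, flags);
}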
  4127. diff -Nur linux-4.8.15.orig/drivers/leds/trigger/Kconfig linux-4.8.15/drivers/leds/trigger/Kconfig
  4128. --- linux-4.8.15.orig/drivers/leds/trigger/Kconfig 2016-12-15 17:50:48.000000000 +0100
  4129. +++ linux-4.8.15/drivers/leds/trigger/Kconfig 2017-01-01 17:07:14.495328021 +0100
  4130. @@ -69,7 +69,7 @@
  4131. config LEDS_TRIGGER_CPU
  4132. bool "LED CPU Trigger"
  4133. - depends on LEDS_TRIGGERS
  4134. + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE
  4135. help
  4136. This allows LEDs to be controlled by active CPUs. This shows
  4137. the active CPUs across an array of LEDs so you can see which
  4138. diff -Nur linux-4.8.15.orig/drivers/md/bcache/Kconfig linux-4.8.15/drivers/md/bcache/Kconfig
  4139. --- linux-4.8.15.orig/drivers/md/bcache/Kconfig 2016-12-15 17:50:48.000000000 +0100
  4140. +++ linux-4.8.15/drivers/md/bcache/Kconfig 2017-01-01 17:07:14.511329049 +0100
  4141. @@ -1,6 +1,7 @@
  4142. config BCACHE
  4143. tristate "Block device as cache"
  4144. + depends on !PREEMPT_RT_FULL
  4145. ---help---
  4146. Allows a block device to be used as cache for other devices; uses
  4147. a btree for indexing and the layout is optimized for SSDs.
  4148. diff -Nur linux-4.8.15.orig/drivers/md/dm-rq.c linux-4.8.15/drivers/md/dm-rq.c
  4149. --- linux-4.8.15.orig/drivers/md/dm-rq.c 2016-12-15 17:50:48.000000000 +0100
  4150. +++ linux-4.8.15/drivers/md/dm-rq.c 2017-01-01 17:07:14.515329315 +0100
  4151. @@ -811,7 +811,7 @@
  4152. /* Establish tio->ti before queuing work (map_tio_request) */
  4153. tio->ti = ti;
  4154. queue_kthread_work(&md->kworker, &tio->work);
  4155. - BUG_ON(!irqs_disabled());
  4156. + BUG_ON_NONRT(!irqs_disabled());
  4157. }
  4158. }
  4159. diff -Nur linux-4.8.15.orig/drivers/md/raid5.c linux-4.8.15/drivers/md/raid5.c
  4160. --- linux-4.8.15.orig/drivers/md/raid5.c 2016-12-15 17:50:48.000000000 +0100
  4161. +++ linux-4.8.15/drivers/md/raid5.c 2017-01-01 17:07:14.555331893 +0100
  4162. @@ -1928,8 +1928,9 @@
  4163. struct raid5_percpu *percpu;
  4164. unsigned long cpu;
  4165. - cpu = get_cpu();
  4166. + cpu = get_cpu_light();
  4167. percpu = per_cpu_ptr(conf->percpu, cpu);
  4168. + spin_lock(&percpu->lock);
  4169. if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
  4170. ops_run_biofill(sh);
  4171. overlap_clear++;
  4172. @@ -1985,7 +1986,8 @@
  4173. if (test_and_clear_bit(R5_Overlap, &dev->flags))
  4174. wake_up(&sh->raid_conf->wait_for_overlap);
  4175. }
  4176. - put_cpu();
  4177. + spin_unlock(&percpu->lock);
  4178. + put_cpu_light();
  4179. }
  4180. static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
  4181. @@ -6438,6 +6440,7 @@
  4182. __func__, cpu);
  4183. break;
  4184. }
  4185. + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
  4186. }
  4187. put_online_cpus();
  4188. diff -Nur linux-4.8.15.orig/drivers/md/raid5.h linux-4.8.15/drivers/md/raid5.h
  4189. --- linux-4.8.15.orig/drivers/md/raid5.h 2016-12-15 17:50:48.000000000 +0100
  4190. +++ linux-4.8.15/drivers/md/raid5.h 2017-01-01 17:07:14.571332922 +0100
  4191. @@ -504,6 +504,7 @@
  4192. int recovery_disabled;
  4193. /* per cpu variables */
  4194. struct raid5_percpu {
  4195. + spinlock_t lock; /* Protection for -RT */
  4196. struct page *spare_page; /* Used when checking P/Q in raid6 */
  4197. struct flex_array *scribble; /* space for constructing buffer
  4198. * lists and performing address
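The raid5 hunks apply the same reasoning but keep the per-CPU scratch buffers exclusive by adding an explicit lock: get_cpu_light() only disables migration on RT instead of preemption, so preemption alone no longer guarantees exclusive access to conf->percpu, and the new percpu->lock takes over that role. A rough sketch of the assumed get_cpu_light() semantics (migrate_disable()/migrate_enable() are RT-patch primitives; the real definition lives elsewhere in this series):

#ifdef CONFIG_PREEMPT_RT_FULL
/* RT: pin the task to its CPU without disabling preemption. */
# define get_cpu_light()  ({ migrate_disable(); smp_processor_id(); })
# define put_cpu_light()  migrate_enable()
#else
/* !RT: same as the stock get_cpu()/put_cpu(). */
# define get_cpu_light()  get_cpu()
# define put_cpu_light()  put_cpu()
#endif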
  4199. diff -Nur linux-4.8.15.orig/drivers/misc/hwlat_detector.c linux-4.8.15/drivers/misc/hwlat_detector.c
  4200. --- linux-4.8.15.orig/drivers/misc/hwlat_detector.c 1970-01-01 01:00:00.000000000 +0100
  4201. +++ linux-4.8.15/drivers/misc/hwlat_detector.c 2017-01-01 17:07:14.579333432 +0100
  4202. @@ -0,0 +1,1240 @@
  4203. +/*
  4204. + * hwlat_detector.c - A simple Hardware Latency detector.
  4205. + *
  4206. + * Use this module to detect large system latencies induced by the behavior of
  4207. + * certain underlying system hardware or firmware, independent of Linux itself.
  4208. + * The code was developed originally to detect the presence of SMIs on Intel
  4209. + * and AMD systems, although there is no dependency upon x86 herein.
  4210. + *
  4211. + * The classical example usage of this module is in detecting the presence of
  4212. + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
  4213. + * somewhat special form of hardware interrupt spawned from earlier CPU debug
  4214. + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
  4215. + * LPC (or other device) to generate a special interrupt under certain
  4216. + * circumstances, for example, upon expiration of a special SMI timer device,
  4217. + * due to certain external thermal readings, on certain I/O address accesses,
  4218. + * and other situations. An SMI hits a special CPU pin, triggers a special
  4219. + * SMI mode (complete with special memory map), and the OS is unaware.
  4220. + *
  4221. + * Although certain hardware-inducing latencies are necessary (for example,
  4222. + * a modern system often requires an SMI handler for correct thermal control
  4223. + * and remote management) they can wreak havoc upon any OS-level performance
  4224. + * guarantees toward low-latency, especially when the OS is not even made
  4225. + * aware of the presence of these interrupts. For this reason, we need a
  4226. + * somewhat brute force mechanism to detect these interrupts. In this case,
  4227. + * we do it by hogging all of the CPU(s) for configurable timer intervals,
  4228. + * sampling the built-in CPU timer, looking for discontiguous readings.
  4229. + *
  4230. + * WARNING: This implementation necessarily introduces latencies. Therefore,
  4231. + * you should NEVER use this module in a production environment
  4232. + * requiring any kind of low-latency performance guarantee(s).
  4233. + *
  4234. + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
  4235. + *
  4236. + * Includes useful feedback from Clark Williams <clark@redhat.com>
  4237. + *
  4238. + * This file is licensed under the terms of the GNU General Public
  4239. + * License version 2. This program is licensed "as is" without any
  4240. + * warranty of any kind, whether express or implied.
  4241. + */
  4242. +
  4243. +#include <linux/module.h>
  4244. +#include <linux/init.h>
  4245. +#include <linux/ring_buffer.h>
  4246. +#include <linux/time.h>
  4247. +#include <linux/hrtimer.h>
  4248. +#include <linux/kthread.h>
  4249. +#include <linux/debugfs.h>
  4250. +#include <linux/seq_file.h>
  4251. +#include <linux/uaccess.h>
  4252. +#include <linux/version.h>
  4253. +#include <linux/delay.h>
  4254. +#include <linux/slab.h>
  4255. +#include <linux/trace_clock.h>
  4256. +
  4257. +#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */
  4258. +#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */
  4259. +#define U64STR_SIZE 22 /* 20 digits max */
  4260. +
  4261. +#define VERSION "1.0.0"
  4262. +#define BANNER "hwlat_detector: "
  4263. +#define DRVNAME "hwlat_detector"
  4264. +#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */
  4265. +#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */
  4266. +#define DEFAULT_LAT_THRESHOLD 10 /* 10us */
  4267. +
  4268. +/* Module metadata */
  4269. +
  4270. +MODULE_LICENSE("GPL");
  4271. +MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
  4272. +MODULE_DESCRIPTION("A simple hardware latency detector");
  4273. +MODULE_VERSION(VERSION);
  4274. +
  4275. +/* Module parameters */
  4276. +
  4277. +static int debug;
  4278. +static int enabled;
  4279. +static int threshold;
  4280. +
  4281. +module_param(debug, int, 0); /* enable debug */
  4282. +module_param(enabled, int, 0); /* enable detector */
  4283. +module_param(threshold, int, 0); /* latency threshold */
  4284. +
  4285. +/* Buffering and sampling */
  4286. +
  4287. +static struct ring_buffer *ring_buffer; /* sample buffer */
  4288. +static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */
  4289. +static unsigned long buf_size = BUF_SIZE_DEFAULT;
  4290. +static struct task_struct *kthread; /* sampling thread */
  4291. +
  4292. +/* DebugFS filesystem entries */
  4293. +
  4294. +static struct dentry *debug_dir; /* debugfs directory */
  4295. +static struct dentry *debug_max; /* maximum TSC delta */
  4296. +static struct dentry *debug_count; /* total detect count */
  4297. +static struct dentry *debug_sample_width; /* sample width us */
  4298. +static struct dentry *debug_sample_window; /* sample window us */
  4299. +static struct dentry *debug_sample; /* raw samples us */
  4300. +static struct dentry *debug_threshold; /* threshold us */
  4301. +static struct dentry *debug_enable; /* enable/disable */
  4302. +
  4303. +/* Individual samples and global state */
  4304. +
  4305. +struct sample; /* latency sample */
  4306. +struct data; /* Global state */
  4307. +
  4308. +/* Sampling functions */
  4309. +static int __buffer_add_sample(struct sample *sample);
  4310. +static struct sample *buffer_get_sample(struct sample *sample);
  4311. +
  4312. +/* Threading and state */
  4313. +static int kthread_fn(void *unused);
  4314. +static int start_kthread(void);
  4315. +static int stop_kthread(void);
  4316. +static void __reset_stats(void);
  4317. +static int init_stats(void);
  4318. +
  4319. +/* Debugfs interface */
  4320. +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
  4321. + size_t cnt, loff_t *ppos, const u64 *entry);
  4322. +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
  4323. + size_t cnt, loff_t *ppos, u64 *entry);
  4324. +static int debug_sample_fopen(struct inode *inode, struct file *filp);
  4325. +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
  4326. + size_t cnt, loff_t *ppos);
  4327. +static int debug_sample_release(struct inode *inode, struct file *filp);
  4328. +static int debug_enable_fopen(struct inode *inode, struct file *filp);
  4329. +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
  4330. + size_t cnt, loff_t *ppos);
  4331. +static ssize_t debug_enable_fwrite(struct file *file,
  4332. + const char __user *user_buffer,
  4333. + size_t user_size, loff_t *offset);
  4334. +
  4335. +/* Initialization functions */
  4336. +static int init_debugfs(void);
  4337. +static void free_debugfs(void);
  4338. +static int detector_init(void);
  4339. +static void detector_exit(void);
  4340. +
  4341. +/* Individual latency samples are stored here when detected and packed into
  4342. + * the ring_buffer circular buffer, where they are overwritten when
  4343. + * more than buf_size/sizeof(sample) samples are received. */
  4344. +struct sample {
  4345. + u64 seqnum; /* unique sequence */
  4346. + u64 duration; /* ktime delta */
  4347. + u64 outer_duration; /* ktime delta (outer loop) */
  4348. + struct timespec timestamp; /* wall time */
  4349. + unsigned long lost;
  4350. +};
  4351. +
  4352. +/* keep the global state somewhere. */
  4353. +static struct data {
  4354. +
  4355. + struct mutex lock; /* protect changes */
  4356. +
  4357. + u64 count; /* total since reset */
  4358. + u64 max_sample; /* max hardware latency */
  4359. + u64 threshold; /* sample threshold level */
  4360. +
  4361. + u64 sample_window; /* total sampling window (on+off) */
  4362. + u64 sample_width; /* active sampling portion of window */
  4363. +
  4364. + atomic_t sample_open; /* whether the sample file is open */
  4365. +
  4366. + wait_queue_head_t wq; /* waitqueue for new sample values */
  4367. +
  4368. +} data;
  4369. +
  4370. +/**
  4371. + * __buffer_add_sample - add a new latency sample recording to the ring buffer
  4372. + * @sample: The new latency sample value
  4373. + *
  4374. + * This receives a new latency sample and records it in a global ring buffer.
  4375. + * No additional locking is used in this case.
  4376. + */
  4377. +static int __buffer_add_sample(struct sample *sample)
  4378. +{
  4379. + return ring_buffer_write(ring_buffer,
  4380. + sizeof(struct sample), sample);
  4381. +}
  4382. +
  4383. +/**
  4384. + * buffer_get_sample - remove a hardware latency sample from the ring buffer
  4385. + * @sample: Pre-allocated storage for the sample
  4386. + *
  4387. + * This retrieves a hardware latency sample from the global circular buffer
  4388. + */
  4389. +static struct sample *buffer_get_sample(struct sample *sample)
  4390. +{
  4391. + struct ring_buffer_event *e = NULL;
  4392. + struct sample *s = NULL;
  4393. + unsigned int cpu = 0;
  4394. +
  4395. + if (!sample)
  4396. + return NULL;
  4397. +
  4398. + mutex_lock(&ring_buffer_mutex);
  4399. + for_each_online_cpu(cpu) {
  4400. + e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost);
  4401. + if (e)
  4402. + break;
  4403. + }
  4404. +
  4405. + if (e) {
  4406. + s = ring_buffer_event_data(e);
  4407. + memcpy(sample, s, sizeof(struct sample));
  4408. + } else
  4409. + sample = NULL;
  4410. + mutex_unlock(&ring_buffer_mutex);
  4411. +
  4412. + return sample;
  4413. +}
  4414. +
  4415. +#ifndef CONFIG_TRACING
  4416. +#define time_type ktime_t
  4417. +#define time_get() ktime_get()
  4418. +#define time_to_us(x) ktime_to_us(x)
  4419. +#define time_sub(a, b) ktime_sub(a, b)
  4420. +#define init_time(a, b) (a).tv64 = b
  4421. +#define time_u64(a) ((a).tv64)
  4422. +#else
  4423. +#define time_type u64
  4424. +#define time_get() trace_clock_local()
  4425. +#define time_to_us(x) div_u64(x, 1000)
  4426. +#define time_sub(a, b) ((a) - (b))
  4427. +#define init_time(a, b) (a = b)
  4428. +#define time_u64(a) a
  4429. +#endif
  4430. +/**
  4431. + * get_sample - sample the CPU TSC and look for likely hardware latencies
  4432. + *
  4433. + * Used to repeatedly capture the CPU TSC (or similar), looking for potential
  4434. + * hardware-induced latency. Called with interrupts disabled and with
  4435. + * data.lock held.
  4436. + */
  4437. +static int get_sample(void)
  4438. +{
  4439. + time_type start, t1, t2, last_t2;
  4440. + s64 diff, total = 0;
  4441. + u64 sample = 0;
  4442. + u64 outer_sample = 0;
  4443. + int ret = -1;
  4444. +
  4445. + init_time(last_t2, 0);
  4446. + start = time_get(); /* start timestamp */
  4447. +
  4448. + do {
  4449. +
  4450. + t1 = time_get(); /* we'll look for a discontinuity */
  4451. + t2 = time_get();
  4452. +
  4453. + if (time_u64(last_t2)) {
  4454. + /* Check the delta from outer loop (t2 to next t1) */
  4455. + diff = time_to_us(time_sub(t1, last_t2));
  4456. + /* This shouldn't happen */
  4457. + if (diff < 0) {
  4458. + pr_err(BANNER "time running backwards\n");
  4459. + goto out;
  4460. + }
  4461. + if (diff > outer_sample)
  4462. + outer_sample = diff;
  4463. + }
  4464. + last_t2 = t2;
  4465. +
  4466. + total = time_to_us(time_sub(t2, start)); /* sample width */
  4467. +
  4468. + /* This checks the inner loop (t1 to t2) */
  4469. + diff = time_to_us(time_sub(t2, t1)); /* current diff */
  4470. +
  4471. + /* This shouldn't happen */
  4472. + if (diff < 0) {
  4473. + pr_err(BANNER "time running backwards\n");
  4474. + goto out;
  4475. + }
  4476. +
  4477. + if (diff > sample)
  4478. + sample = diff; /* only want highest value */
  4479. +
  4480. + } while (total <= data.sample_width);
  4481. +
  4482. + ret = 0;
  4483. +
  4484. + /* If we exceed the threshold value, we have found a hardware latency */
  4485. + if (sample > data.threshold || outer_sample > data.threshold) {
  4486. + struct sample s;
  4487. +
  4488. + ret = 1;
  4489. +
  4490. + data.count++;
  4491. + s.seqnum = data.count;
  4492. + s.duration = sample;
  4493. + s.outer_duration = outer_sample;
  4494. + s.timestamp = CURRENT_TIME;
  4495. + __buffer_add_sample(&s);
  4496. +
  4497. + /* Keep a running maximum ever recorded hardware latency */
  4498. + if (sample > data.max_sample)
  4499. + data.max_sample = sample;
  4500. + }
  4501. +
  4502. +out:
  4503. + return ret;
  4504. +}
  4505. +
  4506. +/*
  4507. + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
  4508. + * @unused: A required part of the kthread API.
  4509. + *
  4510. + * Used to periodically sample the CPU TSC via a call to get_sample. We
  4511. + * disable interrupts, which does (intentionally) introduce latency since we
  4512. + * need to ensure nothing else might be running (and thus pre-empting).
  4513. + * Obviously this should never be used in production environments.
  4514. + *
  4515. + * Currently this runs on whichever CPU it was scheduled on, but most
  4516. + * real-world hardware latency situations occur across several CPUs,
  4517. + * but we might later generalize this if we find there are any actual
  4518. + * systems with alternate SMI delivery or other hardware latencies.
  4519. + */
  4520. +static int kthread_fn(void *unused)
  4521. +{
  4522. + int ret;
  4523. + u64 interval;
  4524. +
  4525. + while (!kthread_should_stop()) {
  4526. +
  4527. + mutex_lock(&data.lock);
  4528. +
  4529. + local_irq_disable();
  4530. + ret = get_sample();
  4531. + local_irq_enable();
  4532. +
  4533. + if (ret > 0)
  4534. + wake_up(&data.wq); /* wake up reader(s) */
  4535. +
  4536. + interval = data.sample_window - data.sample_width;
  4537. + do_div(interval, USEC_PER_MSEC); /* modifies interval value */
  4538. +
  4539. + mutex_unlock(&data.lock);
  4540. +
  4541. + if (msleep_interruptible(interval))
  4542. + break;
  4543. + }
  4544. +
  4545. + return 0;
  4546. +}
  4547. +
  4548. +/**
  4549. + * start_kthread - Kick off the hardware latency sampling/detector kthread
  4550. + *
  4551. + * This starts a kernel thread that will sit and sample the CPU timestamp
  4552. + * counter (TSC or similar) and look for potential hardware latencies.
  4553. + */
  4554. +static int start_kthread(void)
  4555. +{
  4556. + kthread = kthread_run(kthread_fn, NULL,
  4557. + DRVNAME);
  4558. + if (IS_ERR(kthread)) {
  4559. + pr_err(BANNER "could not start sampling thread\n");
  4560. + enabled = 0;
  4561. + return -ENOMEM;
  4562. + }
  4563. +
  4564. + return 0;
  4565. +}
  4566. +
  4567. +/**
  4568. + * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
  4569. + *
  4570. + * This kicks the running hardware latency sampling/detector kernel thread and
  4571. + * tells it to stop sampling now. Use this on unload and at system shutdown.
  4572. + */
  4573. +static int stop_kthread(void)
  4574. +{
  4575. + int ret;
  4576. +
  4577. + ret = kthread_stop(kthread);
  4578. +
  4579. + return ret;
  4580. +}
  4581. +
  4582. +/**
  4583. + * __reset_stats - Reset statistics for the hardware latency detector
  4584. + *
  4585. + * We use data to store various statistics and global state. We call this
  4586. + * function in order to reset those when "enable" is toggled on or off, and
  4587. + * also at initialization. Should be called with data.lock held.
  4588. + */
  4589. +static void __reset_stats(void)
  4590. +{
  4591. + data.count = 0;
  4592. + data.max_sample = 0;
  4593. + ring_buffer_reset(ring_buffer); /* flush out old sample entries */
  4594. +}
  4595. +
  4596. +/**
  4597. + * init_stats - Setup global state statistics for the hardware latency detector
  4598. + *
  4599. + * We use data to store various statistics and global state. We also use
  4600. + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware
  4601. + * induced system latencies. This function initializes these structures and
  4602. + * allocates the global ring buffer also.
  4603. + */
  4604. +static int init_stats(void)
  4605. +{
  4606. + int ret = -ENOMEM;
  4607. +
  4608. + mutex_init(&data.lock);
  4609. + init_waitqueue_head(&data.wq);
  4610. + atomic_set(&data.sample_open, 0);
  4611. +
  4612. + ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS);
  4613. +
  4614. + if (WARN(!ring_buffer, KERN_ERR BANNER
  4615. + "failed to allocate ring buffer!\n"))
  4616. + goto out;
  4617. +
  4618. + __reset_stats();
  4619. + data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */
  4620. + data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
  4621. + data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */
  4622. +
  4623. + ret = 0;
  4624. +
  4625. +out:
  4626. + return ret;
  4627. +
  4628. +}
  4629. +
  4630. +/*
  4631. + * simple_data_read - Wrapper read function for global state debugfs entries
  4632. + * @filp: The active open file structure for the debugfs "file"
  4633. + * @ubuf: The userspace provided buffer to read value into
  4634. + * @cnt: The maximum number of bytes to read
  4635. + * @ppos: The current "file" position
  4636. + * @entry: The entry to read from
  4637. + *
  4638. + * This function provides a generic read implementation for the global state
  4639. + * "data" structure debugfs filesystem entries. It would be nice to use
  4640. + * simple_attr_read directly, but we need to make sure that the data.lock
  4641. + * is held during the actual read.
  4642. + */
  4643. +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
  4644. + size_t cnt, loff_t *ppos, const u64 *entry)
  4645. +{
  4646. + char buf[U64STR_SIZE];
  4647. + u64 val = 0;
  4648. + int len = 0;
  4649. +
  4650. + memset(buf, 0, sizeof(buf));
  4651. +
  4652. + if (!entry)
  4653. + return -EFAULT;
  4654. +
  4655. + mutex_lock(&data.lock);
  4656. + val = *entry;
  4657. + mutex_unlock(&data.lock);
  4658. +
  4659. + len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val);
  4660. +
  4661. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
  4662. +
  4663. +}
  4664. +
  4665. +/*
  4666. + * simple_data_write - Wrapper write function for global state debugfs entries
  4667. + * @filp: The active open file structure for the debugfs "file"
  4668. + * @ubuf: The userspace provided buffer to write value from
  4669. + * @cnt: The maximum number of bytes to write
  4670. + * @ppos: The current "file" position
  4671. + * @entry: The entry to write to
  4672. + *
  4673. + * This function provides a generic write implementation for the global state
  4674. + * "data" structure debugfs filesystem entries. It would be nice to use
  4675. + * simple_attr_write directly, but we need to make sure that the data.lock
  4676. + * is held during the actual write.
  4677. + */
  4678. +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
  4679. + size_t cnt, loff_t *ppos, u64 *entry)
  4680. +{
  4681. + char buf[U64STR_SIZE];
  4682. + int csize = min(cnt, sizeof(buf));
  4683. + u64 val = 0;
  4684. + int err = 0;
  4685. +
  4686. + memset(buf, '\0', sizeof(buf));
  4687. + if (copy_from_user(buf, ubuf, csize))
  4688. + return -EFAULT;
  4689. +
  4690. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  4691. + err = kstrtoull(buf, 10, &val);
  4692. + if (err)
  4693. + return -EINVAL;
  4694. +
  4695. + mutex_lock(&data.lock);
  4696. + *entry = val;
  4697. + mutex_unlock(&data.lock);
  4698. +
  4699. + return csize;
  4700. +}
  4701. +
  4702. +/**
  4703. + * debug_count_fopen - Open function for "count" debugfs entry
  4704. + * @inode: The in-kernel inode representation of the debugfs "file"
  4705. + * @filp: The active open file structure for the debugfs "file"
  4706. + *
  4707. + * This function provides an open implementation for the "count" debugfs
  4708. + * interface to the hardware latency detector.
  4709. + */
  4710. +static int debug_count_fopen(struct inode *inode, struct file *filp)
  4711. +{
  4712. + return 0;
  4713. +}
  4714. +
  4715. +/**
  4716. + * debug_count_fread - Read function for "count" debugfs entry
  4717. + * @filp: The active open file structure for the debugfs "file"
  4718. + * @ubuf: The userspace provided buffer to read value into
  4719. + * @cnt: The maximum number of bytes to read
  4720. + * @ppos: The current "file" position
  4721. + *
  4722. + * This function provides a read implementation for the "count" debugfs
  4723. + * interface to the hardware latency detector. Can be used to read the
  4724. + * number of latency readings exceeding the configured threshold since
  4725. + * the detector was last reset (e.g. by writing a zero into "count").
  4726. + */
  4727. +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf,
  4728. + size_t cnt, loff_t *ppos)
  4729. +{
  4730. + return simple_data_read(filp, ubuf, cnt, ppos, &data.count);
  4731. +}
  4732. +
  4733. +/**
  4734. + * debug_count_fwrite - Write function for "count" debugfs entry
  4735. + * @filp: The active open file structure for the debugfs "file"
  4736. + * @ubuf: The user buffer that contains the value to write
  4737. + * @cnt: The maximum number of bytes to write to "file"
  4738. + * @ppos: The current position in the debugfs "file"
  4739. + *
  4740. + * This function provides a write implementation for the "count" debugfs
  4741. + * interface to the hardware latency detector. Can be used to write a
  4742. + * desired value, especially to zero the total count.
  4743. + */
  4744. +static ssize_t debug_count_fwrite(struct file *filp,
  4745. + const char __user *ubuf,
  4746. + size_t cnt,
  4747. + loff_t *ppos)
  4748. +{
  4749. + return simple_data_write(filp, ubuf, cnt, ppos, &data.count);
  4750. +}
  4751. +
  4752. +/**
  4753. + * debug_enable_fopen - Dummy open function for "enable" debugfs interface
  4754. + * @inode: The in-kernel inode representation of the debugfs "file"
  4755. + * @filp: The active open file structure for the debugfs "file"
  4756. + *
  4757. + * This function provides an open implementation for the "enable" debugfs
  4758. + * interface to the hardware latency detector.
  4759. + */
  4760. +static int debug_enable_fopen(struct inode *inode, struct file *filp)
  4761. +{
  4762. + return 0;
  4763. +}
  4764. +
  4765. +/**
  4766. + * debug_enable_fread - Read function for "enable" debugfs interface
  4767. + * @filp: The active open file structure for the debugfs "file"
  4768. + * @ubuf: The userspace provided buffer to read value into
  4769. + * @cnt: The maximum number of bytes to read
  4770. + * @ppos: The current "file" position
  4771. + *
  4772. + * This function provides a read implementation for the "enable" debugfs
  4773. + * interface to the hardware latency detector. Can be used to determine
  4774. + * whether the detector is currently enabled ("0\n" or "1\n" returned).
  4775. + */
  4776. +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
  4777. + size_t cnt, loff_t *ppos)
  4778. +{
  4779. + char buf[4];
  4780. +
  4781. + if ((cnt < sizeof(buf)) || (*ppos))
  4782. + return 0;
  4783. +
  4784. + buf[0] = enabled ? '1' : '0';
  4785. + buf[1] = '\n';
  4786. + buf[2] = '\0';
  4787. + if (copy_to_user(ubuf, buf, strlen(buf)))
  4788. + return -EFAULT;
  4789. + return *ppos = strlen(buf);
  4790. +}
  4791. +
  4792. +/**
  4793. + * debug_enable_fwrite - Write function for "enable" debugfs interface
  4794. + * @filp: The active open file structure for the debugfs "file"
  4795. + * @ubuf: The user buffer that contains the value to write
  4796. + * @cnt: The maximum number of bytes to write to "file"
  4797. + * @ppos: The current position in the debugfs "file"
  4798. + *
  4799. + * This function provides a write implementation for the "enable" debugfs
  4800. + * interface to the hardware latency detector. Can be used to enable or
  4801. + * disable the detector, which will have the side-effect of possibly
  4802. + * also resetting the global stats and kicking off the measuring
  4803. + * kthread (on an enable) or the converse (upon a disable).
  4804. + */
  4805. +static ssize_t debug_enable_fwrite(struct file *filp,
  4806. + const char __user *ubuf,
  4807. + size_t cnt,
  4808. + loff_t *ppos)
  4809. +{
  4810. + char buf[4];
  4811. + int csize = min(cnt, sizeof(buf));
  4812. + long val = 0;
  4813. + int err = 0;
  4814. +
  4815. + memset(buf, '\0', sizeof(buf));
  4816. + if (copy_from_user(buf, ubuf, csize))
  4817. + return -EFAULT;
  4818. +
  4819. + buf[sizeof(buf)-1] = '\0'; /* just in case */
  4820. + err = kstrtoul(buf, 10, &val);
  4821. + if (err)
  4822. + return -EINVAL;
  4823. +
  4824. + if (val) {
  4825. + if (enabled)
  4826. + goto unlock;
  4827. + enabled = 1;
  4828. + __reset_stats();
  4829. + if (start_kthread())
  4830. + return -EFAULT;
  4831. + } else {
  4832. + if (!enabled)
  4833. + goto unlock;
  4834. + enabled = 0;
  4835. + err = stop_kthread();
  4836. + if (err) {
  4837. + pr_err(BANNER "cannot stop kthread\n");
  4838. + return -EFAULT;
  4839. + }
  4840. + wake_up(&data.wq); /* reader(s) should return */
  4841. + }
  4842. +unlock:
  4843. + return csize;
  4844. +}
  4845. +
  4846. +/**
  4847. + * debug_max_fopen - Open function for "max" debugfs entry
  4848. + * @inode: The in-kernel inode representation of the debugfs "file"
  4849. + * @filp: The active open file structure for the debugfs "file"
  4850. + *
  4851. + * This function provides an open implementation for the "max" debugfs
  4852. + * interface to the hardware latency detector.
  4853. + */
  4854. +static int debug_max_fopen(struct inode *inode, struct file *filp)
  4855. +{
  4856. + return 0;
  4857. +}
  4858. +
  4859. +/**
  4860. + * debug_max_fread - Read function for "max" debugfs entry
  4861. + * @filp: The active open file structure for the debugfs "file"
  4862. + * @ubuf: The userspace provided buffer to read value into
  4863. + * @cnt: The maximum number of bytes to read
  4864. + * @ppos: The current "file" position
  4865. + *
  4866. + * This function provides a read implementation for the "max" debugfs
  4867. + * interface to the hardware latency detector. Can be used to determine
  4868. + * the maximum latency value observed since it was last reset.
  4869. + */
  4870. +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
  4871. + size_t cnt, loff_t *ppos)
  4872. +{
  4873. + return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample);
  4874. +}
  4875. +
  4876. +/**
  4877. + * debug_max_fwrite - Write function for "max" debugfs entry
  4878. + * @filp: The active open file structure for the debugfs "file"
  4879. + * @ubuf: The user buffer that contains the value to write
  4880. + * @cnt: The maximum number of bytes to write to "file"
  4881. + * @ppos: The current position in the debugfs "file"
  4882. + *
  4883. + * This function provides a write implementation for the "max" debugfs
  4884. + * interface to the hardware latency detector. Can be used to reset the
  4885. + * maximum or set it to some other desired value - if, then, subsequent
  4886. + * measurements exceed this value, the maximum will be updated.
  4887. + */
  4888. +static ssize_t debug_max_fwrite(struct file *filp,
  4889. + const char __user *ubuf,
  4890. + size_t cnt,
  4891. + loff_t *ppos)
  4892. +{
  4893. + return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample);
  4894. +}
  4895. +
  4896. +
  4897. +/**
  4898. + * debug_sample_fopen - An open function for "sample" debugfs interface
  4899. + * @inode: The in-kernel inode representation of this debugfs "file"
  4900. + * @filp: The active open file structure for the debugfs "file"
  4901. + *
  4902. + * This function handles opening the "sample" file within the hardware
  4903. + * latency detector debugfs directory interface. This file is used to read
  4904. + * raw samples from the global ring_buffer and allows the user to see a
  4905. + * running latency history. Can be opened blocking or non-blocking,
  4906. + * affecting whether it behaves as a buffer read pipe, or does not.
  4907. + * Implements simple locking to prevent multiple simultaneous use.
  4908. + */
  4909. +static int debug_sample_fopen(struct inode *inode, struct file *filp)
  4910. +{
  4911. + if (!atomic_add_unless(&data.sample_open, 1, 1))
  4912. + return -EBUSY;
  4913. + else
  4914. + return 0;
  4915. +}
  4916. +
  4917. +/**
  4918. + * debug_sample_fread - A read function for "sample" debugfs interface
  4919. + * @filp: The active open file structure for the debugfs "file"
  4920. + * @ubuf: The user buffer that will contain the samples read
  4921. + * @cnt: The maximum bytes to read from the debugfs "file"
  4922. + * @ppos: The current position in the debugfs "file"
  4923. + *
  4924. + * This function handles reading from the "sample" file within the hardware
  4925. + * latency detector debugfs directory interface. This file is used to read
  4926. + * raw samples from the global ring_buffer and allows the user to see a
  4927. + * running latency history. By default this will block pending a new
  4928. + * value written into the sample buffer, unless there are already a
  4929. + * number of value(s) waiting in the buffer, or the sample file was
  4930. + * previously opened in a non-blocking mode of operation.
  4931. + */
  4932. +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
  4933. + size_t cnt, loff_t *ppos)
  4934. +{
  4935. + int len = 0;
  4936. + char buf[64];
  4937. + struct sample *sample = NULL;
  4938. +
  4939. + if (!enabled)
  4940. + return 0;
  4941. +
  4942. + sample = kzalloc(sizeof(struct sample), GFP_KERNEL);
  4943. + if (!sample)
  4944. + return -ENOMEM;
  4945. +
  4946. + while (!buffer_get_sample(sample)) {
  4947. +
  4948. + DEFINE_WAIT(wait);
  4949. +
  4950. + if (filp->f_flags & O_NONBLOCK) {
  4951. + len = -EAGAIN;
  4952. + goto out;
  4953. + }
  4954. +
  4955. + prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE);
  4956. + schedule();
  4957. + finish_wait(&data.wq, &wait);
  4958. +
  4959. + if (signal_pending(current)) {
  4960. + len = -EINTR;
  4961. + goto out;
  4962. + }
  4963. +
  4964. + if (!enabled) { /* enable was toggled */
  4965. + len = 0;
  4966. + goto out;
  4967. + }
  4968. + }
  4969. +
  4970. + len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n",
  4971. + sample->timestamp.tv_sec,
  4972. + sample->timestamp.tv_nsec,
  4973. + sample->duration,
  4974. + sample->outer_duration);
  4975. +
  4976. +
  4977. + /* handling partial reads is more trouble than it's worth */
  4978. + if (len > cnt)
  4979. + goto out;
  4980. +
  4981. + if (copy_to_user(ubuf, buf, len))
  4982. + len = -EFAULT;
  4983. +
  4984. +out:
  4985. + kfree(sample);
  4986. + return len;
  4987. +}
  4988. +
  4989. +/**
  4990. + * debug_sample_release - Release function for "sample" debugfs interface
  4991. + * @inode: The in-kernel inode representation of the debugfs "file"
  4992. + * @filp: The active open file structure for the debugfs "file"
  4993. + *
  4994. + * This function completes the close of the debugfs interface "sample" file.
  4995. + * Frees the sample_open "lock" so that other users may open the interface.
  4996. + */
  4997. +static int debug_sample_release(struct inode *inode, struct file *filp)
  4998. +{
  4999. + atomic_dec(&data.sample_open);
  5000. +
  5001. + return 0;
  5002. +}
  5003. +
  5004. +/**
  5005. + * debug_threshold_fopen - Open function for "threshold" debugfs entry
  5006. + * @inode: The in-kernel inode representation of the debugfs "file"
  5007. + * @filp: The active open file structure for the debugfs "file"
  5008. + *
  5009. + * This function provides an open implementation for the "threshold" debugfs
  5010. + * interface to the hardware latency detector.
  5011. + */
  5012. +static int debug_threshold_fopen(struct inode *inode, struct file *filp)
  5013. +{
  5014. + return 0;
  5015. +}
  5016. +
  5017. +/**
  5018. + * debug_threshold_fread - Read function for "threshold" debugfs entry
  5019. + * @filp: The active open file structure for the debugfs "file"
  5020. + * @ubuf: The userspace provided buffer to read value into
  5021. + * @cnt: The maximum number of bytes to read
  5022. + * @ppos: The current "file" position
  5023. + *
  5024. + * This function provides a read implementation for the "threshold" debugfs
  5025. + * interface to the hardware latency detector. It can be used to determine
  5026. + * the current threshold level at which a latency will be recorded in the
  5027. + * global ring buffer, typically on the order of 10us.
  5028. + */
  5029. +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf,
  5030. + size_t cnt, loff_t *ppos)
  5031. +{
  5032. + return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold);
  5033. +}
  5034. +
  5035. +/**
  5036. + * debug_threshold_fwrite - Write function for "threshold" debugfs entry
  5037. + * @filp: The active open file structure for the debugfs "file"
  5038. + * @ubuf: The user buffer that contains the value to write
  5039. + * @cnt: The maximum number of bytes to write to "file"
  5040. + * @ppos: The current position in the debugfs "file"
  5041. + *
  5042. + * This function provides a write implementation for the "threshold" debugfs
  5043. + * interface to the hardware latency detector. It can be used to configure
  5044. + * the threshold level at which any subsequently detected latencies will
  5045. + * be recorded into the global ring buffer.
  5046. + */
  5047. +static ssize_t debug_threshold_fwrite(struct file *filp,
  5048. + const char __user *ubuf,
  5049. + size_t cnt,
  5050. + loff_t *ppos)
  5051. +{
  5052. + int ret;
  5053. +
  5054. + ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold);
  5055. +
  5056. + if (enabled)
  5057. + wake_up_process(kthread);
  5058. +
  5059. + return ret;
  5060. +}
  5061. +
  5062. +/**
  5063. + * debug_width_fopen - Open function for "width" debugfs entry
  5064. + * @inode: The in-kernel inode representation of the debugfs "file"
  5065. + * @filp: The active open file structure for the debugfs "file"
  5066. + *
  5067. + * This function provides an open implementation for the "width" debugfs
  5068. + * interface to the hardware latency detector.
  5069. + */
  5070. +static int debug_width_fopen(struct inode *inode, struct file *filp)
  5071. +{
  5072. + return 0;
  5073. +}
  5074. +
  5075. +/**
  5076. + * debug_width_fread - Read function for "width" debugfs entry
  5077. + * @filp: The active open file structure for the debugfs "file"
  5078. + * @ubuf: The userspace provided buffer to read value into
  5079. + * @cnt: The maximum number of bytes to read
  5080. + * @ppos: The current "file" position
  5081. + *
  5082. + * This function provides a read implementation for the "width" debugfs
  5083. + * interface to the hardware latency detector. It can be used to determine
  5084. + * for how many us of the total window us we will actively sample for any
  5085. + * hardware-induced latency periods. Obviously, it is not possible to
  5086. + * sample constantly and have the system respond to a sample reader, or,
  5087. + * worse, without having the system appear to have gone out to lunch.
  5088. + */
  5089. +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf,
  5090. + size_t cnt, loff_t *ppos)
  5091. +{
  5092. + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width);
  5093. +}
  5094. +
  5095. +/**
  5096. + * debug_width_fwrite - Write function for "width" debugfs entry
  5097. + * @filp: The active open file structure for the debugfs "file"
  5098. + * @ubuf: The user buffer that contains the value to write
  5099. + * @cnt: The maximum number of bytes to write to "file"
  5100. + * @ppos: The current position in the debugfs "file"
  5101. + *
  5102. + * This function provides a write implementation for the "width" debugfs
  5103. + * interface to the hardware latency detector. It can be used to configure
  5104. + * for how many us of the total window us we will actively sample for any
  5105. + * hardware-induced latency periods. Obviously, it is not possible to
  5106. + * sample constantly and have the system respond to a sample reader, or,
  5107. + * worse, without having the system appear to have gone out to lunch. It
  5108. + * is enforced that width is less than the total window size.
  5109. + */
  5110. +static ssize_t debug_width_fwrite(struct file *filp,
  5111. + const char __user *ubuf,
  5112. + size_t cnt,
  5113. + loff_t *ppos)
  5114. +{
  5115. + char buf[U64STR_SIZE];
  5116. + int csize = min(cnt, sizeof(buf));
  5117. + u64 val = 0;
  5118. + int err = 0;
  5119. +
  5120. + memset(buf, '\0', sizeof(buf));
  5121. + if (copy_from_user(buf, ubuf, csize))
  5122. + return -EFAULT;
  5123. +
  5124. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  5125. + err = kstrtoull(buf, 10, &val);
  5126. + if (err)
  5127. + return -EINVAL;
  5128. +
  5129. + mutex_lock(&data.lock);
  5130. + if (val < data.sample_window)
  5131. + data.sample_width = val;
  5132. + else {
  5133. + mutex_unlock(&data.lock);
  5134. + return -EINVAL;
  5135. + }
  5136. + mutex_unlock(&data.lock);
  5137. +
  5138. + if (enabled)
  5139. + wake_up_process(kthread);
  5140. +
  5141. + return csize;
  5142. +}
  5143. +
  5144. +/**
  5145. + * debug_window_fopen - Open function for "window" debugfs entry
  5146. + * @inode: The in-kernel inode representation of the debugfs "file"
  5147. + * @filp: The active open file structure for the debugfs "file"
  5148. + *
  5149. + * This function provides an open implementation for the "window" debugfs
  5150. + * interface to the hardware latency detector. The window is the total time
  5151. + * in us that will be considered one sample period. Conceptually, windows
  5152. + * occur back-to-back and contain a sample width period during which
  5153. + * actual sampling occurs.
  5154. + */
  5155. +static int debug_window_fopen(struct inode *inode, struct file *filp)
  5156. +{
  5157. + return 0;
  5158. +}
  5159. +
  5160. +/**
  5161. + * debug_window_fread - Read function for "window" debugfs entry
  5162. + * @filp: The active open file structure for the debugfs "file"
  5163. + * @ubuf: The userspace provided buffer to read value into
  5164. + * @cnt: The maximum number of bytes to read
  5165. + * @ppos: The current "file" position
  5166. + *
  5167. + * This function provides a read implementation for the "window" debugfs
  5168. + * interface to the hardware latency detector. The window is the total time
  5169. + * in us that will be considered one sample period. Conceptually, windows
  5170. + * occur back-to-back and contain a sample width period during which
  5171. + * actual sampling occurs. Can be used to read the total window size.
  5172. + */
  5173. +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf,
  5174. + size_t cnt, loff_t *ppos)
  5175. +{
  5176. + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window);
  5177. +}
  5178. +
  5179. +/**
  5180. + * debug_window_fwrite - Write function for "window" debugfs entry
  5181. + * @filp: The active open file structure for the debugfs "file"
  5182. + * @ubuf: The user buffer that contains the value to write
  5183. + * @cnt: The maximum number of bytes to write to "file"
  5184. + * @ppos: The current position in the debugfs "file"
  5185. + *
  5186. + * This function provides a write implementation for the "window" debugfs
  5187. + * interface to the hardware latency detector. The window is the total time
  5188. + * in us that will be considered one sample period. Conceptually, windows
  5189. + * occur back-to-back and contain a sample width period during which
  5190. + * actual sampling occurs. Can be used to write a new total window size. It
  5191. + * is enforced that any value written must be greater than the sample width
  5192. + * size, or an error results.
  5193. + */
  5194. +static ssize_t debug_window_fwrite(struct file *filp,
  5195. + const char __user *ubuf,
  5196. + size_t cnt,
  5197. + loff_t *ppos)
  5198. +{
  5199. + char buf[U64STR_SIZE];
  5200. + int csize = min(cnt, sizeof(buf));
  5201. + u64 val = 0;
  5202. + int err = 0;
  5203. +
  5204. + memset(buf, '\0', sizeof(buf));
  5205. + if (copy_from_user(buf, ubuf, csize))
  5206. + return -EFAULT;
  5207. +
  5208. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  5209. + err = kstrtoull(buf, 10, &val);
  5210. + if (err)
  5211. + return -EINVAL;
  5212. +
  5213. + mutex_lock(&data.lock);
  5214. + if (data.sample_width < val)
  5215. + data.sample_window = val;
  5216. + else {
  5217. + mutex_unlock(&data.lock);
  5218. + return -EINVAL;
  5219. + }
  5220. + mutex_unlock(&data.lock);
  5221. +
  5222. + return csize;
  5223. +}
  5224. +
  5225. +/*
  5226. + * Function pointers for the "count" debugfs file operations
  5227. + */
  5228. +static const struct file_operations count_fops = {
  5229. + .open = debug_count_fopen,
  5230. + .read = debug_count_fread,
  5231. + .write = debug_count_fwrite,
  5232. + .owner = THIS_MODULE,
  5233. +};
  5234. +
  5235. +/*
  5236. + * Function pointers for the "enable" debugfs file operations
  5237. + */
  5238. +static const struct file_operations enable_fops = {
  5239. + .open = debug_enable_fopen,
  5240. + .read = debug_enable_fread,
  5241. + .write = debug_enable_fwrite,
  5242. + .owner = THIS_MODULE,
  5243. +};
  5244. +
  5245. +/*
  5246. + * Function pointers for the "max" debugfs file operations
  5247. + */
  5248. +static const struct file_operations max_fops = {
  5249. + .open = debug_max_fopen,
  5250. + .read = debug_max_fread,
  5251. + .write = debug_max_fwrite,
  5252. + .owner = THIS_MODULE,
  5253. +};
  5254. +
  5255. +/*
  5256. + * Function pointers for the "sample" debugfs file operations
  5257. + */
  5258. +static const struct file_operations sample_fops = {
  5259. + .open = debug_sample_fopen,
  5260. + .read = debug_sample_fread,
  5261. + .release = debug_sample_release,
  5262. + .owner = THIS_MODULE,
  5263. +};
  5264. +
  5265. +/*
  5266. + * Function pointers for the "threshold" debugfs file operations
  5267. + */
  5268. +static const struct file_operations threshold_fops = {
  5269. + .open = debug_threshold_fopen,
  5270. + .read = debug_threshold_fread,
  5271. + .write = debug_threshold_fwrite,
  5272. + .owner = THIS_MODULE,
  5273. +};
  5274. +
  5275. +/*
  5276. + * Function pointers for the "width" debugfs file operations
  5277. + */
  5278. +static const struct file_operations width_fops = {
  5279. + .open = debug_width_fopen,
  5280. + .read = debug_width_fread,
  5281. + .write = debug_width_fwrite,
  5282. + .owner = THIS_MODULE,
  5283. +};
  5284. +
  5285. +/*
  5286. + * Function pointers for the "window" debugfs file operations
  5287. + */
  5288. +static const struct file_operations window_fops = {
  5289. + .open = debug_window_fopen,
  5290. + .read = debug_window_fread,
  5291. + .write = debug_window_fwrite,
  5292. + .owner = THIS_MODULE,
  5293. +};
  5294. +
  5295. +/**
  5296. + * init_debugfs - A function to initialize the debugfs interface files
  5297. + *
  5298. + * This function creates entries in debugfs for "hwlat_detector", including
  5299. + * files to read values from the detector, current samples, and the
  5300. + * maximum sample that has been captured since the hardware latency
  5301. + * detector was started.
  5302. + */
  5303. +static int init_debugfs(void)
  5304. +{
  5305. + int ret = -ENOMEM;
  5306. +
  5307. + debug_dir = debugfs_create_dir(DRVNAME, NULL);
  5308. + if (!debug_dir)
  5309. + goto err_debug_dir;
  5310. +
  5311. + debug_sample = debugfs_create_file("sample", 0444,
  5312. + debug_dir, NULL,
  5313. + &sample_fops);
  5314. + if (!debug_sample)
  5315. + goto err_sample;
  5316. +
  5317. + debug_count = debugfs_create_file("count", 0444,
  5318. + debug_dir, NULL,
  5319. + &count_fops);
  5320. + if (!debug_count)
  5321. + goto err_count;
  5322. +
  5323. + debug_max = debugfs_create_file("max", 0444,
  5324. + debug_dir, NULL,
  5325. + &max_fops);
  5326. + if (!debug_max)
  5327. + goto err_max;
  5328. +
  5329. + debug_sample_window = debugfs_create_file("window", 0644,
  5330. + debug_dir, NULL,
  5331. + &window_fops);
  5332. + if (!debug_sample_window)
  5333. + goto err_window;
  5334. +
  5335. + debug_sample_width = debugfs_create_file("width", 0644,
  5336. + debug_dir, NULL,
  5337. + &width_fops);
  5338. + if (!debug_sample_width)
  5339. + goto err_width;
  5340. +
  5341. + debug_threshold = debugfs_create_file("threshold", 0644,
  5342. + debug_dir, NULL,
  5343. + &threshold_fops);
  5344. + if (!debug_threshold)
  5345. + goto err_threshold;
  5346. +
  5347. + debug_enable = debugfs_create_file("enable", 0644,
  5348. + debug_dir, &enabled,
  5349. + &enable_fops);
  5350. + if (!debug_enable)
  5351. + goto err_enable;
  5352. +
  5353. + else {
  5354. + ret = 0;
  5355. + goto out;
  5356. + }
  5357. +
  5358. +err_enable:
  5359. + debugfs_remove(debug_threshold);
  5360. +err_threshold:
  5361. + debugfs_remove(debug_sample_width);
  5362. +err_width:
  5363. + debugfs_remove(debug_sample_window);
  5364. +err_window:
  5365. + debugfs_remove(debug_max);
  5366. +err_max:
  5367. + debugfs_remove(debug_count);
  5368. +err_count:
  5369. + debugfs_remove(debug_sample);
  5370. +err_sample:
  5371. + debugfs_remove(debug_dir);
  5372. +err_debug_dir:
  5373. +out:
  5374. + return ret;
  5375. +}
  5376. +
  5377. +/**
  5378. + * free_debugfs - A function to cleanup the debugfs file interface
  5379. + */
  5380. +static void free_debugfs(void)
  5381. +{
  5382. + /* could also use a debugfs_remove_recursive */
  5383. + debugfs_remove(debug_enable);
  5384. + debugfs_remove(debug_threshold);
  5385. + debugfs_remove(debug_sample_width);
  5386. + debugfs_remove(debug_sample_window);
  5387. + debugfs_remove(debug_max);
  5388. + debugfs_remove(debug_count);
  5389. + debugfs_remove(debug_sample);
  5390. + debugfs_remove(debug_dir);
  5391. +}
  5392. +
  5393. +/**
  5394. + * detector_init - Standard module initialization code
  5395. + */
  5396. +static int detector_init(void)
  5397. +{
  5398. + int ret = -ENOMEM;
  5399. +
  5400. + pr_info(BANNER "version %s\n", VERSION);
  5401. +
  5402. + ret = init_stats();
  5403. + if (ret)
  5404. + goto out;
  5405. +
  5406. + ret = init_debugfs();
  5407. + if (ret)
  5408. + goto err_stats;
  5409. +
  5410. + if (enabled)
  5411. + ret = start_kthread();
  5412. +
  5413. + goto out;
  5414. +
  5415. +err_stats:
  5416. + ring_buffer_free(ring_buffer);
  5417. +out:
  5418. + return ret;
  5419. +
  5420. +}
  5421. +
  5422. +/**
  5423. + * detector_exit - Standard module cleanup code
  5424. + */
  5425. +static void detector_exit(void)
  5426. +{
  5427. + int err;
  5428. +
  5429. + if (enabled) {
  5430. + enabled = 0;
  5431. + err = stop_kthread();
  5432. + if (err)
  5433. + pr_err(BANNER "cannot stop kthread\n");
  5434. + }
  5435. +
  5436. + free_debugfs();
  5437. + ring_buffer_free(ring_buffer); /* free up the ring buffer */
  5438. +
  5439. +}
  5440. +
  5441. +module_init(detector_init);
  5442. +module_exit(detector_exit);
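Once the module is loaded and debugfs is mounted (conventionally at /sys/kernel/debug; that path is an assumption of this sketch), the detector is driven entirely through the files created by init_debugfs() above: write a non-zero value to "enable" to start the sampling kthread, then read "sample" to block until the next latency above the threshold is recorded, or read "count"/"max" for the aggregate statistics. A minimal user-space sketch:

#include <stdio.h>

#define HWLAT_DIR "/sys/kernel/debug/hwlat_detector/"

int main(void)
{
        char line[128];
        FILE *f;

        /* Start the sampling kthread (equivalent to: echo 1 > .../enable). */
        f = fopen(HWLAT_DIR "enable", "w");
        if (!f)
                return 1;
        fputs("1\n", f);
        fclose(f);

        /* Block until get_sample() records a latency above the threshold;
         * each line is "timestamp<TAB>inner_us<TAB>outer_us". */
        f = fopen(HWLAT_DIR "sample", "r");
        if (!f)
                return 1;
        if (fgets(line, sizeof(line), f))
                printf("hwlat sample: %s", line);
        fclose(f);
        return 0;
}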
  5443. diff -Nur linux-4.8.15.orig/drivers/misc/Kconfig linux-4.8.15/drivers/misc/Kconfig
  5444. --- linux-4.8.15.orig/drivers/misc/Kconfig 2016-12-15 17:50:48.000000000 +0100
  5445. +++ linux-4.8.15/drivers/misc/Kconfig 2017-01-01 17:07:14.579333432 +0100
  5446. @@ -54,6 +54,7 @@
  5447. config ATMEL_TCLIB
  5448. bool "Atmel AT32/AT91 Timer/Counter Library"
  5449. depends on (AVR32 || ARCH_AT91)
  5450. + default y if PREEMPT_RT_FULL
  5451. help
  5452. Select this if you want a library to allocate the Timer/Counter
  5453. blocks found on many Atmel processors. This facilitates using
  5454. @@ -69,8 +70,7 @@
  5455. are combined to make a single 32-bit timer.
  5456. When GENERIC_CLOCKEVENTS is defined, the third timer channel
  5457. - may be used as a clock event device supporting oneshot mode
  5458. - (delays of up to two seconds) based on the 32 KiHz clock.
  5459. + may be used as a clock event device supporting oneshot mode.
  5460. config ATMEL_TCB_CLKSRC_BLOCK
  5461. int
  5462. @@ -84,6 +84,15 @@
  5463. TC can be used for other purposes, such as PWM generation and
  5464. interval timing.
  5465. +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  5466. + bool "TC Block use 32 KiHz clock"
  5467. + depends on ATMEL_TCB_CLKSRC
  5468. + default y if !PREEMPT_RT_FULL
  5469. + help
  5470. + Select this to use 32 KiHz base clock rate as TC block clock
  5471. + source for clock events.
  5472. +
  5473. +
  5474. config DUMMY_IRQ
  5475. tristate "Dummy IRQ handler"
  5476. default n
  5477. @@ -114,6 +123,35 @@
  5478. for information on the specific driver level and support statement
  5479. for your IBM server.
  5480. +config HWLAT_DETECTOR
  5481. + tristate "Testing module to detect hardware-induced latencies"
  5482. + depends on DEBUG_FS
  5483. + depends on RING_BUFFER
  5484. + default m
  5485. + ---help---
  5486. + A simple hardware latency detector. Use this module to detect
  5487. + large latencies introduced by the behavior of the underlying
  5488. + system firmware external to Linux. We do this using periodic
  5489. + use of stop_machine to grab all available CPUs and measure
  5490. + for unexplainable gaps in the CPU timestamp counter(s). By
  5491. + default, the module is not enabled until the "enable" file
  5492. + within the "hwlat_detector" debugfs directory is toggled.
  5493. +
  5494. + This module is often used to detect SMI (System Management
  5495. + Interrupts) on x86 systems, though it is not x86 specific. To
  5496. + this end, we default to using a sample window of 1 second,
  5497. + during which we will sample for 0.5 seconds. If an SMI or
  5498. + similar event occurs during that time, it is recorded
  5499. + into an 8K-sample global ring buffer until retrieved.
  5500. +
  5501. + WARNING: This software should never be enabled (it can be built
  5502. + but should not be turned on after it is loaded) in a production
  5503. + environment where high latencies are a concern, since the
  5504. + sampling mechanism itself introduces latencies for regular
  5505. + tasks while the CPU(s) are being held.
  5506. +
  5507. + If unsure, say N.
  5508. +
  5509. config PHANTOM
  5510. tristate "Sensable PHANToM (PCI)"
  5511. depends on PCI
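As the help text above notes, the detector stays idle until the "enable" file in its "hwlat_detector" debugfs directory is written. Assuming debugfs is mounted at the conventional /sys/kernel/debug, a minimal user-space toggle could look like the sketch below; only the directory and file names come from the patch, the rest is illustrative:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            /* Assumes debugfs is mounted at /sys/kernel/debug. */
            const char *path = "/sys/kernel/debug/hwlat_detector/enable";
            int fd = open(path, O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (write(fd, "1", 1) != 1) {   /* write "0" to stop sampling */
                    perror("write");
                    close(fd);
                    return 1;
            }
            close(fd);
            return 0;
    }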
  5512. diff -Nur linux-4.8.15.orig/drivers/misc/Makefile linux-4.8.15/drivers/misc/Makefile
  5513. --- linux-4.8.15.orig/drivers/misc/Makefile 2016-12-15 17:50:48.000000000 +0100
  5514. +++ linux-4.8.15/drivers/misc/Makefile 2017-01-01 17:07:14.579333432 +0100
  5515. @@ -38,6 +38,7 @@
  5516. obj-$(CONFIG_HMC6352) += hmc6352.o
  5517. obj-y += eeprom/
  5518. obj-y += cb710/
  5519. +obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o
  5520. obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o
  5521. obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o
  5522. obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o
  5523. diff -Nur linux-4.8.15.orig/drivers/mmc/host/mmci.c linux-4.8.15/drivers/mmc/host/mmci.c
  5524. --- linux-4.8.15.orig/drivers/mmc/host/mmci.c 2016-12-15 17:50:48.000000000 +0100
  5525. +++ linux-4.8.15/drivers/mmc/host/mmci.c 2017-01-01 17:07:14.591334201 +0100
  5526. @@ -1147,15 +1147,12 @@
  5527. struct sg_mapping_iter *sg_miter = &host->sg_miter;
  5528. struct variant_data *variant = host->variant;
  5529. void __iomem *base = host->base;
  5530. - unsigned long flags;
  5531. u32 status;
  5532. status = readl(base + MMCISTATUS);
  5533. dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status);
  5534. - local_irq_save(flags);
  5535. -
  5536. do {
  5537. unsigned int remain, len;
  5538. char *buffer;
  5539. @@ -1195,8 +1192,6 @@
  5540. sg_miter_stop(sg_miter);
  5541. - local_irq_restore(flags);
  5542. -
  5543. /*
  5544. * If we have less than the fifo 'half-full' threshold to transfer,
  5545. * trigger a PIO interrupt as soon as any data is available.
  5546. diff -Nur linux-4.8.15.orig/drivers/net/ethernet/3com/3c59x.c linux-4.8.15/drivers/net/ethernet/3com/3c59x.c
  5547. --- linux-4.8.15.orig/drivers/net/ethernet/3com/3c59x.c 2016-12-15 17:50:48.000000000 +0100
  5548. +++ linux-4.8.15/drivers/net/ethernet/3com/3c59x.c 2017-01-01 17:07:14.611335500 +0100
  5549. @@ -842,9 +842,9 @@
  5550. {
  5551. struct vortex_private *vp = netdev_priv(dev);
  5552. unsigned long flags;
  5553. - local_irq_save(flags);
  5554. + local_irq_save_nort(flags);
  5555. (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
  5556. - local_irq_restore(flags);
  5557. + local_irq_restore_nort(flags);
  5558. }
  5559. #endif
  5560. @@ -1910,12 +1910,12 @@
  5561. * Block interrupts because vortex_interrupt does a bare spin_lock()
  5562. */
  5563. unsigned long flags;
  5564. - local_irq_save(flags);
  5565. + local_irq_save_nort(flags);
  5566. if (vp->full_bus_master_tx)
  5567. boomerang_interrupt(dev->irq, dev);
  5568. else
  5569. vortex_interrupt(dev->irq, dev);
  5570. - local_irq_restore(flags);
  5571. + local_irq_restore_nort(flags);
  5572. }
  5573. }
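The local_irq_save_nort()/local_irq_restore_nort() calls introduced in the two hunks above are "not on RT" variants: on PREEMPT_RT_FULL they leave interrupts enabled (only recording the flags), while on every other configuration they behave exactly like the plain primitives. They are defined by another part of this patch, roughly along these lines (a sketch, not the exact hunk):

    /* Sketch of the helpers added elsewhere in this patch. */
    #ifdef CONFIG_PREEMPT_RT_FULL
    # define local_irq_save_nort(flags)     do { local_save_flags(flags); } while (0)
    # define local_irq_restore_nort(flags)  (void)(flags)
    #else
    # define local_irq_save_nort(flags)     local_irq_save(flags)
    # define local_irq_restore_nort(flags)  local_irq_restore(flags)
    #endif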
  5574. diff -Nur linux-4.8.15.orig/drivers/net/ethernet/realtek/8139too.c linux-4.8.15/drivers/net/ethernet/realtek/8139too.c
  5575. --- linux-4.8.15.orig/drivers/net/ethernet/realtek/8139too.c 2016-12-15 17:50:48.000000000 +0100
  5576. +++ linux-4.8.15/drivers/net/ethernet/realtek/8139too.c 2017-01-01 17:07:14.639337303 +0100
  5577. @@ -2233,7 +2233,7 @@
  5578. struct rtl8139_private *tp = netdev_priv(dev);
  5579. const int irq = tp->pci_dev->irq;
  5580. - disable_irq(irq);
  5581. + disable_irq_nosync(irq);
  5582. rtl8139_interrupt(irq, dev);
  5583. enable_irq(irq);
  5584. }
  5585. diff -Nur linux-4.8.15.orig/drivers/net/wireless/intersil/orinoco/orinoco_usb.c linux-4.8.15/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
  5586. --- linux-4.8.15.orig/drivers/net/wireless/intersil/orinoco/orinoco_usb.c 2016-12-15 17:50:48.000000000 +0100
  5587. +++ linux-4.8.15/drivers/net/wireless/intersil/orinoco/orinoco_usb.c 2017-01-01 17:07:14.687340389 +0100
  5588. @@ -697,7 +697,7 @@
  5589. while (!ctx->done.done && msecs--)
  5590. udelay(1000);
  5591. } else {
  5592. - wait_event_interruptible(ctx->done.wait,
  5593. + swait_event_interruptible(ctx->done.wait,
  5594. ctx->done.done);
  5595. }
  5596. break;
  5597. diff -Nur linux-4.8.15.orig/drivers/pci/access.c linux-4.8.15/drivers/pci/access.c
  5598. --- linux-4.8.15.orig/drivers/pci/access.c 2016-12-15 17:50:48.000000000 +0100
  5599. +++ linux-4.8.15/drivers/pci/access.c 2017-01-01 17:07:14.699341161 +0100
  5600. @@ -672,7 +672,7 @@
  5601. WARN_ON(!dev->block_cfg_access);
  5602. dev->block_cfg_access = 0;
  5603. - wake_up_all(&pci_cfg_wait);
  5604. + wake_up_all_locked(&pci_cfg_wait);
  5605. raw_spin_unlock_irqrestore(&pci_lock, flags);
  5606. }
  5607. EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
  5608. diff -Nur linux-4.8.15.orig/drivers/scsi/fcoe/fcoe.c linux-4.8.15/drivers/scsi/fcoe/fcoe.c
  5609. --- linux-4.8.15.orig/drivers/scsi/fcoe/fcoe.c 2016-12-15 17:50:48.000000000 +0100
  5610. +++ linux-4.8.15/drivers/scsi/fcoe/fcoe.c 2017-01-01 17:07:14.719342449 +0100
  5611. @@ -1455,11 +1455,11 @@
  5612. static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
  5613. {
  5614. struct fcoe_percpu_s *fps;
  5615. - int rc;
  5616. + int rc, cpu = get_cpu_light();
  5617. - fps = &get_cpu_var(fcoe_percpu);
  5618. + fps = &per_cpu(fcoe_percpu, cpu);
  5619. rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
  5620. - put_cpu_var(fcoe_percpu);
  5621. + put_cpu_light();
  5622. return rc;
  5623. }
  5624. @@ -1646,11 +1646,11 @@
  5625. return 0;
  5626. }
  5627. - stats = per_cpu_ptr(lport->stats, get_cpu());
  5628. + stats = per_cpu_ptr(lport->stats, get_cpu_light());
  5629. stats->InvalidCRCCount++;
  5630. if (stats->InvalidCRCCount < 5)
  5631. printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
  5632. - put_cpu();
  5633. + put_cpu_light();
  5634. return -EINVAL;
  5635. }
  5636. @@ -1693,7 +1693,7 @@
  5637. */
  5638. hp = (struct fcoe_hdr *) skb_network_header(skb);
  5639. - stats = per_cpu_ptr(lport->stats, get_cpu());
  5640. + stats = per_cpu_ptr(lport->stats, get_cpu_light());
  5641. if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) {
  5642. if (stats->ErrorFrames < 5)
  5643. printk(KERN_WARNING "fcoe: FCoE version "
  5644. @@ -1725,13 +1725,13 @@
  5645. goto drop;
  5646. if (!fcoe_filter_frames(lport, fp)) {
  5647. - put_cpu();
  5648. + put_cpu_light();
  5649. fc_exch_recv(lport, fp);
  5650. return;
  5651. }
  5652. drop:
  5653. stats->ErrorFrames++;
  5654. - put_cpu();
  5655. + put_cpu_light();
  5656. kfree_skb(skb);
  5657. }
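get_cpu_light()/put_cpu_light(), used throughout the FCoE hunks, keep the task on its current CPU without disabling preemption, so the per-CPU section stays preemptible on RT. Assuming the migrate_disable()/migrate_enable() primitives this patch provides, the helpers are defined elsewhere in the series roughly as:

    /* Sketch: pin to the CPU on RT instead of disabling preemption. */
    #ifdef CONFIG_PREEMPT_RT_FULL
    # define get_cpu_light()        ({ migrate_disable(); smp_processor_id(); })
    # define put_cpu_light()        migrate_enable()
    #else
    # define get_cpu_light()        get_cpu()
    # define put_cpu_light()        put_cpu()
    #endif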
  5658. diff -Nur linux-4.8.15.orig/drivers/scsi/fcoe/fcoe_ctlr.c linux-4.8.15/drivers/scsi/fcoe/fcoe_ctlr.c
  5659. --- linux-4.8.15.orig/drivers/scsi/fcoe/fcoe_ctlr.c 2016-12-15 17:50:48.000000000 +0100
  5660. +++ linux-4.8.15/drivers/scsi/fcoe/fcoe_ctlr.c 2017-01-01 17:07:14.727342965 +0100
  5661. @@ -834,7 +834,7 @@
  5662. INIT_LIST_HEAD(&del_list);
  5663. - stats = per_cpu_ptr(fip->lp->stats, get_cpu());
  5664. + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
  5665. list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
  5666. deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
  5667. @@ -870,7 +870,7 @@
  5668. sel_time = fcf->time;
  5669. }
  5670. }
  5671. - put_cpu();
  5672. + put_cpu_light();
  5673. list_for_each_entry_safe(fcf, next, &del_list, list) {
  5674. /* Removes fcf from current list */
  5675. diff -Nur linux-4.8.15.orig/drivers/scsi/libfc/fc_exch.c linux-4.8.15/drivers/scsi/libfc/fc_exch.c
  5676. --- linux-4.8.15.orig/drivers/scsi/libfc/fc_exch.c 2016-12-15 17:50:48.000000000 +0100
  5677. +++ linux-4.8.15/drivers/scsi/libfc/fc_exch.c 2017-01-01 17:07:14.747344253 +0100
  5678. @@ -814,10 +814,10 @@
  5679. }
  5680. memset(ep, 0, sizeof(*ep));
  5681. - cpu = get_cpu();
  5682. + cpu = get_cpu_light();
  5683. pool = per_cpu_ptr(mp->pool, cpu);
  5684. spin_lock_bh(&pool->lock);
  5685. - put_cpu();
  5686. + put_cpu_light();
  5687. /* peek cache of free slot */
  5688. if (pool->left != FC_XID_UNKNOWN) {
  5689. diff -Nur linux-4.8.15.orig/drivers/scsi/libsas/sas_ata.c linux-4.8.15/drivers/scsi/libsas/sas_ata.c
  5690. --- linux-4.8.15.orig/drivers/scsi/libsas/sas_ata.c 2016-12-15 17:50:48.000000000 +0100
  5691. +++ linux-4.8.15/drivers/scsi/libsas/sas_ata.c 2017-01-01 17:07:14.791347085 +0100
  5692. @@ -190,7 +190,7 @@
  5693. /* TODO: audit callers to ensure they are ready for qc_issue to
  5694. * unconditionally re-enable interrupts
  5695. */
  5696. - local_irq_save(flags);
  5697. + local_irq_save_nort(flags);
  5698. spin_unlock(ap->lock);
  5699. /* If the device fell off, no sense in issuing commands */
  5700. @@ -252,7 +252,7 @@
  5701. out:
  5702. spin_lock(ap->lock);
  5703. - local_irq_restore(flags);
  5704. + local_irq_restore_nort(flags);
  5705. return ret;
  5706. }
  5707. diff -Nur linux-4.8.15.orig/drivers/scsi/qla2xxx/qla_inline.h linux-4.8.15/drivers/scsi/qla2xxx/qla_inline.h
  5708. --- linux-4.8.15.orig/drivers/scsi/qla2xxx/qla_inline.h 2016-12-15 17:50:48.000000000 +0100
  5709. +++ linux-4.8.15/drivers/scsi/qla2xxx/qla_inline.h 2017-01-01 17:07:14.807348125 +0100
  5710. @@ -59,12 +59,12 @@
  5711. {
  5712. unsigned long flags;
  5713. struct qla_hw_data *ha = rsp->hw;
  5714. - local_irq_save(flags);
  5715. + local_irq_save_nort(flags);
  5716. if (IS_P3P_TYPE(ha))
  5717. qla82xx_poll(0, rsp);
  5718. else
  5719. ha->isp_ops->intr_handler(0, rsp);
  5720. - local_irq_restore(flags);
  5721. + local_irq_restore_nort(flags);
  5722. }
  5723. static inline uint8_t *
  5724. diff -Nur linux-4.8.15.orig/drivers/scsi/qla2xxx/qla_isr.c linux-4.8.15/drivers/scsi/qla2xxx/qla_isr.c
  5725. --- linux-4.8.15.orig/drivers/scsi/qla2xxx/qla_isr.c 2016-12-15 17:50:48.000000000 +0100
  5726. +++ linux-4.8.15/drivers/scsi/qla2xxx/qla_isr.c 2017-01-01 17:07:14.831349662 +0100
  5727. @@ -3125,7 +3125,11 @@
  5728. * kref_put().
  5729. */
  5730. kref_get(&qentry->irq_notify.kref);
  5731. +#ifdef CONFIG_PREEMPT_RT_BASE
  5732. + swork_queue(&qentry->irq_notify.swork);
  5733. +#else
  5734. schedule_work(&qentry->irq_notify.work);
  5735. +#endif
  5736. }
  5737. /*
  5738. diff -Nur linux-4.8.15.orig/drivers/thermal/x86_pkg_temp_thermal.c linux-4.8.15/drivers/thermal/x86_pkg_temp_thermal.c
  5739. --- linux-4.8.15.orig/drivers/thermal/x86_pkg_temp_thermal.c 2016-12-15 17:50:48.000000000 +0100
  5740. +++ linux-4.8.15/drivers/thermal/x86_pkg_temp_thermal.c 2017-01-01 17:07:14.843350438 +0100
  5741. @@ -29,6 +29,7 @@
  5742. #include <linux/pm.h>
  5743. #include <linux/thermal.h>
  5744. #include <linux/debugfs.h>
  5745. +#include <linux/swork.h>
  5746. #include <asm/cpu_device_id.h>
  5747. #include <asm/mce.h>
  5748. @@ -352,7 +353,7 @@
  5749. }
  5750. }
  5751. -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  5752. +static void platform_thermal_notify_work(struct swork_event *event)
  5753. {
  5754. unsigned long flags;
  5755. int cpu = smp_processor_id();
  5756. @@ -369,7 +370,7 @@
  5757. pkg_work_scheduled[phy_id]) {
  5758. disable_pkg_thres_interrupt();
  5759. spin_unlock_irqrestore(&pkg_work_lock, flags);
  5760. - return -EINVAL;
  5761. + return;
  5762. }
  5763. pkg_work_scheduled[phy_id] = 1;
  5764. spin_unlock_irqrestore(&pkg_work_lock, flags);
  5765. @@ -378,9 +379,48 @@
  5766. schedule_delayed_work_on(cpu,
  5767. &per_cpu(pkg_temp_thermal_threshold_work, cpu),
  5768. msecs_to_jiffies(notify_delay_ms));
  5769. +}
  5770. +
  5771. +#ifdef CONFIG_PREEMPT_RT_FULL
  5772. +static struct swork_event notify_work;
  5773. +
  5774. +static int thermal_notify_work_init(void)
  5775. +{
  5776. + int err;
  5777. +
  5778. + err = swork_get();
  5779. + if (err)
  5780. + return err;
  5781. +
  5782. + INIT_SWORK(&notify_work, platform_thermal_notify_work);
  5783. return 0;
  5784. }
  5785. +static void thermal_notify_work_cleanup(void)
  5786. +{
  5787. + swork_put();
  5788. +}
  5789. +
  5790. +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  5791. +{
  5792. + swork_queue(&notify_work);
  5793. + return 0;
  5794. +}
  5795. +
  5796. +#else /* !CONFIG_PREEMPT_RT_FULL */
  5797. +
  5798. +static int thermal_notify_work_init(void) { return 0; }
  5799. +
  5800. +static void thermal_notify_work_cleanup(void) { }
  5801. +
  5802. +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  5803. +{
  5804. + platform_thermal_notify_work(NULL);
  5805. +
  5806. + return 0;
  5807. +}
  5808. +#endif /* CONFIG_PREEMPT_RT_FULL */
  5809. +
  5810. static int find_siblings_cpu(int cpu)
  5811. {
  5812. int i;
  5813. @@ -584,6 +624,9 @@
  5814. if (!x86_match_cpu(pkg_temp_thermal_ids))
  5815. return -ENODEV;
  5816. + if (!thermal_notify_work_init())
  5817. + return -ENODEV;
  5818. +
  5819. spin_lock_init(&pkg_work_lock);
  5820. platform_thermal_package_notify =
  5821. pkg_temp_thermal_platform_thermal_notify;
  5822. @@ -608,7 +651,7 @@
  5823. kfree(pkg_work_scheduled);
  5824. platform_thermal_package_notify = NULL;
  5825. platform_thermal_package_rate_control = NULL;
  5826. -
  5827. + thermal_notify_work_cleanup();
  5828. return -ENODEV;
  5829. }
  5830. @@ -633,6 +676,7 @@
  5831. mutex_unlock(&phy_dev_list_mutex);
  5832. platform_thermal_package_notify = NULL;
  5833. platform_thermal_package_rate_control = NULL;
  5834. + thermal_notify_work_cleanup();
  5835. for_each_online_cpu(i)
  5836. cancel_delayed_work_sync(
  5837. &per_cpu(pkg_temp_thermal_threshold_work, i));
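The swork_*() calls above come from the simple-work ("swork") infrastructure this patch adds: a kthread-backed deferral mechanism that can be queued from contexts where a regular workqueue is problematic on RT. The sketch below only relies on the calls already visible in the hunks (swork_get/swork_put, INIT_SWORK, swork_queue) and assumes the <linux/swork.h> header the series introduces:

    #include <linux/swork.h>

    static struct swork_event my_event;

    static void my_event_fn(struct swork_event *ev)
    {
            /* Runs in the swork kthread, in preemptible process context. */
    }

    static int my_init(void)
    {
            int err = swork_get();          /* take a reference on the worker */

            if (err)
                    return err;
            INIT_SWORK(&my_event, my_event_fn);
            return 0;
    }

    static void my_notify(void)             /* e.g. called from an interrupt path */
    {
            swork_queue(&my_event);
    }

    static void my_cleanup(void)
    {
            swork_put();                    /* drop the worker reference */
    }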
  5838. diff -Nur linux-4.8.15.orig/drivers/tty/serial/8250/8250_core.c linux-4.8.15/drivers/tty/serial/8250/8250_core.c
  5839. --- linux-4.8.15.orig/drivers/tty/serial/8250/8250_core.c 2016-12-15 17:50:48.000000000 +0100
  5840. +++ linux-4.8.15/drivers/tty/serial/8250/8250_core.c 2017-01-01 17:07:14.859351469 +0100
  5841. @@ -58,7 +58,16 @@
  5842. static unsigned int skip_txen_test; /* force skip of txen test at init time */
  5843. -#define PASS_LIMIT 512
  5844. +/*
  5845. + * On -rt we can have a more delays, and legitimately
  5846. + * On -rt we can have more delays, and legitimately
  5847. + * syslog:
  5848. + */
  5849. +#ifdef CONFIG_PREEMPT_RT_FULL
  5850. +# define PASS_LIMIT 1000000
  5851. +#else
  5852. +# define PASS_LIMIT 512
  5853. +#endif
  5854. #include <asm/serial.h>
  5855. /*
  5856. diff -Nur linux-4.8.15.orig/drivers/tty/serial/8250/8250_port.c linux-4.8.15/drivers/tty/serial/8250/8250_port.c
  5857. --- linux-4.8.15.orig/drivers/tty/serial/8250/8250_port.c 2016-12-15 17:50:48.000000000 +0100
  5858. +++ linux-4.8.15/drivers/tty/serial/8250/8250_port.c 2017-01-01 17:07:14.895353784 +0100
  5859. @@ -35,6 +35,7 @@
  5860. #include <linux/nmi.h>
  5861. #include <linux/mutex.h>
  5862. #include <linux/slab.h>
  5863. +#include <linux/kdb.h>
  5864. #include <linux/uaccess.h>
  5865. #include <linux/pm_runtime.h>
  5866. #include <linux/timer.h>
  5867. @@ -3109,9 +3110,9 @@
  5868. serial8250_rpm_get(up);
  5869. - if (port->sysrq)
  5870. + if (port->sysrq || oops_in_progress)
  5871. locked = 0;
  5872. - else if (oops_in_progress)
  5873. + else if (in_kdb_printk())
  5874. locked = spin_trylock_irqsave(&port->lock, flags);
  5875. else
  5876. spin_lock_irqsave(&port->lock, flags);
  5877. diff -Nur linux-4.8.15.orig/drivers/tty/serial/amba-pl011.c linux-4.8.15/drivers/tty/serial/amba-pl011.c
  5878. --- linux-4.8.15.orig/drivers/tty/serial/amba-pl011.c 2016-12-15 17:50:48.000000000 +0100
  5879. +++ linux-4.8.15/drivers/tty/serial/amba-pl011.c 2017-01-01 17:07:15.011361259 +0100
  5880. @@ -2167,13 +2167,19 @@
  5881. clk_enable(uap->clk);
  5882. - local_irq_save(flags);
  5883. + /*
  5884. + * local_irq_save(flags);
  5885. + *
  5886. + * This local_irq_save() is nonsense. If we come in via sysrq
  5887. + * handling then interrupts are already disabled. Aside from
  5888. + * that, the port.sysrq check is racy on SMP regardless.
  5889. + */
  5890. if (uap->port.sysrq)
  5891. locked = 0;
  5892. else if (oops_in_progress)
  5893. - locked = spin_trylock(&uap->port.lock);
  5894. + locked = spin_trylock_irqsave(&uap->port.lock, flags);
  5895. else
  5896. - spin_lock(&uap->port.lock);
  5897. + spin_lock_irqsave(&uap->port.lock, flags);
  5898. /*
  5899. * First save the CR then disable the interrupts
  5900. @@ -2197,8 +2203,7 @@
  5901. pl011_write(old_cr, uap, REG_CR);
  5902. if (locked)
  5903. - spin_unlock(&uap->port.lock);
  5904. - local_irq_restore(flags);
  5905. + spin_unlock_irqrestore(&uap->port.lock, flags);
  5906. clk_disable(uap->clk);
  5907. }
  5908. diff -Nur linux-4.8.15.orig/drivers/tty/serial/omap-serial.c linux-4.8.15/drivers/tty/serial/omap-serial.c
  5909. --- linux-4.8.15.orig/drivers/tty/serial/omap-serial.c 2016-12-15 17:50:48.000000000 +0100
  5910. +++ linux-4.8.15/drivers/tty/serial/omap-serial.c 2017-01-01 17:07:15.019361774 +0100
  5911. @@ -1257,13 +1257,10 @@
  5912. pm_runtime_get_sync(up->dev);
  5913. - local_irq_save(flags);
  5914. - if (up->port.sysrq)
  5915. - locked = 0;
  5916. - else if (oops_in_progress)
  5917. - locked = spin_trylock(&up->port.lock);
  5918. + if (up->port.sysrq || oops_in_progress)
  5919. + locked = spin_trylock_irqsave(&up->port.lock, flags);
  5920. else
  5921. - spin_lock(&up->port.lock);
  5922. + spin_lock_irqsave(&up->port.lock, flags);
  5923. /*
  5924. * First save the IER then disable the interrupts
  5925. @@ -1292,8 +1289,7 @@
  5926. pm_runtime_mark_last_busy(up->dev);
  5927. pm_runtime_put_autosuspend(up->dev);
  5928. if (locked)
  5929. - spin_unlock(&up->port.lock);
  5930. - local_irq_restore(flags);
  5931. + spin_unlock_irqrestore(&up->port.lock, flags);
  5932. }
  5933. static int __init
  5934. diff -Nur linux-4.8.15.orig/drivers/tty/serial/sc16is7xx.c linux-4.8.15/drivers/tty/serial/sc16is7xx.c
  5935. --- linux-4.8.15.orig/drivers/tty/serial/sc16is7xx.c 2016-12-15 17:50:48.000000000 +0100
  5936. +++ linux-4.8.15/drivers/tty/serial/sc16is7xx.c 2017-01-01 17:07:15.027362288 +0100
  5937. @@ -1240,7 +1240,7 @@
  5938. /* Setup interrupt */
  5939. ret = devm_request_irq(dev, irq, sc16is7xx_irq,
  5940. - IRQF_ONESHOT | flags, dev_name(dev), s);
  5941. + flags, dev_name(dev), s);
  5942. if (!ret)
  5943. return 0;
  5944. diff -Nur linux-4.8.15.orig/drivers/usb/core/hcd.c linux-4.8.15/drivers/usb/core/hcd.c
  5945. --- linux-4.8.15.orig/drivers/usb/core/hcd.c 2016-12-15 17:50:48.000000000 +0100
  5946. +++ linux-4.8.15/drivers/usb/core/hcd.c 2017-01-01 17:07:15.051363831 +0100
  5947. @@ -1760,9 +1760,9 @@
  5948. * and no one may trigger the above deadlock situation when
  5949. * running complete() in tasklet.
  5950. */
  5951. - local_irq_save(flags);
  5952. + local_irq_save_nort(flags);
  5953. urb->complete(urb);
  5954. - local_irq_restore(flags);
  5955. + local_irq_restore_nort(flags);
  5956. usb_anchor_resume_wakeups(anchor);
  5957. atomic_dec(&urb->use_count);
  5958. diff -Nur linux-4.8.15.orig/drivers/usb/gadget/function/f_fs.c linux-4.8.15/drivers/usb/gadget/function/f_fs.c
  5959. --- linux-4.8.15.orig/drivers/usb/gadget/function/f_fs.c 2016-12-15 17:50:48.000000000 +0100
  5960. +++ linux-4.8.15/drivers/usb/gadget/function/f_fs.c 2017-01-01 17:07:15.079365638 +0100
  5961. @@ -1590,7 +1590,7 @@
  5962. pr_info("%s(): freeing\n", __func__);
  5963. ffs_data_clear(ffs);
  5964. BUG_ON(waitqueue_active(&ffs->ev.waitq) ||
  5965. - waitqueue_active(&ffs->ep0req_completion.wait));
  5966. + swait_active(&ffs->ep0req_completion.wait));
  5967. kfree(ffs->dev_name);
  5968. kfree(ffs);
  5969. }
  5970. diff -Nur linux-4.8.15.orig/drivers/usb/gadget/legacy/inode.c linux-4.8.15/drivers/usb/gadget/legacy/inode.c
  5971. --- linux-4.8.15.orig/drivers/usb/gadget/legacy/inode.c 2016-12-15 17:50:48.000000000 +0100
  5972. +++ linux-4.8.15/drivers/usb/gadget/legacy/inode.c 2017-01-01 17:07:15.107367440 +0100
  5973. @@ -346,7 +346,7 @@
  5974. spin_unlock_irq (&epdata->dev->lock);
  5975. if (likely (value == 0)) {
  5976. - value = wait_event_interruptible (done.wait, done.done);
  5977. + value = swait_event_interruptible (done.wait, done.done);
  5978. if (value != 0) {
  5979. spin_lock_irq (&epdata->dev->lock);
  5980. if (likely (epdata->ep != NULL)) {
  5981. @@ -355,7 +355,7 @@
  5982. usb_ep_dequeue (epdata->ep, epdata->req);
  5983. spin_unlock_irq (&epdata->dev->lock);
  5984. - wait_event (done.wait, done.done);
  5985. + swait_event (done.wait, done.done);
  5986. if (epdata->status == -ECONNRESET)
  5987. epdata->status = -EINTR;
  5988. } else {
  5989. diff -Nur linux-4.8.15.orig/fs/aio.c linux-4.8.15/fs/aio.c
  5990. --- linux-4.8.15.orig/fs/aio.c 2016-12-15 17:50:48.000000000 +0100
  5991. +++ linux-4.8.15/fs/aio.c 2017-01-01 17:07:15.119368222 +0100
  5992. @@ -40,6 +40,7 @@
  5993. #include <linux/ramfs.h>
  5994. #include <linux/percpu-refcount.h>
  5995. #include <linux/mount.h>
  5996. +#include <linux/swork.h>
  5997. #include <asm/kmap_types.h>
  5998. #include <asm/uaccess.h>
  5999. @@ -115,7 +116,7 @@
  6000. struct page **ring_pages;
  6001. long nr_pages;
  6002. - struct work_struct free_work;
  6003. + struct swork_event free_work;
  6004. /*
  6005. * signals when all in-flight requests are done
  6006. @@ -258,6 +259,7 @@
  6007. .mount = aio_mount,
  6008. .kill_sb = kill_anon_super,
  6009. };
  6010. + BUG_ON(swork_get());
  6011. aio_mnt = kern_mount(&aio_fs);
  6012. if (IS_ERR(aio_mnt))
  6013. panic("Failed to create aio fs mount.");
  6014. @@ -578,9 +580,9 @@
  6015. return cancel(&kiocb->common);
  6016. }
  6017. -static void free_ioctx(struct work_struct *work)
  6018. +static void free_ioctx(struct swork_event *sev)
  6019. {
  6020. - struct kioctx *ctx = container_of(work, struct kioctx, free_work);
  6021. + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
  6022. pr_debug("freeing %p\n", ctx);
  6023. @@ -599,8 +601,8 @@
  6024. if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
  6025. complete(&ctx->rq_wait->comp);
  6026. - INIT_WORK(&ctx->free_work, free_ioctx);
  6027. - schedule_work(&ctx->free_work);
  6028. + INIT_SWORK(&ctx->free_work, free_ioctx);
  6029. + swork_queue(&ctx->free_work);
  6030. }
  6031. /*
  6032. @@ -608,9 +610,9 @@
  6033. * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
  6034. * now it's safe to cancel any that need to be.
  6035. */
  6036. -static void free_ioctx_users(struct percpu_ref *ref)
  6037. +static void free_ioctx_users_work(struct swork_event *sev)
  6038. {
  6039. - struct kioctx *ctx = container_of(ref, struct kioctx, users);
  6040. + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
  6041. struct aio_kiocb *req;
  6042. spin_lock_irq(&ctx->ctx_lock);
  6043. @@ -629,6 +631,14 @@
  6044. percpu_ref_put(&ctx->reqs);
  6045. }
  6046. +static void free_ioctx_users(struct percpu_ref *ref)
  6047. +{
  6048. + struct kioctx *ctx = container_of(ref, struct kioctx, users);
  6049. +
  6050. + INIT_SWORK(&ctx->free_work, free_ioctx_users_work);
  6051. + swork_queue(&ctx->free_work);
  6052. +}
  6053. +
  6054. static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
  6055. {
  6056. unsigned i, new_nr;
  6057. diff -Nur linux-4.8.15.orig/fs/autofs4/autofs_i.h linux-4.8.15/fs/autofs4/autofs_i.h
  6058. --- linux-4.8.15.orig/fs/autofs4/autofs_i.h 2016-12-15 17:50:48.000000000 +0100
  6059. +++ linux-4.8.15/fs/autofs4/autofs_i.h 2017-01-01 17:07:15.127368729 +0100
  6060. @@ -30,6 +30,7 @@
  6061. #include <linux/sched.h>
  6062. #include <linux/mount.h>
  6063. #include <linux/namei.h>
  6064. +#include <linux/delay.h>
  6065. #include <asm/current.h>
  6066. #include <linux/uaccess.h>
  6067. diff -Nur linux-4.8.15.orig/fs/autofs4/expire.c linux-4.8.15/fs/autofs4/expire.c
  6068. --- linux-4.8.15.orig/fs/autofs4/expire.c 2016-12-15 17:50:48.000000000 +0100
  6069. +++ linux-4.8.15/fs/autofs4/expire.c 2017-01-01 17:07:15.143369757 +0100
  6070. @@ -148,7 +148,7 @@
  6071. parent = p->d_parent;
  6072. if (!spin_trylock(&parent->d_lock)) {
  6073. spin_unlock(&p->d_lock);
  6074. - cpu_relax();
  6075. + cpu_chill();
  6076. goto relock;
  6077. }
  6078. spin_unlock(&p->d_lock);
  6079. diff -Nur linux-4.8.15.orig/fs/buffer.c linux-4.8.15/fs/buffer.c
  6080. --- linux-4.8.15.orig/fs/buffer.c 2016-12-15 17:50:48.000000000 +0100
  6081. +++ linux-4.8.15/fs/buffer.c 2017-01-01 17:07:15.163371048 +0100
  6082. @@ -301,8 +301,7 @@
  6083. * decide that the page is now completely done.
  6084. */
  6085. first = page_buffers(page);
  6086. - local_irq_save(flags);
  6087. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  6088. + flags = bh_uptodate_lock_irqsave(first);
  6089. clear_buffer_async_read(bh);
  6090. unlock_buffer(bh);
  6091. tmp = bh;
  6092. @@ -315,8 +314,7 @@
  6093. }
  6094. tmp = tmp->b_this_page;
  6095. } while (tmp != bh);
  6096. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6097. - local_irq_restore(flags);
  6098. + bh_uptodate_unlock_irqrestore(first, flags);
  6099. /*
  6100. * If none of the buffers had errors and they are all
  6101. @@ -328,9 +326,7 @@
  6102. return;
  6103. still_busy:
  6104. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6105. - local_irq_restore(flags);
  6106. - return;
  6107. + bh_uptodate_unlock_irqrestore(first, flags);
  6108. }
  6109. /*
  6110. @@ -358,8 +354,7 @@
  6111. }
  6112. first = page_buffers(page);
  6113. - local_irq_save(flags);
  6114. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  6115. + flags = bh_uptodate_lock_irqsave(first);
  6116. clear_buffer_async_write(bh);
  6117. unlock_buffer(bh);
  6118. @@ -371,15 +366,12 @@
  6119. }
  6120. tmp = tmp->b_this_page;
  6121. }
  6122. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6123. - local_irq_restore(flags);
  6124. + bh_uptodate_unlock_irqrestore(first, flags);
  6125. end_page_writeback(page);
  6126. return;
  6127. still_busy:
  6128. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6129. - local_irq_restore(flags);
  6130. - return;
  6131. + bh_uptodate_unlock_irqrestore(first, flags);
  6132. }
  6133. EXPORT_SYMBOL(end_buffer_async_write);
  6134. @@ -3384,6 +3376,7 @@
  6135. struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
  6136. if (ret) {
  6137. INIT_LIST_HEAD(&ret->b_assoc_buffers);
  6138. + buffer_head_init_locks(ret);
  6139. preempt_disable();
  6140. __this_cpu_inc(bh_accounting.nr);
  6141. recalc_bh_state();
  6142. diff -Nur linux-4.8.15.orig/fs/cifs/readdir.c linux-4.8.15/fs/cifs/readdir.c
  6143. --- linux-4.8.15.orig/fs/cifs/readdir.c 2016-12-15 17:50:48.000000000 +0100
  6144. +++ linux-4.8.15/fs/cifs/readdir.c 2017-01-01 17:07:15.175371817 +0100
  6145. @@ -80,7 +80,7 @@
  6146. struct inode *inode;
  6147. struct super_block *sb = parent->d_sb;
  6148. struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
  6149. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  6150. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  6151. cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
  6152. diff -Nur linux-4.8.15.orig/fs/dcache.c linux-4.8.15/fs/dcache.c
  6153. --- linux-4.8.15.orig/fs/dcache.c 2016-12-15 17:50:48.000000000 +0100
  6154. +++ linux-4.8.15/fs/dcache.c 2017-01-01 17:07:15.191372851 +0100
  6155. @@ -19,6 +19,7 @@
  6156. #include <linux/mm.h>
  6157. #include <linux/fs.h>
  6158. #include <linux/fsnotify.h>
  6159. +#include <linux/delay.h>
  6160. #include <linux/slab.h>
  6161. #include <linux/init.h>
  6162. #include <linux/hash.h>
  6163. @@ -750,6 +751,8 @@
  6164. */
  6165. void dput(struct dentry *dentry)
  6166. {
  6167. + struct dentry *parent;
  6168. +
  6169. if (unlikely(!dentry))
  6170. return;
  6171. @@ -788,9 +791,18 @@
  6172. return;
  6173. kill_it:
  6174. - dentry = dentry_kill(dentry);
  6175. - if (dentry) {
  6176. - cond_resched();
  6177. + parent = dentry_kill(dentry);
  6178. + if (parent) {
  6179. + int r;
  6180. +
  6181. + if (parent == dentry) {
  6182. + /* the task with the highest priority won't schedule */
  6183. + r = cond_resched();
  6184. + if (!r)
  6185. + cpu_chill();
  6186. + } else {
  6187. + dentry = parent;
  6188. + }
  6189. goto repeat;
  6190. }
  6191. }
  6192. @@ -2321,7 +2333,7 @@
  6193. if (dentry->d_lockref.count == 1) {
  6194. if (!spin_trylock(&inode->i_lock)) {
  6195. spin_unlock(&dentry->d_lock);
  6196. - cpu_relax();
  6197. + cpu_chill();
  6198. goto again;
  6199. }
  6200. dentry->d_flags &= ~DCACHE_CANT_MOUNT;
  6201. @@ -2381,21 +2393,24 @@
  6202. static void d_wait_lookup(struct dentry *dentry)
  6203. {
  6204. - if (d_in_lookup(dentry)) {
  6205. - DECLARE_WAITQUEUE(wait, current);
  6206. - add_wait_queue(dentry->d_wait, &wait);
  6207. - do {
  6208. - set_current_state(TASK_UNINTERRUPTIBLE);
  6209. - spin_unlock(&dentry->d_lock);
  6210. - schedule();
  6211. - spin_lock(&dentry->d_lock);
  6212. - } while (d_in_lookup(dentry));
  6213. - }
  6214. + struct swait_queue __wait;
  6215. +
  6216. + if (!d_in_lookup(dentry))
  6217. + return;
  6218. +
  6219. + INIT_LIST_HEAD(&__wait.task_list);
  6220. + do {
  6221. + prepare_to_swait(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE);
  6222. + spin_unlock(&dentry->d_lock);
  6223. + schedule();
  6224. + spin_lock(&dentry->d_lock);
  6225. + } while (d_in_lookup(dentry));
  6226. + finish_swait(dentry->d_wait, &__wait);
  6227. }
  6228. struct dentry *d_alloc_parallel(struct dentry *parent,
  6229. const struct qstr *name,
  6230. - wait_queue_head_t *wq)
  6231. + struct swait_queue_head *wq)
  6232. {
  6233. unsigned int hash = name->hash;
  6234. struct hlist_bl_head *b = in_lookup_hash(parent, hash);
  6235. @@ -2504,7 +2519,7 @@
  6236. hlist_bl_lock(b);
  6237. dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
  6238. __hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
  6239. - wake_up_all(dentry->d_wait);
  6240. + swake_up_all(dentry->d_wait);
  6241. dentry->d_wait = NULL;
  6242. hlist_bl_unlock(b);
  6243. INIT_HLIST_NODE(&dentry->d_u.d_alias);
  6244. @@ -3601,6 +3616,11 @@
  6245. void __init vfs_caches_init_early(void)
  6246. {
  6247. + int i;
  6248. +
  6249. + for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++)
  6250. + INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]);
  6251. +
  6252. dcache_init_early();
  6253. inode_init_early();
  6254. }
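The cpu_chill() calls that replace cpu_relax() in the retry loops above address an RT-specific livelock: the lock holder may be a preempted task rather than code running on another CPU, so busy-spinning can keep it from ever making progress. The helper is added elsewhere in this patch, roughly:

    /* Sketch of the helper this patch adds in <linux/delay.h>. */
    #ifdef CONFIG_PREEMPT_RT_FULL
    extern void cpu_chill(void);    /* sleeps briefly so the lock owner can run */
    #else
    # define cpu_chill()    cpu_relax()
    #endif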
  6255. diff -Nur linux-4.8.15.orig/fs/eventpoll.c linux-4.8.15/fs/eventpoll.c
  6256. --- linux-4.8.15.orig/fs/eventpoll.c 2016-12-15 17:50:48.000000000 +0100
  6257. +++ linux-4.8.15/fs/eventpoll.c 2017-01-01 17:07:15.235375683 +0100
  6258. @@ -510,12 +510,12 @@
  6259. */
  6260. static void ep_poll_safewake(wait_queue_head_t *wq)
  6261. {
  6262. - int this_cpu = get_cpu();
  6263. + int this_cpu = get_cpu_light();
  6264. ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
  6265. ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
  6266. - put_cpu();
  6267. + put_cpu_light();
  6268. }
  6269. static void ep_remove_wait_queue(struct eppoll_entry *pwq)
  6270. diff -Nur linux-4.8.15.orig/fs/exec.c linux-4.8.15/fs/exec.c
  6271. --- linux-4.8.15.orig/fs/exec.c 2016-12-15 17:50:48.000000000 +0100
  6272. +++ linux-4.8.15/fs/exec.c 2017-01-01 17:07:15.275378262 +0100
  6273. @@ -1012,12 +1012,14 @@
  6274. }
  6275. }
  6276. task_lock(tsk);
  6277. + preempt_disable_rt();
  6278. active_mm = tsk->active_mm;
  6279. tsk->mm = mm;
  6280. tsk->active_mm = mm;
  6281. activate_mm(active_mm, mm);
  6282. tsk->mm->vmacache_seqnum = 0;
  6283. vmacache_flush(tsk);
  6284. + preempt_enable_rt();
  6285. task_unlock(tsk);
  6286. if (old_mm) {
  6287. up_read(&old_mm->mmap_sem);
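preempt_disable_rt()/preempt_enable_rt() are the mirror image of the _nort helpers: they disable preemption only on RT builds, here so that activate_mm() and the vmacache flush are not preempted mid-switch, while non-RT kernels pay nothing extra. A sketch of the definition added elsewhere in this patch:

    /* Sketch: only takes effect when building with PREEMPT_RT. */
    #ifdef CONFIG_PREEMPT_RT_BASE
    # define preempt_disable_rt()   preempt_disable()
    # define preempt_enable_rt()    preempt_enable()
    #else
    # define preempt_disable_rt()   barrier()
    # define preempt_enable_rt()    barrier()
    #endif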
  6288. diff -Nur linux-4.8.15.orig/fs/fuse/dir.c linux-4.8.15/fs/fuse/dir.c
  6289. --- linux-4.8.15.orig/fs/fuse/dir.c 2016-12-15 17:50:48.000000000 +0100
  6290. +++ linux-4.8.15/fs/fuse/dir.c 2017-01-01 17:07:15.315380835 +0100
  6291. @@ -1174,7 +1174,7 @@
  6292. struct inode *dir = d_inode(parent);
  6293. struct fuse_conn *fc;
  6294. struct inode *inode;
  6295. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  6296. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  6297. if (!o->nodeid) {
  6298. /*
  6299. diff -Nur linux-4.8.15.orig/fs/jbd2/checkpoint.c linux-4.8.15/fs/jbd2/checkpoint.c
  6300. --- linux-4.8.15.orig/fs/jbd2/checkpoint.c 2016-12-15 17:50:48.000000000 +0100
  6301. +++ linux-4.8.15/fs/jbd2/checkpoint.c 2017-01-01 17:07:15.403386504 +0100
  6302. @@ -116,6 +116,8 @@
  6303. nblocks = jbd2_space_needed(journal);
  6304. while (jbd2_log_space_left(journal) < nblocks) {
  6305. write_unlock(&journal->j_state_lock);
  6306. + if (current->plug)
  6307. + io_schedule();
  6308. mutex_lock(&journal->j_checkpoint_mutex);
  6309. /*
  6310. diff -Nur linux-4.8.15.orig/fs/namei.c linux-4.8.15/fs/namei.c
  6311. --- linux-4.8.15.orig/fs/namei.c 2016-12-15 17:50:48.000000000 +0100
  6312. +++ linux-4.8.15/fs/namei.c 2017-01-01 17:07:15.435388569 +0100
  6313. @@ -1629,7 +1629,7 @@
  6314. {
  6315. struct dentry *dentry = ERR_PTR(-ENOENT), *old;
  6316. struct inode *inode = dir->d_inode;
  6317. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  6318. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  6319. inode_lock_shared(inode);
  6320. /* Don't go there if it's already dead */
  6321. @@ -3086,7 +3086,7 @@
  6322. struct dentry *dentry;
  6323. int error, create_error = 0;
  6324. umode_t mode = op->mode;
  6325. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  6326. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  6327. if (unlikely(IS_DEADDIR(dir_inode)))
  6328. return -ENOENT;
  6329. diff -Nur linux-4.8.15.orig/fs/namespace.c linux-4.8.15/fs/namespace.c
  6330. --- linux-4.8.15.orig/fs/namespace.c 2016-12-15 17:50:48.000000000 +0100
  6331. +++ linux-4.8.15/fs/namespace.c 2017-01-01 17:07:15.447389339 +0100
  6332. @@ -14,6 +14,7 @@
  6333. #include <linux/mnt_namespace.h>
  6334. #include <linux/user_namespace.h>
  6335. #include <linux/namei.h>
  6336. +#include <linux/delay.h>
  6337. #include <linux/security.h>
  6338. #include <linux/idr.h>
  6339. #include <linux/init.h> /* init_rootfs */
  6340. @@ -353,8 +354,11 @@
  6341. * incremented count after it has set MNT_WRITE_HOLD.
  6342. */
  6343. smp_mb();
  6344. - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
  6345. - cpu_relax();
  6346. + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
  6347. + preempt_enable();
  6348. + cpu_chill();
  6349. + preempt_disable();
  6350. + }
  6351. /*
  6352. * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
  6353. * be set to match its requirements. So we must not load that until
  6354. diff -Nur linux-4.8.15.orig/fs/nfs/delegation.c linux-4.8.15/fs/nfs/delegation.c
  6355. --- linux-4.8.15.orig/fs/nfs/delegation.c 2016-12-15 17:50:48.000000000 +0100
  6356. +++ linux-4.8.15/fs/nfs/delegation.c 2017-01-01 17:07:15.451389604 +0100
  6357. @@ -150,11 +150,11 @@
  6358. sp = state->owner;
  6359. /* Block nfs4_proc_unlck */
  6360. mutex_lock(&sp->so_delegreturn_mutex);
  6361. - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
  6362. + seq = read_seqbegin(&sp->so_reclaim_seqlock);
  6363. err = nfs4_open_delegation_recall(ctx, state, stateid, type);
  6364. if (!err)
  6365. err = nfs_delegation_claim_locks(ctx, state, stateid);
  6366. - if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
  6367. + if (!err && read_seqretry(&sp->so_reclaim_seqlock, seq))
  6368. err = -EAGAIN;
  6369. mutex_unlock(&sp->so_delegreturn_mutex);
  6370. put_nfs_open_context(ctx);
  6371. diff -Nur linux-4.8.15.orig/fs/nfs/dir.c linux-4.8.15/fs/nfs/dir.c
  6372. --- linux-4.8.15.orig/fs/nfs/dir.c 2016-12-15 17:50:48.000000000 +0100
  6373. +++ linux-4.8.15/fs/nfs/dir.c 2017-01-01 17:07:15.455389855 +0100
  6374. @@ -485,7 +485,7 @@
  6375. void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
  6376. {
  6377. struct qstr filename = QSTR_INIT(entry->name, entry->len);
  6378. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  6379. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  6380. struct dentry *dentry;
  6381. struct dentry *alias;
  6382. struct inode *dir = d_inode(parent);
  6383. @@ -1490,7 +1490,7 @@
  6384. struct file *file, unsigned open_flags,
  6385. umode_t mode, int *opened)
  6386. {
  6387. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  6388. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  6389. struct nfs_open_context *ctx;
  6390. struct dentry *res;
  6391. struct iattr attr = { .ia_valid = ATTR_OPEN };
  6392. @@ -1805,7 +1805,11 @@
  6393. trace_nfs_rmdir_enter(dir, dentry);
  6394. if (d_really_is_positive(dentry)) {
  6395. +#ifdef CONFIG_PREEMPT_RT_BASE
  6396. + down(&NFS_I(d_inode(dentry))->rmdir_sem);
  6397. +#else
  6398. down_write(&NFS_I(d_inode(dentry))->rmdir_sem);
  6399. +#endif
  6400. error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
  6401. /* Ensure the VFS deletes this inode */
  6402. switch (error) {
  6403. @@ -1815,7 +1819,11 @@
  6404. case -ENOENT:
  6405. nfs_dentry_handle_enoent(dentry);
  6406. }
  6407. +#ifdef CONFIG_PREEMPT_RT_BASE
  6408. + up(&NFS_I(d_inode(dentry))->rmdir_sem);
  6409. +#else
  6410. up_write(&NFS_I(d_inode(dentry))->rmdir_sem);
  6411. +#endif
  6412. } else
  6413. error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
  6414. trace_nfs_rmdir_exit(dir, dentry, error);
  6415. diff -Nur linux-4.8.15.orig/fs/nfs/inode.c linux-4.8.15/fs/nfs/inode.c
  6416. --- linux-4.8.15.orig/fs/nfs/inode.c 2016-12-15 17:50:48.000000000 +0100
  6417. +++ linux-4.8.15/fs/nfs/inode.c 2017-01-01 17:07:15.463390369 +0100
  6418. @@ -1957,7 +1957,11 @@
  6419. nfsi->nrequests = 0;
  6420. nfsi->commit_info.ncommit = 0;
  6421. atomic_set(&nfsi->commit_info.rpcs_out, 0);
  6422. +#ifdef CONFIG_PREEMPT_RT_BASE
  6423. + sema_init(&nfsi->rmdir_sem, 1);
  6424. +#else
  6425. init_rwsem(&nfsi->rmdir_sem);
  6426. +#endif
  6427. nfs4_init_once(nfsi);
  6428. }
  6429. diff -Nur linux-4.8.15.orig/fs/nfs/nfs4_fs.h linux-4.8.15/fs/nfs/nfs4_fs.h
  6430. --- linux-4.8.15.orig/fs/nfs/nfs4_fs.h 2016-12-15 17:50:48.000000000 +0100
  6431. +++ linux-4.8.15/fs/nfs/nfs4_fs.h 2017-01-01 17:07:15.467390636 +0100
  6432. @@ -107,7 +107,7 @@
  6433. unsigned long so_flags;
  6434. struct list_head so_states;
  6435. struct nfs_seqid_counter so_seqid;
  6436. - seqcount_t so_reclaim_seqcount;
  6437. + seqlock_t so_reclaim_seqlock;
  6438. struct mutex so_delegreturn_mutex;
  6439. };
  6440. diff -Nur linux-4.8.15.orig/fs/nfs/nfs4proc.c linux-4.8.15/fs/nfs/nfs4proc.c
  6441. --- linux-4.8.15.orig/fs/nfs/nfs4proc.c 2016-12-15 17:50:48.000000000 +0100
  6442. +++ linux-4.8.15/fs/nfs/nfs4proc.c 2017-01-01 17:07:15.491392171 +0100
  6443. @@ -2525,7 +2525,7 @@
  6444. unsigned int seq;
  6445. int ret;
  6446. - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
  6447. + seq = raw_seqcount_begin(&sp->so_reclaim_seqlock.seqcount);
  6448. ret = _nfs4_proc_open(opendata);
  6449. if (ret != 0)
  6450. @@ -2561,7 +2561,7 @@
  6451. ctx->state = state;
  6452. if (d_inode(dentry) == state->inode) {
  6453. nfs_inode_attach_open_context(ctx);
  6454. - if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
  6455. + if (read_seqretry(&sp->so_reclaim_seqlock, seq))
  6456. nfs4_schedule_stateid_recovery(server, state);
  6457. }
  6458. out:
  6459. diff -Nur linux-4.8.15.orig/fs/nfs/nfs4state.c linux-4.8.15/fs/nfs/nfs4state.c
  6460. --- linux-4.8.15.orig/fs/nfs/nfs4state.c 2016-12-15 17:50:48.000000000 +0100
  6461. +++ linux-4.8.15/fs/nfs/nfs4state.c 2017-01-01 17:07:15.527394493 +0100
  6462. @@ -488,7 +488,7 @@
  6463. nfs4_init_seqid_counter(&sp->so_seqid);
  6464. atomic_set(&sp->so_count, 1);
  6465. INIT_LIST_HEAD(&sp->so_lru);
  6466. - seqcount_init(&sp->so_reclaim_seqcount);
  6467. + seqlock_init(&sp->so_reclaim_seqlock);
  6468. mutex_init(&sp->so_delegreturn_mutex);
  6469. return sp;
  6470. }
  6471. @@ -1459,8 +1459,12 @@
  6472. * recovering after a network partition or a reboot from a
  6473. * server that doesn't support a grace period.
  6474. */
  6475. +#ifdef CONFIG_PREEMPT_RT_FULL
  6476. + write_seqlock(&sp->so_reclaim_seqlock);
  6477. +#else
  6478. + write_seqcount_begin(&sp->so_reclaim_seqlock.seqcount);
  6479. +#endif
  6480. spin_lock(&sp->so_lock);
  6481. - raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
  6482. restart:
  6483. list_for_each_entry(state, &sp->so_states, open_states) {
  6484. if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
  6485. @@ -1528,14 +1532,20 @@
  6486. spin_lock(&sp->so_lock);
  6487. goto restart;
  6488. }
  6489. - raw_write_seqcount_end(&sp->so_reclaim_seqcount);
  6490. spin_unlock(&sp->so_lock);
  6491. +#ifdef CONFIG_PREEMPT_RT_FULL
  6492. + write_sequnlock(&sp->so_reclaim_seqlock);
  6493. +#else
  6494. + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount);
  6495. +#endif
  6496. return 0;
  6497. out_err:
  6498. nfs4_put_open_state(state);
  6499. - spin_lock(&sp->so_lock);
  6500. - raw_write_seqcount_end(&sp->so_reclaim_seqcount);
  6501. - spin_unlock(&sp->so_lock);
  6502. +#ifdef CONFIG_PREEMPT_RT_FULL
  6503. + write_sequnlock(&sp->so_reclaim_seqlock);
  6504. +#else
  6505. + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount);
  6506. +#endif
  6507. return status;
  6508. }
  6509. diff -Nur linux-4.8.15.orig/fs/nfs/unlink.c linux-4.8.15/fs/nfs/unlink.c
  6510. --- linux-4.8.15.orig/fs/nfs/unlink.c 2016-12-15 17:50:48.000000000 +0100
  6511. +++ linux-4.8.15/fs/nfs/unlink.c 2017-01-01 17:07:15.531394748 +0100
  6512. @@ -12,7 +12,7 @@
  6513. #include <linux/sunrpc/clnt.h>
  6514. #include <linux/nfs_fs.h>
  6515. #include <linux/sched.h>
  6516. -#include <linux/wait.h>
  6517. +#include <linux/swait.h>
  6518. #include <linux/namei.h>
  6519. #include <linux/fsnotify.h>
  6520. @@ -51,6 +51,29 @@
  6521. rpc_restart_call_prepare(task);
  6522. }
  6523. +#ifdef CONFIG_PREEMPT_RT_BASE
  6524. +static void nfs_down_anon(struct semaphore *sema)
  6525. +{
  6526. + down(sema);
  6527. +}
  6528. +
  6529. +static void nfs_up_anon(struct semaphore *sema)
  6530. +{
  6531. + up(sema);
  6532. +}
  6533. +
  6534. +#else
  6535. +static void nfs_down_anon(struct rw_semaphore *rwsem)
  6536. +{
  6537. + down_read_non_owner(rwsem);
  6538. +}
  6539. +
  6540. +static void nfs_up_anon(struct rw_semaphore *rwsem)
  6541. +{
  6542. + up_read_non_owner(rwsem);
  6543. +}
  6544. +#endif
  6545. +
  6546. /**
  6547. * nfs_async_unlink_release - Release the sillydelete data.
  6548. * @task: rpc_task of the sillydelete
  6549. @@ -64,7 +87,7 @@
  6550. struct dentry *dentry = data->dentry;
  6551. struct super_block *sb = dentry->d_sb;
  6552. - up_read_non_owner(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem);
  6553. + nfs_up_anon(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem);
  6554. d_lookup_done(dentry);
  6555. nfs_free_unlinkdata(data);
  6556. dput(dentry);
  6557. @@ -117,10 +140,10 @@
  6558. struct inode *dir = d_inode(dentry->d_parent);
  6559. struct dentry *alias;
  6560. - down_read_non_owner(&NFS_I(dir)->rmdir_sem);
  6561. + nfs_down_anon(&NFS_I(dir)->rmdir_sem);
  6562. alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq);
  6563. if (IS_ERR(alias)) {
  6564. - up_read_non_owner(&NFS_I(dir)->rmdir_sem);
  6565. + nfs_up_anon(&NFS_I(dir)->rmdir_sem);
  6566. return 0;
  6567. }
  6568. if (!d_in_lookup(alias)) {
  6569. @@ -142,7 +165,7 @@
  6570. ret = 0;
  6571. spin_unlock(&alias->d_lock);
  6572. dput(alias);
  6573. - up_read_non_owner(&NFS_I(dir)->rmdir_sem);
  6574. + nfs_up_anon(&NFS_I(dir)->rmdir_sem);
  6575. /*
  6576. * If we'd displaced old cached devname, free it. At that
  6577. * point dentry is definitely not a root, so we won't need
  6578. @@ -182,7 +205,7 @@
  6579. goto out_free_name;
  6580. }
  6581. data->res.dir_attr = &data->dir_attr;
  6582. - init_waitqueue_head(&data->wq);
  6583. + init_swait_queue_head(&data->wq);
  6584. status = -EBUSY;
  6585. spin_lock(&dentry->d_lock);
  6586. diff -Nur linux-4.8.15.orig/fs/ntfs/aops.c linux-4.8.15/fs/ntfs/aops.c
  6587. --- linux-4.8.15.orig/fs/ntfs/aops.c 2016-12-15 17:50:48.000000000 +0100
  6588. +++ linux-4.8.15/fs/ntfs/aops.c 2017-01-01 17:07:15.547395781 +0100
  6589. @@ -92,13 +92,13 @@
  6590. ofs = 0;
  6591. if (file_ofs < init_size)
  6592. ofs = init_size - file_ofs;
  6593. - local_irq_save(flags);
  6594. + local_irq_save_nort(flags);
  6595. kaddr = kmap_atomic(page);
  6596. memset(kaddr + bh_offset(bh) + ofs, 0,
  6597. bh->b_size - ofs);
  6598. flush_dcache_page(page);
  6599. kunmap_atomic(kaddr);
  6600. - local_irq_restore(flags);
  6601. + local_irq_restore_nort(flags);
  6602. }
  6603. } else {
  6604. clear_buffer_uptodate(bh);
  6605. @@ -107,8 +107,7 @@
  6606. "0x%llx.", (unsigned long long)bh->b_blocknr);
  6607. }
  6608. first = page_buffers(page);
  6609. - local_irq_save(flags);
  6610. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  6611. + flags = bh_uptodate_lock_irqsave(first);
  6612. clear_buffer_async_read(bh);
  6613. unlock_buffer(bh);
  6614. tmp = bh;
  6615. @@ -123,8 +122,7 @@
  6616. }
  6617. tmp = tmp->b_this_page;
  6618. } while (tmp != bh);
  6619. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6620. - local_irq_restore(flags);
  6621. + bh_uptodate_unlock_irqrestore(first, flags);
  6622. /*
  6623. * If none of the buffers had errors then we can set the page uptodate,
  6624. * but we first have to perform the post read mst fixups, if the
  6625. @@ -145,13 +143,13 @@
  6626. recs = PAGE_SIZE / rec_size;
  6627. /* Should have been verified before we got here... */
  6628. BUG_ON(!recs);
  6629. - local_irq_save(flags);
  6630. + local_irq_save_nort(flags);
  6631. kaddr = kmap_atomic(page);
  6632. for (i = 0; i < recs; i++)
  6633. post_read_mst_fixup((NTFS_RECORD*)(kaddr +
  6634. i * rec_size), rec_size);
  6635. kunmap_atomic(kaddr);
  6636. - local_irq_restore(flags);
  6637. + local_irq_restore_nort(flags);
  6638. flush_dcache_page(page);
  6639. if (likely(page_uptodate && !PageError(page)))
  6640. SetPageUptodate(page);
  6641. @@ -159,9 +157,7 @@
  6642. unlock_page(page);
  6643. return;
  6644. still_busy:
  6645. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  6646. - local_irq_restore(flags);
  6647. - return;
  6648. + bh_uptodate_unlock_irqrestore(first, flags);
  6649. }
  6650. /**
  6651. diff -Nur linux-4.8.15.orig/fs/proc/base.c linux-4.8.15/fs/proc/base.c
  6652. --- linux-4.8.15.orig/fs/proc/base.c 2016-12-15 17:50:48.000000000 +0100
  6653. +++ linux-4.8.15/fs/proc/base.c 2017-01-01 17:07:15.571397336 +0100
  6654. @@ -1819,7 +1819,7 @@
  6655. child = d_hash_and_lookup(dir, &qname);
  6656. if (!child) {
  6657. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  6658. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  6659. child = d_alloc_parallel(dir, &qname, &wq);
  6660. if (IS_ERR(child))
  6661. goto end_instantiate;
  6662. diff -Nur linux-4.8.15.orig/fs/proc/proc_sysctl.c linux-4.8.15/fs/proc/proc_sysctl.c
  6663. --- linux-4.8.15.orig/fs/proc/proc_sysctl.c 2016-12-15 17:50:48.000000000 +0100
  6664. +++ linux-4.8.15/fs/proc/proc_sysctl.c 2017-01-01 17:07:15.575397587 +0100
  6665. @@ -627,7 +627,7 @@
  6666. child = d_lookup(dir, &qname);
  6667. if (!child) {
  6668. - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
  6669. + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
  6670. child = d_alloc_parallel(dir, &qname, &wq);
  6671. if (IS_ERR(child))
  6672. return false;
  6673. diff -Nur linux-4.8.15.orig/fs/timerfd.c linux-4.8.15/fs/timerfd.c
  6674. --- linux-4.8.15.orig/fs/timerfd.c 2016-12-15 17:50:48.000000000 +0100
  6675. +++ linux-4.8.15/fs/timerfd.c 2017-01-01 17:07:15.587398365 +0100
  6676. @@ -460,7 +460,10 @@
  6677. break;
  6678. }
  6679. spin_unlock_irq(&ctx->wqh.lock);
  6680. - cpu_relax();
  6681. + if (isalarm(ctx))
  6682. + hrtimer_wait_for_timer(&ctx->t.alarm.timer);
  6683. + else
  6684. + hrtimer_wait_for_timer(&ctx->t.tmr);
  6685. }
  6686. /*
  6687. diff -Nur linux-4.8.15.orig/include/acpi/platform/aclinux.h linux-4.8.15/include/acpi/platform/aclinux.h
  6688. --- linux-4.8.15.orig/include/acpi/platform/aclinux.h 2016-12-15 17:50:48.000000000 +0100
  6689. +++ linux-4.8.15/include/acpi/platform/aclinux.h 2017-01-01 17:07:15.591398616 +0100
  6690. @@ -131,6 +131,7 @@
  6691. #define acpi_cache_t struct kmem_cache
  6692. #define acpi_spinlock spinlock_t *
  6693. +#define acpi_raw_spinlock raw_spinlock_t *
  6694. #define acpi_cpu_flags unsigned long
  6695. /* Use native linux version of acpi_os_allocate_zeroed */
  6696. @@ -149,6 +150,20 @@
  6697. #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id
  6698. #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock
  6699. +#define acpi_os_create_raw_lock(__handle) \
  6700. +({ \
  6701. + raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \
  6702. + \
  6703. + if (lock) { \
  6704. + *(__handle) = lock; \
  6705. + raw_spin_lock_init(*(__handle)); \
  6706. + } \
  6707. + lock ? AE_OK : AE_NO_MEMORY; \
  6708. + })
  6709. +
  6710. +#define acpi_os_delete_raw_lock(__handle) kfree(__handle)
  6711. +
  6712. +
  6713. /*
  6714. * OSL interfaces used by debugger/disassembler
  6715. */
  6716. diff -Nur linux-4.8.15.orig/include/asm-generic/bug.h linux-4.8.15/include/asm-generic/bug.h
  6717. --- linux-4.8.15.orig/include/asm-generic/bug.h 2016-12-15 17:50:48.000000000 +0100
  6718. +++ linux-4.8.15/include/asm-generic/bug.h 2017-01-01 17:07:15.591398616 +0100
  6719. @@ -215,6 +215,20 @@
  6720. # define WARN_ON_SMP(x) ({0;})
  6721. #endif
  6722. +#ifdef CONFIG_PREEMPT_RT_BASE
  6723. +# define BUG_ON_RT(c) BUG_ON(c)
  6724. +# define BUG_ON_NONRT(c) do { } while (0)
  6725. +# define WARN_ON_RT(condition) WARN_ON(condition)
  6726. +# define WARN_ON_NONRT(condition) do { } while (0)
  6727. +# define WARN_ON_ONCE_NONRT(condition) do { } while (0)
  6728. +#else
  6729. +# define BUG_ON_RT(c) do { } while (0)
  6730. +# define BUG_ON_NONRT(c) BUG_ON(c)
  6731. +# define WARN_ON_RT(condition) do { } while (0)
  6732. +# define WARN_ON_NONRT(condition) WARN_ON(condition)
  6733. +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition)
  6734. +#endif
  6735. +
  6736. #endif /* __ASSEMBLY__ */
  6737. #endif
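These annotations let a debug check stay active only on the configuration where it is meaningful. For example, a path that legitimately runs with interrupts enabled on RT, but must have them disabled otherwise, could keep its assertion as:

    /* Fires only on non-RT kernels, where the invariant must hold. */
    WARN_ON_NONRT(!irqs_disabled());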
  6738. diff -Nur linux-4.8.15.orig/include/linux/blkdev.h linux-4.8.15/include/linux/blkdev.h
  6739. --- linux-4.8.15.orig/include/linux/blkdev.h 2016-12-15 17:50:48.000000000 +0100
  6740. +++ linux-4.8.15/include/linux/blkdev.h 2017-01-01 17:07:15.623400677 +0100
  6741. @@ -89,6 +89,7 @@
  6742. struct list_head queuelist;
  6743. union {
  6744. struct call_single_data csd;
  6745. + struct work_struct work;
  6746. u64 fifo_time;
  6747. };
  6748. @@ -467,7 +468,7 @@
  6749. struct throtl_data *td;
  6750. #endif
  6751. struct rcu_head rcu_head;
  6752. - wait_queue_head_t mq_freeze_wq;
  6753. + struct swait_queue_head mq_freeze_wq;
  6754. struct percpu_ref q_usage_counter;
  6755. struct list_head all_q_node;
  6756. diff -Nur linux-4.8.15.orig/include/linux/blk-mq.h linux-4.8.15/include/linux/blk-mq.h
  6757. --- linux-4.8.15.orig/include/linux/blk-mq.h 2016-12-15 17:50:48.000000000 +0100
  6758. +++ linux-4.8.15/include/linux/blk-mq.h 2017-01-01 17:07:15.599399127 +0100
  6759. @@ -222,6 +222,7 @@
  6760. struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
  6761. struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
  6762. +void __blk_mq_complete_request_remote_work(struct work_struct *work);
  6763. int blk_mq_request_started(struct request *rq);
  6764. void blk_mq_start_request(struct request *rq);
  6765. diff -Nur linux-4.8.15.orig/include/linux/bottom_half.h linux-4.8.15/include/linux/bottom_half.h
  6766. --- linux-4.8.15.orig/include/linux/bottom_half.h 2016-12-15 17:50:48.000000000 +0100
  6767. +++ linux-4.8.15/include/linux/bottom_half.h 2017-01-01 17:07:15.623400677 +0100
  6768. @@ -3,6 +3,39 @@
  6769. #include <linux/preempt.h>
  6770. +#ifdef CONFIG_PREEMPT_RT_FULL
  6771. +
  6772. +extern void __local_bh_disable(void);
  6773. +extern void _local_bh_enable(void);
  6774. +extern void __local_bh_enable(void);
  6775. +
  6776. +static inline void local_bh_disable(void)
  6777. +{
  6778. + __local_bh_disable();
  6779. +}
  6780. +
  6781. +static inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
  6782. +{
  6783. + __local_bh_disable();
  6784. +}
  6785. +
  6786. +static inline void local_bh_enable(void)
  6787. +{
  6788. + __local_bh_enable();
  6789. +}
  6790. +
  6791. +static inline void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
  6792. +{
  6793. + __local_bh_enable();
  6794. +}
  6795. +
  6796. +static inline void local_bh_enable_ip(unsigned long ip)
  6797. +{
  6798. + __local_bh_enable();
  6799. +}
  6800. +
  6801. +#else
  6802. +
  6803. #ifdef CONFIG_TRACE_IRQFLAGS
  6804. extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
  6805. #else
  6806. @@ -30,5 +63,6 @@
  6807. {
  6808. __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
  6809. }
  6810. +#endif
  6811. #endif /* _LINUX_BH_H */
  6812. diff -Nur linux-4.8.15.orig/include/linux/buffer_head.h linux-4.8.15/include/linux/buffer_head.h
  6813. --- linux-4.8.15.orig/include/linux/buffer_head.h 2016-12-15 17:50:48.000000000 +0100
  6814. +++ linux-4.8.15/include/linux/buffer_head.h 2017-01-01 17:07:15.623400677 +0100
  6815. @@ -75,8 +75,50 @@
  6816. struct address_space *b_assoc_map; /* mapping this buffer is
  6817. associated with */
  6818. atomic_t b_count; /* users using this buffer_head */
  6819. +#ifdef CONFIG_PREEMPT_RT_BASE
  6820. + spinlock_t b_uptodate_lock;
  6821. +#if IS_ENABLED(CONFIG_JBD2)
  6822. + spinlock_t b_state_lock;
  6823. + spinlock_t b_journal_head_lock;
  6824. +#endif
  6825. +#endif
  6826. };
  6827. +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
  6828. +{
  6829. + unsigned long flags;
  6830. +
  6831. +#ifndef CONFIG_PREEMPT_RT_BASE
  6832. + local_irq_save(flags);
  6833. + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
  6834. +#else
  6835. + spin_lock_irqsave(&bh->b_uptodate_lock, flags);
  6836. +#endif
  6837. + return flags;
  6838. +}
  6839. +
  6840. +static inline void
  6841. +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
  6842. +{
  6843. +#ifndef CONFIG_PREEMPT_RT_BASE
  6844. + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
  6845. + local_irq_restore(flags);
  6846. +#else
  6847. + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
  6848. +#endif
  6849. +}
  6850. +
  6851. +static inline void buffer_head_init_locks(struct buffer_head *bh)
  6852. +{
  6853. +#ifdef CONFIG_PREEMPT_RT_BASE
  6854. + spin_lock_init(&bh->b_uptodate_lock);
  6855. +#if IS_ENABLED(CONFIG_JBD2)
  6856. + spin_lock_init(&bh->b_state_lock);
  6857. + spin_lock_init(&bh->b_journal_head_lock);
  6858. +#endif
  6859. +#endif
  6860. +}
  6861. +
  6862. /*
  6863. * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
  6864. * and buffer_foo() functions.
  6865. diff -Nur linux-4.8.15.orig/include/linux/cgroup-defs.h linux-4.8.15/include/linux/cgroup-defs.h
  6866. --- linux-4.8.15.orig/include/linux/cgroup-defs.h 2016-12-15 17:50:48.000000000 +0100
  6867. +++ linux-4.8.15/include/linux/cgroup-defs.h 2017-01-01 17:07:15.631401192 +0100
  6868. @@ -16,6 +16,7 @@
  6869. #include <linux/percpu-refcount.h>
  6870. #include <linux/percpu-rwsem.h>
  6871. #include <linux/workqueue.h>
  6872. +#include <linux/swork.h>
  6873. #ifdef CONFIG_CGROUPS
  6874. @@ -137,6 +138,7 @@
  6875. /* percpu_ref killing and RCU release */
  6876. struct rcu_head rcu_head;
  6877. struct work_struct destroy_work;
  6878. + struct swork_event destroy_swork;
  6879. };
  6880. /*
  6881. diff -Nur linux-4.8.15.orig/include/linux/completion.h linux-4.8.15/include/linux/completion.h
  6882. --- linux-4.8.15.orig/include/linux/completion.h 2016-12-15 17:50:48.000000000 +0100
  6883. +++ linux-4.8.15/include/linux/completion.h 2017-01-01 17:07:15.659402994 +0100
  6884. @@ -7,8 +7,7 @@
  6885. * Atomic wait-for-completion handler data structures.
  6886. * See kernel/sched/completion.c for details.
  6887. */
  6888. -
  6889. -#include <linux/wait.h>
  6890. +#include <linux/swait.h>
  6891. /*
  6892. * struct completion - structure used to maintain state for a "completion"
  6893. @@ -24,11 +23,11 @@
  6894. */
  6895. struct completion {
  6896. unsigned int done;
  6897. - wait_queue_head_t wait;
  6898. + struct swait_queue_head wait;
  6899. };
  6900. #define COMPLETION_INITIALIZER(work) \
  6901. - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
  6902. + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
  6903. #define COMPLETION_INITIALIZER_ONSTACK(work) \
  6904. ({ init_completion(&work); work; })
  6905. @@ -73,7 +72,7 @@
  6906. static inline void init_completion(struct completion *x)
  6907. {
  6908. x->done = 0;
  6909. - init_waitqueue_head(&x->wait);
  6910. + init_swait_queue_head(&x->wait);
  6911. }
  6912. /**
  6913. diff -Nur linux-4.8.15.orig/include/linux/cpu.h linux-4.8.15/include/linux/cpu.h
  6914. --- linux-4.8.15.orig/include/linux/cpu.h 2016-12-15 17:50:48.000000000 +0100
  6915. +++ linux-4.8.15/include/linux/cpu.h 2017-01-01 17:07:15.703405827 +0100
  6916. @@ -194,6 +194,8 @@
  6917. extern void put_online_cpus(void);
  6918. extern void cpu_hotplug_disable(void);
  6919. extern void cpu_hotplug_enable(void);
  6920. +extern void pin_current_cpu(void);
  6921. +extern void unpin_current_cpu(void);
  6922. #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
  6923. #define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri)
  6924. #define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
  6925. @@ -211,6 +213,8 @@
  6926. #define put_online_cpus() do { } while (0)
  6927. #define cpu_hotplug_disable() do { } while (0)
  6928. #define cpu_hotplug_enable() do { } while (0)
  6929. +static inline void pin_current_cpu(void) { }
  6930. +static inline void unpin_current_cpu(void) { }
  6931. #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
  6932. #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
  6933. /* These aren't inline functions due to a GCC bug. */
  6934. diff -Nur linux-4.8.15.orig/include/linux/dcache.h linux-4.8.15/include/linux/dcache.h
  6935. --- linux-4.8.15.orig/include/linux/dcache.h 2016-12-15 17:50:48.000000000 +0100
  6936. +++ linux-4.8.15/include/linux/dcache.h 2017-01-01 17:07:15.719406860 +0100
  6937. @@ -11,6 +11,7 @@
  6938. #include <linux/rcupdate.h>
  6939. #include <linux/lockref.h>
  6940. #include <linux/stringhash.h>
  6941. +#include <linux/wait.h>
  6942. struct path;
  6943. struct vfsmount;
  6944. @@ -100,7 +101,7 @@
  6945. union {
  6946. struct list_head d_lru; /* LRU list */
  6947. - wait_queue_head_t *d_wait; /* in-lookup ones only */
  6948. + struct swait_queue_head *d_wait; /* in-lookup ones only */
  6949. };
  6950. struct list_head d_child; /* child of parent list */
  6951. struct list_head d_subdirs; /* our children */
  6952. @@ -230,7 +231,7 @@
  6953. extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
  6954. extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
  6955. extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
  6956. - wait_queue_head_t *);
  6957. + struct swait_queue_head *);
  6958. extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
  6959. extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
  6960. extern struct dentry * d_exact_alias(struct dentry *, struct inode *);
  6961. diff -Nur linux-4.8.15.orig/include/linux/delay.h linux-4.8.15/include/linux/delay.h
  6962. --- linux-4.8.15.orig/include/linux/delay.h 2016-12-15 17:50:48.000000000 +0100
  6963. +++ linux-4.8.15/include/linux/delay.h 2017-01-01 17:07:15.719406860 +0100
  6964. @@ -52,4 +52,10 @@
  6965. msleep(seconds * 1000);
  6966. }
  6967. +#ifdef CONFIG_PREEMPT_RT_FULL
  6968. +extern void cpu_chill(void);
  6969. +#else
  6970. +# define cpu_chill() cpu_relax()
  6971. +#endif
  6972. +
  6973. #endif /* defined(_LINUX_DELAY_H) */
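A sketch of the retry pattern cpu_chill() is meant for; demo_try_acquire() is a hypothetical trylock-style operation. On mainline the macro degenerates to cpu_relax(), while the RT implementation sleeps briefly so a preempted lock holder can make progress:

    #include <linux/delay.h>

    extern int demo_try_acquire(void);      /* hypothetical trylock-style helper */

    static void demo_wait_for_resource(void)
    {
            while (!demo_try_acquire())
                    cpu_chill();            /* cpu_relax() on !RT, short sleep on RT */
    }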
  6974. diff -Nur linux-4.8.15.orig/include/linux/ftrace.h linux-4.8.15/include/linux/ftrace.h
  6975. --- linux-4.8.15.orig/include/linux/ftrace.h 2016-12-15 17:50:48.000000000 +0100
  6976. +++ linux-4.8.15/include/linux/ftrace.h 2017-01-01 17:07:15.743408406 +0100
  6977. @@ -714,6 +714,7 @@
  6978. #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
  6979. #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
  6980. +#ifdef CONFIG_USING_GET_LOCK_PARENT_IP
  6981. static inline unsigned long get_lock_parent_ip(void)
  6982. {
  6983. unsigned long addr = CALLER_ADDR0;
  6984. @@ -725,6 +726,7 @@
  6985. return addr;
  6986. return CALLER_ADDR2;
  6987. }
  6988. +#endif
  6989. #ifdef CONFIG_IRQSOFF_TRACER
  6990. extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
  6991. diff -Nur linux-4.8.15.orig/include/linux/highmem.h linux-4.8.15/include/linux/highmem.h
  6992. --- linux-4.8.15.orig/include/linux/highmem.h 2016-12-15 17:50:48.000000000 +0100
  6993. +++ linux-4.8.15/include/linux/highmem.h 2017-01-01 17:07:15.743408406 +0100
  6994. @@ -7,6 +7,7 @@
  6995. #include <linux/mm.h>
  6996. #include <linux/uaccess.h>
  6997. #include <linux/hardirq.h>
  6998. +#include <linux/sched.h>
  6999. #include <asm/cacheflush.h>
  7000. @@ -65,7 +66,7 @@
  7001. static inline void *kmap_atomic(struct page *page)
  7002. {
  7003. - preempt_disable();
  7004. + preempt_disable_nort();
  7005. pagefault_disable();
  7006. return page_address(page);
  7007. }
  7008. @@ -74,7 +75,7 @@
  7009. static inline void __kunmap_atomic(void *addr)
  7010. {
  7011. pagefault_enable();
  7012. - preempt_enable();
  7013. + preempt_enable_nort();
  7014. }
  7015. #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn))
  7016. @@ -86,32 +87,51 @@
  7017. #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
  7018. +#ifndef CONFIG_PREEMPT_RT_FULL
  7019. DECLARE_PER_CPU(int, __kmap_atomic_idx);
  7020. +#endif
  7021. static inline int kmap_atomic_idx_push(void)
  7022. {
  7023. +#ifndef CONFIG_PREEMPT_RT_FULL
  7024. int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1;
  7025. -#ifdef CONFIG_DEBUG_HIGHMEM
  7026. +# ifdef CONFIG_DEBUG_HIGHMEM
  7027. WARN_ON_ONCE(in_irq() && !irqs_disabled());
  7028. BUG_ON(idx >= KM_TYPE_NR);
  7029. -#endif
  7030. +# endif
  7031. return idx;
  7032. +#else
  7033. + current->kmap_idx++;
  7034. + BUG_ON(current->kmap_idx > KM_TYPE_NR);
  7035. + return current->kmap_idx - 1;
  7036. +#endif
  7037. }
  7038. static inline int kmap_atomic_idx(void)
  7039. {
  7040. +#ifndef CONFIG_PREEMPT_RT_FULL
  7041. return __this_cpu_read(__kmap_atomic_idx) - 1;
  7042. +#else
  7043. + return current->kmap_idx - 1;
  7044. +#endif
  7045. }
  7046. static inline void kmap_atomic_idx_pop(void)
  7047. {
  7048. -#ifdef CONFIG_DEBUG_HIGHMEM
  7049. +#ifndef CONFIG_PREEMPT_RT_FULL
  7050. +# ifdef CONFIG_DEBUG_HIGHMEM
  7051. int idx = __this_cpu_dec_return(__kmap_atomic_idx);
  7052. BUG_ON(idx < 0);
  7053. -#else
  7054. +# else
  7055. __this_cpu_dec(__kmap_atomic_idx);
  7056. +# endif
  7057. +#else
  7058. + current->kmap_idx--;
  7059. +# ifdef CONFIG_DEBUG_HIGHMEM
  7060. + BUG_ON(current->kmap_idx < 0);
  7061. +# endif
  7062. #endif
  7063. }
  7064. diff -Nur linux-4.8.15.orig/include/linux/hrtimer.h linux-4.8.15/include/linux/hrtimer.h
  7065. --- linux-4.8.15.orig/include/linux/hrtimer.h 2016-12-15 17:50:48.000000000 +0100
  7066. +++ linux-4.8.15/include/linux/hrtimer.h 2017-01-01 17:07:15.751408919 +0100
  7067. @@ -87,6 +87,9 @@
  7068. * @function: timer expiry callback function
  7069. * @base: pointer to the timer base (per cpu and per clock)
  7070. * @state: state information (See bit values above)
  7071. + * @cb_entry: list entry to defer timers from hardirq context
  7072. + * @irqsafe: timer can run in hardirq context
  7073. + * @praecox: timer expiry time if expired at the time of programming
  7074. * @is_rel: Set if the timer was armed relative
  7075. * @start_pid: timer statistics field to store the pid of the task which
  7076. * started the timer
  7077. @@ -103,6 +106,11 @@
  7078. enum hrtimer_restart (*function)(struct hrtimer *);
  7079. struct hrtimer_clock_base *base;
  7080. u8 state;
  7081. + struct list_head cb_entry;
  7082. + int irqsafe;
  7083. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  7084. + ktime_t praecox;
  7085. +#endif
  7086. u8 is_rel;
  7087. #ifdef CONFIG_TIMER_STATS
  7088. int start_pid;
  7089. @@ -123,11 +131,7 @@
  7090. struct task_struct *task;
  7091. };
  7092. -#ifdef CONFIG_64BIT
  7093. # define HRTIMER_CLOCK_BASE_ALIGN 64
  7094. -#else
  7095. -# define HRTIMER_CLOCK_BASE_ALIGN 32
  7096. -#endif
  7097. /**
  7098. * struct hrtimer_clock_base - the timer base for a specific clock
  7099. @@ -136,6 +140,7 @@
  7100. * timer to a base on another cpu.
  7101. * @clockid: clock id for per_cpu support
  7102. * @active: red black tree root node for the active timers
  7103. + * @expired: list head for deferred timers.
  7104. * @get_time: function to retrieve the current time of the clock
  7105. * @offset: offset of this clock to the monotonic base
  7106. */
  7107. @@ -144,6 +149,7 @@
  7108. int index;
  7109. clockid_t clockid;
  7110. struct timerqueue_head active;
  7111. + struct list_head expired;
  7112. ktime_t (*get_time)(void);
  7113. ktime_t offset;
  7114. } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN)));
  7115. @@ -187,6 +193,7 @@
  7116. raw_spinlock_t lock;
  7117. seqcount_t seq;
  7118. struct hrtimer *running;
  7119. + struct hrtimer *running_soft;
  7120. unsigned int cpu;
  7121. unsigned int active_bases;
  7122. unsigned int clock_was_set_seq;
  7123. @@ -203,6 +210,9 @@
  7124. unsigned int nr_hangs;
  7125. unsigned int max_hang_time;
  7126. #endif
  7127. +#ifdef CONFIG_PREEMPT_RT_BASE
  7128. + wait_queue_head_t wait;
  7129. +#endif
  7130. struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
  7131. } ____cacheline_aligned;
  7132. @@ -412,6 +422,13 @@
  7133. hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
  7134. }
  7135. +/* Softirq preemption could deadlock timer removal */
  7136. +#ifdef CONFIG_PREEMPT_RT_BASE
  7137. + extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
  7138. +#else
  7139. +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
  7140. +#endif
  7141. +
  7142. /* Query timers: */
  7143. extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
  7144. @@ -436,7 +453,7 @@
  7145. * Helper function to check, whether the timer is running the callback
  7146. * function
  7147. */
  7148. -static inline int hrtimer_callback_running(struct hrtimer *timer)
  7149. +static inline int hrtimer_callback_running(const struct hrtimer *timer)
  7150. {
  7151. return timer->base->cpu_base->running == timer;
  7152. }
  7153. diff -Nur linux-4.8.15.orig/include/linux/idr.h linux-4.8.15/include/linux/idr.h
  7154. --- linux-4.8.15.orig/include/linux/idr.h 2016-12-15 17:50:48.000000000 +0100
  7155. +++ linux-4.8.15/include/linux/idr.h 2017-01-01 17:07:15.751408919 +0100
  7156. @@ -95,10 +95,14 @@
  7157. * Each idr_preload() should be matched with an invocation of this
  7158. * function. See idr_preload() for details.
  7159. */
  7160. +#ifdef CONFIG_PREEMPT_RT_FULL
  7161. +void idr_preload_end(void);
  7162. +#else
  7163. static inline void idr_preload_end(void)
  7164. {
  7165. preempt_enable();
  7166. }
  7167. +#endif
  7168. /**
  7169. * idr_find - return pointer for given id
  7170. diff -Nur linux-4.8.15.orig/include/linux/init_task.h linux-4.8.15/include/linux/init_task.h
  7171. --- linux-4.8.15.orig/include/linux/init_task.h 2016-12-15 17:50:48.000000000 +0100
  7172. +++ linux-4.8.15/include/linux/init_task.h 2017-01-01 17:07:15.751408919 +0100
  7173. @@ -148,6 +148,12 @@
  7174. # define INIT_PERF_EVENTS(tsk)
  7175. #endif
  7176. +#ifdef CONFIG_PREEMPT_RT_BASE
  7177. +# define INIT_TIMER_LIST .posix_timer_list = NULL,
  7178. +#else
  7179. +# define INIT_TIMER_LIST
  7180. +#endif
  7181. +
  7182. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  7183. # define INIT_VTIME(tsk) \
  7184. .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \
  7185. @@ -239,6 +245,7 @@
  7186. .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
  7187. .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
  7188. .timer_slack_ns = 50000, /* 50 usec default slack */ \
  7189. + INIT_TIMER_LIST \
  7190. .pids = { \
  7191. [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
  7192. [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
  7193. diff -Nur linux-4.8.15.orig/include/linux/interrupt.h linux-4.8.15/include/linux/interrupt.h
  7194. --- linux-4.8.15.orig/include/linux/interrupt.h 2016-12-15 17:50:48.000000000 +0100
  7195. +++ linux-4.8.15/include/linux/interrupt.h 2017-01-01 17:07:15.759409443 +0100
  7196. @@ -14,6 +14,7 @@
  7197. #include <linux/hrtimer.h>
  7198. #include <linux/kref.h>
  7199. #include <linux/workqueue.h>
  7200. +#include <linux/swork.h>
  7201. #include <linux/atomic.h>
  7202. #include <asm/ptrace.h>
  7203. @@ -61,6 +62,7 @@
  7204. * interrupt handler after suspending interrupts. For system
  7205. * wakeup devices users need to implement wakeup detection in
  7206. * their interrupt handlers.
  7207. + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT)
  7208. */
  7209. #define IRQF_SHARED 0x00000080
  7210. #define IRQF_PROBE_SHARED 0x00000100
  7211. @@ -74,6 +76,7 @@
  7212. #define IRQF_NO_THREAD 0x00010000
  7213. #define IRQF_EARLY_RESUME 0x00020000
  7214. #define IRQF_COND_SUSPEND 0x00040000
  7215. +#define IRQF_NO_SOFTIRQ_CALL 0x00080000
  7216. #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
  7217. @@ -196,7 +199,7 @@
  7218. #ifdef CONFIG_LOCKDEP
  7219. # define local_irq_enable_in_hardirq() do { } while (0)
  7220. #else
  7221. -# define local_irq_enable_in_hardirq() local_irq_enable()
  7222. +# define local_irq_enable_in_hardirq() local_irq_enable_nort()
  7223. #endif
  7224. extern void disable_irq_nosync(unsigned int irq);
  7225. @@ -216,6 +219,7 @@
  7226. * struct irq_affinity_notify - context for notification of IRQ affinity changes
  7227. * @irq: Interrupt to which notification applies
  7228. * @kref: Reference count, for internal use
  7229. + * @swork: Swork item, for internal use
  7230. * @work: Work item, for internal use
  7231. * @notify: Function to be called on change. This will be
  7232. * called in process context.
  7233. @@ -227,7 +231,11 @@
  7234. struct irq_affinity_notify {
  7235. unsigned int irq;
  7236. struct kref kref;
  7237. +#ifdef CONFIG_PREEMPT_RT_BASE
  7238. + struct swork_event swork;
  7239. +#else
  7240. struct work_struct work;
  7241. +#endif
  7242. void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
  7243. void (*release)(struct kref *ref);
  7244. };
  7245. @@ -398,9 +406,13 @@
  7246. bool state);
  7247. #ifdef CONFIG_IRQ_FORCED_THREADING
  7248. +# ifndef CONFIG_PREEMPT_RT_BASE
  7249. extern bool force_irqthreads;
  7250. +# else
  7251. +# define force_irqthreads (true)
  7252. +# endif
  7253. #else
  7254. -#define force_irqthreads (0)
  7255. +#define force_irqthreads (false)
  7256. #endif
  7257. #ifndef __ARCH_SET_SOFTIRQ_PENDING
  7258. @@ -457,9 +469,10 @@
  7259. void (*action)(struct softirq_action *);
  7260. };
  7261. +#ifndef CONFIG_PREEMPT_RT_FULL
  7262. asmlinkage void do_softirq(void);
  7263. asmlinkage void __do_softirq(void);
  7264. -
  7265. +static inline void thread_do_softirq(void) { do_softirq(); }
  7266. #ifdef __ARCH_HAS_DO_SOFTIRQ
  7267. void do_softirq_own_stack(void);
  7268. #else
  7269. @@ -468,13 +481,25 @@
  7270. __do_softirq();
  7271. }
  7272. #endif
  7273. +#else
  7274. +extern void thread_do_softirq(void);
  7275. +#endif
  7276. extern void open_softirq(int nr, void (*action)(struct softirq_action *));
  7277. extern void softirq_init(void);
  7278. extern void __raise_softirq_irqoff(unsigned int nr);
  7279. +#ifdef CONFIG_PREEMPT_RT_FULL
  7280. +extern void __raise_softirq_irqoff_ksoft(unsigned int nr);
  7281. +#else
  7282. +static inline void __raise_softirq_irqoff_ksoft(unsigned int nr)
  7283. +{
  7284. + __raise_softirq_irqoff(nr);
  7285. +}
  7286. +#endif
  7287. extern void raise_softirq_irqoff(unsigned int nr);
  7288. extern void raise_softirq(unsigned int nr);
  7289. +extern void softirq_check_pending_idle(void);
  7290. DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
  7291. @@ -496,8 +521,9 @@
  7292. to be executed on some cpu at least once after this.
  7293. * If the tasklet is already scheduled, but its execution is still not
  7294. started, it will be executed only once.
  7295. - * If this tasklet is already running on another CPU (or schedule is called
  7296. - from tasklet itself), it is rescheduled for later.
  7297. + * If this tasklet is already running on another CPU, it is rescheduled
  7298. + for later.
  7299. + * Schedule must not be called from the tasklet itself (a lockup occurs)
  7300. * Tasklet is strictly serialized wrt itself, but not
  7301. wrt another tasklets. If client needs some intertask synchronization,
  7302. he makes it with spinlocks.
  7303. @@ -522,27 +548,36 @@
  7304. enum
  7305. {
  7306. TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */
  7307. - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */
  7308. + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */
  7309. + TASKLET_STATE_PENDING /* Tasklet is pending */
  7310. };
  7311. -#ifdef CONFIG_SMP
  7312. +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED)
  7313. +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN)
  7314. +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING)
  7315. +
  7316. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  7317. static inline int tasklet_trylock(struct tasklet_struct *t)
  7318. {
  7319. return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
  7320. }
  7321. +static inline int tasklet_tryunlock(struct tasklet_struct *t)
  7322. +{
  7323. + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN;
  7324. +}
  7325. +
  7326. static inline void tasklet_unlock(struct tasklet_struct *t)
  7327. {
  7328. smp_mb__before_atomic();
  7329. clear_bit(TASKLET_STATE_RUN, &(t)->state);
  7330. }
  7331. -static inline void tasklet_unlock_wait(struct tasklet_struct *t)
  7332. -{
  7333. - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
  7334. -}
  7335. +extern void tasklet_unlock_wait(struct tasklet_struct *t);
  7336. +
  7337. #else
  7338. #define tasklet_trylock(t) 1
  7339. +#define tasklet_tryunlock(t) 1
  7340. #define tasklet_unlock_wait(t) do { } while (0)
  7341. #define tasklet_unlock(t) do { } while (0)
  7342. #endif
  7343. @@ -591,12 +626,7 @@
  7344. smp_mb();
  7345. }
  7346. -static inline void tasklet_enable(struct tasklet_struct *t)
  7347. -{
  7348. - smp_mb__before_atomic();
  7349. - atomic_dec(&t->count);
  7350. -}
  7351. -
  7352. +extern void tasklet_enable(struct tasklet_struct *t);
  7353. extern void tasklet_kill(struct tasklet_struct *t);
  7354. extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
  7355. extern void tasklet_init(struct tasklet_struct *t,
  7356. @@ -627,6 +657,12 @@
  7357. tasklet_kill(&ttimer->tasklet);
  7358. }
  7359. +#ifdef CONFIG_PREEMPT_RT_FULL
  7360. +extern void softirq_early_init(void);
  7361. +#else
  7362. +static inline void softirq_early_init(void) { }
  7363. +#endif
  7364. +
  7365. /*
  7366. * Autoprobing for irqs:
  7367. *
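A hedged sketch of how a driver could opt out of softirq processing in its (forced-threaded) handler using the new IRQF_NO_SOFTIRQ_CALL flag; demo_handler(), demo_setup(), irq and dev are all hypothetical:

    #include <linux/interrupt.h>

    static irqreturn_t demo_handler(int irq, void *dev_id)
    {
            return IRQ_HANDLED;
    }

    static int demo_setup(unsigned int irq, void *dev)
    {
            /* Do not run pending softirqs from this irq thread (RT). */
            return request_irq(irq, demo_handler, IRQF_NO_SOFTIRQ_CALL,
                               "demo", dev);
    }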
  7368. diff -Nur linux-4.8.15.orig/include/linux/irqdesc.h linux-4.8.15/include/linux/irqdesc.h
  7369. --- linux-4.8.15.orig/include/linux/irqdesc.h 2016-12-15 17:50:48.000000000 +0100
  7370. +++ linux-4.8.15/include/linux/irqdesc.h 2017-01-01 17:07:15.771410216 +0100
  7371. @@ -64,6 +64,7 @@
  7372. unsigned int irqs_unhandled;
  7373. atomic_t threads_handled;
  7374. int threads_handled_last;
  7375. + u64 random_ip;
  7376. raw_spinlock_t lock;
  7377. struct cpumask *percpu_enabled;
  7378. const struct cpumask *percpu_affinity;
  7379. diff -Nur linux-4.8.15.orig/include/linux/irqflags.h linux-4.8.15/include/linux/irqflags.h
  7380. --- linux-4.8.15.orig/include/linux/irqflags.h 2016-12-15 17:50:48.000000000 +0100
  7381. +++ linux-4.8.15/include/linux/irqflags.h 2017-01-01 17:07:15.771410216 +0100
  7382. @@ -25,8 +25,6 @@
  7383. # define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
  7384. # define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
  7385. # define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
  7386. -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
  7387. -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
  7388. # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
  7389. #else
  7390. # define trace_hardirqs_on() do { } while (0)
  7391. @@ -39,9 +37,15 @@
  7392. # define trace_softirqs_enabled(p) 0
  7393. # define trace_hardirq_enter() do { } while (0)
  7394. # define trace_hardirq_exit() do { } while (0)
  7395. +# define INIT_TRACE_IRQFLAGS
  7396. +#endif
  7397. +
  7398. +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL)
  7399. +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
  7400. +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
  7401. +#else
  7402. # define lockdep_softirq_enter() do { } while (0)
  7403. # define lockdep_softirq_exit() do { } while (0)
  7404. -# define INIT_TRACE_IRQFLAGS
  7405. #endif
  7406. #if defined(CONFIG_IRQSOFF_TRACER) || \
  7407. @@ -148,4 +152,23 @@
  7408. #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
  7409. +/*
  7410. + * local_irq* variants depending on RT/!RT
  7411. + */
  7412. +#ifdef CONFIG_PREEMPT_RT_FULL
  7413. +# define local_irq_disable_nort() do { } while (0)
  7414. +# define local_irq_enable_nort() do { } while (0)
  7415. +# define local_irq_save_nort(flags) local_save_flags(flags)
  7416. +# define local_irq_restore_nort(flags) (void)(flags)
  7417. +# define local_irq_disable_rt() local_irq_disable()
  7418. +# define local_irq_enable_rt() local_irq_enable()
  7419. +#else
  7420. +# define local_irq_disable_nort() local_irq_disable()
  7421. +# define local_irq_enable_nort() local_irq_enable()
  7422. +# define local_irq_save_nort(flags) local_irq_save(flags)
  7423. +# define local_irq_restore_nort(flags) local_irq_restore(flags)
  7424. +# define local_irq_disable_rt() do { } while (0)
  7425. +# define local_irq_enable_rt() do { } while (0)
  7426. +#endif
  7427. +
  7428. #endif
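A sketch of the intended use of the _nort variants added above: code that needs hard-irq protection only on mainline keeps it there, while on PREEMPT_RT_FULL the calls collapse to flag bookkeeping because a sleeping lock already provides the exclusion. demo_update() and the protected state are hypothetical:

    #include <linux/irqflags.h>

    static void demo_update(void)
    {
            unsigned long flags;

            local_irq_save_nort(flags);     /* real irq-off on !RT, plain flag save on RT */
            /* ... touch state that needs irq protection only on !RT ... */
            local_irq_restore_nort(flags);
    }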
  7429. diff -Nur linux-4.8.15.orig/include/linux/irq.h linux-4.8.15/include/linux/irq.h
  7430. --- linux-4.8.15.orig/include/linux/irq.h 2016-12-15 17:50:48.000000000 +0100
  7431. +++ linux-4.8.15/include/linux/irq.h 2017-01-01 17:07:15.767409952 +0100
  7432. @@ -72,6 +72,7 @@
  7433. * IRQ_IS_POLLED - Always polled by another interrupt. Exclude
  7434. * it from the spurious interrupt detection
  7435. * mechanism and from core side polling.
  7436. + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT)
  7437. * IRQ_DISABLE_UNLAZY - Disable lazy irq disable
  7438. */
  7439. enum {
  7440. @@ -99,13 +100,14 @@
  7441. IRQ_PER_CPU_DEVID = (1 << 17),
  7442. IRQ_IS_POLLED = (1 << 18),
  7443. IRQ_DISABLE_UNLAZY = (1 << 19),
  7444. + IRQ_NO_SOFTIRQ_CALL = (1 << 20),
  7445. };
  7446. #define IRQF_MODIFY_MASK \
  7447. (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
  7448. IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
  7449. IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \
  7450. - IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY)
  7451. + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_NO_SOFTIRQ_CALL)
  7452. #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
  7453. diff -Nur linux-4.8.15.orig/include/linux/irq_work.h linux-4.8.15/include/linux/irq_work.h
  7454. --- linux-4.8.15.orig/include/linux/irq_work.h 2016-12-15 17:50:48.000000000 +0100
  7455. +++ linux-4.8.15/include/linux/irq_work.h 2017-01-01 17:07:15.767409952 +0100
  7456. @@ -16,6 +16,7 @@
  7457. #define IRQ_WORK_BUSY 2UL
  7458. #define IRQ_WORK_FLAGS 3UL
  7459. #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */
  7460. +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */
  7461. struct irq_work {
  7462. unsigned long flags;
  7463. @@ -51,4 +52,10 @@
  7464. static inline void irq_work_run(void) { }
  7465. #endif
  7466. +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
  7467. +void irq_work_tick_soft(void);
  7468. +#else
  7469. +static inline void irq_work_tick_soft(void) { }
  7470. +#endif
  7471. +
  7472. #endif /* _LINUX_IRQ_WORK_H */
  7473. diff -Nur linux-4.8.15.orig/include/linux/jbd2.h linux-4.8.15/include/linux/jbd2.h
  7474. --- linux-4.8.15.orig/include/linux/jbd2.h 2016-12-15 17:50:48.000000000 +0100
  7475. +++ linux-4.8.15/include/linux/jbd2.h 2017-01-01 17:07:15.775410464 +0100
  7476. @@ -347,32 +347,56 @@
  7477. static inline void jbd_lock_bh_state(struct buffer_head *bh)
  7478. {
  7479. +#ifndef CONFIG_PREEMPT_RT_BASE
  7480. bit_spin_lock(BH_State, &bh->b_state);
  7481. +#else
  7482. + spin_lock(&bh->b_state_lock);
  7483. +#endif
  7484. }
  7485. static inline int jbd_trylock_bh_state(struct buffer_head *bh)
  7486. {
  7487. +#ifndef CONFIG_PREEMPT_RT_BASE
  7488. return bit_spin_trylock(BH_State, &bh->b_state);
  7489. +#else
  7490. + return spin_trylock(&bh->b_state_lock);
  7491. +#endif
  7492. }
  7493. static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
  7494. {
  7495. +#ifndef CONFIG_PREEMPT_RT_BASE
  7496. return bit_spin_is_locked(BH_State, &bh->b_state);
  7497. +#else
  7498. + return spin_is_locked(&bh->b_state_lock);
  7499. +#endif
  7500. }
  7501. static inline void jbd_unlock_bh_state(struct buffer_head *bh)
  7502. {
  7503. +#ifndef CONFIG_PREEMPT_RT_BASE
  7504. bit_spin_unlock(BH_State, &bh->b_state);
  7505. +#else
  7506. + spin_unlock(&bh->b_state_lock);
  7507. +#endif
  7508. }
  7509. static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
  7510. {
  7511. +#ifndef CONFIG_PREEMPT_RT_BASE
  7512. bit_spin_lock(BH_JournalHead, &bh->b_state);
  7513. +#else
  7514. + spin_lock(&bh->b_journal_head_lock);
  7515. +#endif
  7516. }
  7517. static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
  7518. {
  7519. +#ifndef CONFIG_PREEMPT_RT_BASE
  7520. bit_spin_unlock(BH_JournalHead, &bh->b_state);
  7521. +#else
  7522. + spin_unlock(&bh->b_journal_head_lock);
  7523. +#endif
  7524. }
  7525. #define J_ASSERT(assert) BUG_ON(!(assert))
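A usage sketch of the converted state-lock helpers; demo_touch_journal_state() is hypothetical and stands in for the jbd2 code paths that already call these wrappers:

    #include <linux/jbd2.h>

    static void demo_touch_journal_state(struct buffer_head *bh)
    {
            jbd_lock_bh_state(bh);  /* BH_State bit-spinlock on !RT, b_state_lock on RT */
            /* ... examine or modify state guarded by the lock ... */
            jbd_unlock_bh_state(bh);
    }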
  7526. diff -Nur linux-4.8.15.orig/include/linux/kdb.h linux-4.8.15/include/linux/kdb.h
  7527. --- linux-4.8.15.orig/include/linux/kdb.h 2016-12-15 17:50:48.000000000 +0100
  7528. +++ linux-4.8.15/include/linux/kdb.h 2017-01-01 17:07:15.775410464 +0100
  7529. @@ -167,6 +167,7 @@
  7530. extern __printf(1, 2) int kdb_printf(const char *, ...);
  7531. typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
  7532. +#define in_kdb_printk() (kdb_trap_printk)
  7533. extern void kdb_init(int level);
  7534. /* Access to kdb specific polling devices */
  7535. @@ -201,6 +202,7 @@
  7536. extern int kdb_unregister(char *);
  7537. #else /* ! CONFIG_KGDB_KDB */
  7538. static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; }
  7539. +#define in_kdb_printk() (0)
  7540. static inline void kdb_init(int level) {}
  7541. static inline int kdb_register(char *cmd, kdb_func_t func, char *usage,
  7542. char *help, short minlen) { return 0; }
  7543. diff -Nur linux-4.8.15.orig/include/linux/kernel.h linux-4.8.15/include/linux/kernel.h
  7544. --- linux-4.8.15.orig/include/linux/kernel.h 2016-12-15 17:50:48.000000000 +0100
  7545. +++ linux-4.8.15/include/linux/kernel.h 2017-01-01 17:07:15.775410464 +0100
  7546. @@ -194,6 +194,9 @@
  7547. */
  7548. # define might_sleep() \
  7549. do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
  7550. +
  7551. +# define might_sleep_no_state_check() \
  7552. + do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
  7553. # define sched_annotate_sleep() (current->task_state_change = 0)
  7554. #else
  7555. static inline void ___might_sleep(const char *file, int line,
  7556. @@ -201,6 +204,7 @@
  7557. static inline void __might_sleep(const char *file, int line,
  7558. int preempt_offset) { }
  7559. # define might_sleep() do { might_resched(); } while (0)
  7560. +# define might_sleep_no_state_check() do { might_resched(); } while (0)
  7561. # define sched_annotate_sleep() do { } while (0)
  7562. #endif
  7563. @@ -491,6 +495,7 @@
  7564. SYSTEM_HALT,
  7565. SYSTEM_POWER_OFF,
  7566. SYSTEM_RESTART,
  7567. + SYSTEM_SUSPEND,
  7568. } system_state;
  7569. #define TAINT_PROPRIETARY_MODULE 0
  7570. diff -Nur linux-4.8.15.orig/include/linux/lglock.h linux-4.8.15/include/linux/lglock.h
  7571. --- linux-4.8.15.orig/include/linux/lglock.h 2016-12-15 17:50:48.000000000 +0100
  7572. +++ linux-4.8.15/include/linux/lglock.h 2017-01-01 17:07:15.779410723 +0100
  7573. @@ -34,13 +34,30 @@
  7574. #endif
  7575. struct lglock {
  7576. +#ifdef CONFIG_PREEMPT_RT_FULL
  7577. + struct rt_mutex __percpu *lock;
  7578. +#else
  7579. arch_spinlock_t __percpu *lock;
  7580. +#endif
  7581. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  7582. struct lock_class_key lock_key;
  7583. struct lockdep_map lock_dep_map;
  7584. #endif
  7585. };
  7586. +#ifdef CONFIG_PREEMPT_RT_FULL
  7587. +# define DEFINE_LGLOCK(name) \
  7588. + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \
  7589. + = __RT_MUTEX_INITIALIZER( name ## _lock); \
  7590. + struct lglock name = { .lock = &name ## _lock }
  7591. +
  7592. +# define DEFINE_STATIC_LGLOCK(name) \
  7593. + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \
  7594. + = __RT_MUTEX_INITIALIZER( name ## _lock); \
  7595. + static struct lglock name = { .lock = &name ## _lock }
  7596. +
  7597. +#else
  7598. +
  7599. #define DEFINE_LGLOCK(name) \
  7600. static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
  7601. = __ARCH_SPIN_LOCK_UNLOCKED; \
  7602. @@ -50,6 +67,7 @@
  7603. static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
  7604. = __ARCH_SPIN_LOCK_UNLOCKED; \
  7605. static struct lglock name = { .lock = &name ## _lock }
  7606. +#endif
  7607. void lg_lock_init(struct lglock *lg, char *name);
  7608. @@ -64,6 +82,12 @@
  7609. void lg_global_lock(struct lglock *lg);
  7610. void lg_global_unlock(struct lglock *lg);
  7611. +#ifndef CONFIG_PREEMPT_RT_FULL
  7612. +#define lg_global_trylock_relax(name) lg_global_lock(name)
  7613. +#else
  7614. +void lg_global_trylock_relax(struct lglock *lg);
  7615. +#endif
  7616. +
  7617. #else
  7618. /* When !CONFIG_SMP, map lglock to spinlock */
  7619. #define lglock spinlock
  7620. diff -Nur linux-4.8.15.orig/include/linux/list_bl.h linux-4.8.15/include/linux/list_bl.h
  7621. --- linux-4.8.15.orig/include/linux/list_bl.h 2016-12-15 17:50:48.000000000 +0100
  7622. +++ linux-4.8.15/include/linux/list_bl.h 2017-01-01 17:07:15.779410723 +0100
  7623. @@ -2,6 +2,7 @@
  7624. #define _LINUX_LIST_BL_H
  7625. #include <linux/list.h>
  7626. +#include <linux/spinlock.h>
  7627. #include <linux/bit_spinlock.h>
  7628. /*
  7629. @@ -32,13 +33,24 @@
  7630. struct hlist_bl_head {
  7631. struct hlist_bl_node *first;
  7632. +#ifdef CONFIG_PREEMPT_RT_BASE
  7633. + raw_spinlock_t lock;
  7634. +#endif
  7635. };
  7636. struct hlist_bl_node {
  7637. struct hlist_bl_node *next, **pprev;
  7638. };
  7639. -#define INIT_HLIST_BL_HEAD(ptr) \
  7640. - ((ptr)->first = NULL)
  7641. +
  7642. +#ifdef CONFIG_PREEMPT_RT_BASE
  7643. +#define INIT_HLIST_BL_HEAD(h) \
  7644. +do { \
  7645. + (h)->first = NULL; \
  7646. + raw_spin_lock_init(&(h)->lock); \
  7647. +} while (0)
  7648. +#else
  7649. +#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL
  7650. +#endif
  7651. static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
  7652. {
  7653. @@ -118,12 +130,26 @@
  7654. static inline void hlist_bl_lock(struct hlist_bl_head *b)
  7655. {
  7656. +#ifndef CONFIG_PREEMPT_RT_BASE
  7657. bit_spin_lock(0, (unsigned long *)b);
  7658. +#else
  7659. + raw_spin_lock(&b->lock);
  7660. +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  7661. + __set_bit(0, (unsigned long *)b);
  7662. +#endif
  7663. +#endif
  7664. }
  7665. static inline void hlist_bl_unlock(struct hlist_bl_head *b)
  7666. {
  7667. +#ifndef CONFIG_PREEMPT_RT_BASE
  7668. __bit_spin_unlock(0, (unsigned long *)b);
  7669. +#else
  7670. +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  7671. + __clear_bit(0, (unsigned long *)b);
  7672. +#endif
  7673. + raw_spin_unlock(&b->lock);
  7674. +#endif
  7675. }
  7676. static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
  7677. diff -Nur linux-4.8.15.orig/include/linux/locallock.h linux-4.8.15/include/linux/locallock.h
  7678. --- linux-4.8.15.orig/include/linux/locallock.h 1970-01-01 01:00:00.000000000 +0100
  7679. +++ linux-4.8.15/include/linux/locallock.h 2017-01-01 17:07:15.779410723 +0100
  7680. @@ -0,0 +1,278 @@
  7681. +#ifndef _LINUX_LOCALLOCK_H
  7682. +#define _LINUX_LOCALLOCK_H
  7683. +
  7684. +#include <linux/percpu.h>
  7685. +#include <linux/spinlock.h>
  7686. +
  7687. +#ifdef CONFIG_PREEMPT_RT_BASE
  7688. +
  7689. +#ifdef CONFIG_DEBUG_SPINLOCK
  7690. +# define LL_WARN(cond) WARN_ON(cond)
  7691. +#else
  7692. +# define LL_WARN(cond) do { } while (0)
  7693. +#endif
  7694. +
  7695. +/*
  7696. + * per cpu lock based substitute for local_irq_*()
  7697. + */
  7698. +struct local_irq_lock {
  7699. + spinlock_t lock;
  7700. + struct task_struct *owner;
  7701. + int nestcnt;
  7702. + unsigned long flags;
  7703. +};
  7704. +
  7705. +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \
  7706. + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \
  7707. + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
  7708. +
  7709. +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \
  7710. + DECLARE_PER_CPU(struct local_irq_lock, lvar)
  7711. +
  7712. +#define local_irq_lock_init(lvar) \
  7713. + do { \
  7714. + int __cpu; \
  7715. + for_each_possible_cpu(__cpu) \
  7716. + spin_lock_init(&per_cpu(lvar, __cpu).lock); \
  7717. + } while (0)
  7718. +
  7719. +/*
  7720. + * spin_lock|trylock|unlock_local flavour that does not migrate disable
  7721. + * used for __local_lock|trylock|unlock where get_local_var/put_local_var
  7722. + * already takes care of the migrate_disable/enable
  7723. + * for CONFIG_PREEMPT_BASE map to the normal spin_* calls.
  7724. + */
  7725. +#ifdef CONFIG_PREEMPT_RT_FULL
  7726. +# define spin_lock_local(lock) rt_spin_lock__no_mg(lock)
  7727. +# define spin_trylock_local(lock) rt_spin_trylock__no_mg(lock)
  7728. +# define spin_unlock_local(lock) rt_spin_unlock__no_mg(lock)
  7729. +#else
  7730. +# define spin_lock_local(lock) spin_lock(lock)
  7731. +# define spin_trylock_local(lock) spin_trylock(lock)
  7732. +# define spin_unlock_local(lock) spin_unlock(lock)
  7733. +#endif
  7734. +
  7735. +static inline void __local_lock(struct local_irq_lock *lv)
  7736. +{
  7737. + if (lv->owner != current) {
  7738. + spin_lock_local(&lv->lock);
  7739. + LL_WARN(lv->owner);
  7740. + LL_WARN(lv->nestcnt);
  7741. + lv->owner = current;
  7742. + }
  7743. + lv->nestcnt++;
  7744. +}
  7745. +
  7746. +#define local_lock(lvar) \
  7747. + do { __local_lock(&get_local_var(lvar)); } while (0)
  7748. +
  7749. +#define local_lock_on(lvar, cpu) \
  7750. + do { __local_lock(&per_cpu(lvar, cpu)); } while (0)
  7751. +
  7752. +static inline int __local_trylock(struct local_irq_lock *lv)
  7753. +{
  7754. + if (lv->owner != current && spin_trylock_local(&lv->lock)) {
  7755. + LL_WARN(lv->owner);
  7756. + LL_WARN(lv->nestcnt);
  7757. + lv->owner = current;
  7758. + lv->nestcnt = 1;
  7759. + return 1;
  7760. + }
  7761. + return 0;
  7762. +}
  7763. +
  7764. +#define local_trylock(lvar) \
  7765. + ({ \
  7766. + int __locked; \
  7767. + __locked = __local_trylock(&get_local_var(lvar)); \
  7768. + if (!__locked) \
  7769. + put_local_var(lvar); \
  7770. + __locked; \
  7771. + })
  7772. +
  7773. +static inline void __local_unlock(struct local_irq_lock *lv)
  7774. +{
  7775. + LL_WARN(lv->nestcnt == 0);
  7776. + LL_WARN(lv->owner != current);
  7777. + if (--lv->nestcnt)
  7778. + return;
  7779. +
  7780. + lv->owner = NULL;
  7781. + spin_unlock_local(&lv->lock);
  7782. +}
  7783. +
  7784. +#define local_unlock(lvar) \
  7785. + do { \
  7786. + __local_unlock(this_cpu_ptr(&lvar)); \
  7787. + put_local_var(lvar); \
  7788. + } while (0)
  7789. +
  7790. +#define local_unlock_on(lvar, cpu) \
  7791. + do { __local_unlock(&per_cpu(lvar, cpu)); } while (0)
  7792. +
  7793. +static inline void __local_lock_irq(struct local_irq_lock *lv)
  7794. +{
  7795. + spin_lock_irqsave(&lv->lock, lv->flags);
  7796. + LL_WARN(lv->owner);
  7797. + LL_WARN(lv->nestcnt);
  7798. + lv->owner = current;
  7799. + lv->nestcnt = 1;
  7800. +}
  7801. +
  7802. +#define local_lock_irq(lvar) \
  7803. + do { __local_lock_irq(&get_local_var(lvar)); } while (0)
  7804. +
  7805. +#define local_lock_irq_on(lvar, cpu) \
  7806. + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0)
  7807. +
  7808. +static inline void __local_unlock_irq(struct local_irq_lock *lv)
  7809. +{
  7810. + LL_WARN(!lv->nestcnt);
  7811. + LL_WARN(lv->owner != current);
  7812. + lv->owner = NULL;
  7813. + lv->nestcnt = 0;
  7814. + spin_unlock_irq(&lv->lock);
  7815. +}
  7816. +
  7817. +#define local_unlock_irq(lvar) \
  7818. + do { \
  7819. + __local_unlock_irq(this_cpu_ptr(&lvar)); \
  7820. + put_local_var(lvar); \
  7821. + } while (0)
  7822. +
  7823. +#define local_unlock_irq_on(lvar, cpu) \
  7824. + do { \
  7825. + __local_unlock_irq(&per_cpu(lvar, cpu)); \
  7826. + } while (0)
  7827. +
  7828. +static inline int __local_lock_irqsave(struct local_irq_lock *lv)
  7829. +{
  7830. + if (lv->owner != current) {
  7831. + __local_lock_irq(lv);
  7832. + return 0;
  7833. + } else {
  7834. + lv->nestcnt++;
  7835. + return 1;
  7836. + }
  7837. +}
  7838. +
  7839. +#define local_lock_irqsave(lvar, _flags) \
  7840. + do { \
  7841. + if (__local_lock_irqsave(&get_local_var(lvar))) \
  7842. + put_local_var(lvar); \
  7843. + _flags = __this_cpu_read(lvar.flags); \
  7844. + } while (0)
  7845. +
  7846. +#define local_lock_irqsave_on(lvar, _flags, cpu) \
  7847. + do { \
  7848. + __local_lock_irqsave(&per_cpu(lvar, cpu)); \
  7849. + _flags = per_cpu(lvar, cpu).flags; \
  7850. + } while (0)
  7851. +
  7852. +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
  7853. + unsigned long flags)
  7854. +{
  7855. + LL_WARN(!lv->nestcnt);
  7856. + LL_WARN(lv->owner != current);
  7857. + if (--lv->nestcnt)
  7858. + return 0;
  7859. +
  7860. + lv->owner = NULL;
  7861. + spin_unlock_irqrestore(&lv->lock, lv->flags);
  7862. + return 1;
  7863. +}
  7864. +
  7865. +#define local_unlock_irqrestore(lvar, flags) \
  7866. + do { \
  7867. + if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \
  7868. + put_local_var(lvar); \
  7869. + } while (0)
  7870. +
  7871. +#define local_unlock_irqrestore_on(lvar, flags, cpu) \
  7872. + do { \
  7873. + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \
  7874. + } while (0)
  7875. +
  7876. +#define local_spin_trylock_irq(lvar, lock) \
  7877. + ({ \
  7878. + int __locked; \
  7879. + local_lock_irq(lvar); \
  7880. + __locked = spin_trylock(lock); \
  7881. + if (!__locked) \
  7882. + local_unlock_irq(lvar); \
  7883. + __locked; \
  7884. + })
  7885. +
  7886. +#define local_spin_lock_irq(lvar, lock) \
  7887. + do { \
  7888. + local_lock_irq(lvar); \
  7889. + spin_lock(lock); \
  7890. + } while (0)
  7891. +
  7892. +#define local_spin_unlock_irq(lvar, lock) \
  7893. + do { \
  7894. + spin_unlock(lock); \
  7895. + local_unlock_irq(lvar); \
  7896. + } while (0)
  7897. +
  7898. +#define local_spin_lock_irqsave(lvar, lock, flags) \
  7899. + do { \
  7900. + local_lock_irqsave(lvar, flags); \
  7901. + spin_lock(lock); \
  7902. + } while (0)
  7903. +
  7904. +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
  7905. + do { \
  7906. + spin_unlock(lock); \
  7907. + local_unlock_irqrestore(lvar, flags); \
  7908. + } while (0)
  7909. +
  7910. +#define get_locked_var(lvar, var) \
  7911. + (*({ \
  7912. + local_lock(lvar); \
  7913. + this_cpu_ptr(&var); \
  7914. + }))
  7915. +
  7916. +#define put_locked_var(lvar, var) local_unlock(lvar);
  7917. +
  7918. +#define local_lock_cpu(lvar) \
  7919. + ({ \
  7920. + local_lock(lvar); \
  7921. + smp_processor_id(); \
  7922. + })
  7923. +
  7924. +#define local_unlock_cpu(lvar) local_unlock(lvar)
  7925. +
  7926. +#else /* PREEMPT_RT_BASE */
  7927. +
  7928. +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar
  7929. +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar
  7930. +
  7931. +static inline void local_irq_lock_init(int lvar) { }
  7932. +
  7933. +#define local_lock(lvar) preempt_disable()
  7934. +#define local_unlock(lvar) preempt_enable()
  7935. +#define local_lock_irq(lvar) local_irq_disable()
  7936. +#define local_lock_irq_on(lvar, cpu) local_irq_disable()
  7937. +#define local_unlock_irq(lvar) local_irq_enable()
  7938. +#define local_unlock_irq_on(lvar, cpu) local_irq_enable()
  7939. +#define local_lock_irqsave(lvar, flags) local_irq_save(flags)
  7940. +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags)
  7941. +
  7942. +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock)
  7943. +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock)
  7944. +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock)
  7945. +#define local_spin_lock_irqsave(lvar, lock, flags) \
  7946. + spin_lock_irqsave(lock, flags)
  7947. +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
  7948. + spin_unlock_irqrestore(lock, flags)
  7949. +
  7950. +#define get_locked_var(lvar, var) get_cpu_var(var)
  7951. +#define put_locked_var(lvar, var) put_cpu_var(var)
  7952. +
  7953. +#define local_lock_cpu(lvar) get_cpu()
  7954. +#define local_unlock_cpu(lvar) put_cpu()
  7955. +
  7956. +#endif
  7957. +
  7958. +#endif
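A minimal sketch, assuming a per-CPU list initialized elsewhere, of how the new local locks are meant to be used; demo_lock, demo_list and demo_add() are hypothetical. On mainline local_lock() is just preempt_disable(); on PREEMPT_RT_BASE it takes the per-CPU sleeping spinlock with the owner/nesting tracking defined above:

    #include <linux/list.h>
    #include <linux/locallock.h>
    #include <linux/percpu.h>

    static DEFINE_PER_CPU(struct list_head, demo_list); /* assumed initialized elsewhere */
    static DEFINE_LOCAL_IRQ_LOCK(demo_lock);

    static void demo_add(struct list_head *entry)
    {
            local_lock(demo_lock);                       /* pins the task to this CPU */
            list_add(entry, this_cpu_ptr(&demo_list));
            local_unlock(demo_lock);
    }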
  7959. diff -Nur linux-4.8.15.orig/include/linux/mm_types.h linux-4.8.15/include/linux/mm_types.h
  7960. --- linux-4.8.15.orig/include/linux/mm_types.h 2016-12-15 17:50:48.000000000 +0100
  7961. +++ linux-4.8.15/include/linux/mm_types.h 2017-01-01 17:07:15.791411498 +0100
  7962. @@ -11,6 +11,7 @@
  7963. #include <linux/completion.h>
  7964. #include <linux/cpumask.h>
  7965. #include <linux/uprobes.h>
  7966. +#include <linux/rcupdate.h>
  7967. #include <linux/page-flags-layout.h>
  7968. #include <linux/workqueue.h>
  7969. #include <asm/page.h>
  7970. @@ -508,6 +509,9 @@
  7971. bool tlb_flush_pending;
  7972. #endif
  7973. struct uprobes_state uprobes_state;
  7974. +#ifdef CONFIG_PREEMPT_RT_BASE
  7975. + struct rcu_head delayed_drop;
  7976. +#endif
  7977. #ifdef CONFIG_X86_INTEL_MPX
  7978. /* address of the bounds directory */
  7979. void __user *bd_addr;
  7980. diff -Nur linux-4.8.15.orig/include/linux/mutex.h linux-4.8.15/include/linux/mutex.h
  7981. --- linux-4.8.15.orig/include/linux/mutex.h 2016-12-15 17:50:48.000000000 +0100
  7982. +++ linux-4.8.15/include/linux/mutex.h 2017-01-01 17:07:15.791411498 +0100
  7983. @@ -19,6 +19,17 @@
  7984. #include <asm/processor.h>
  7985. #include <linux/osq_lock.h>
  7986. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  7987. +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
  7988. + , .dep_map = { .name = #lockname }
  7989. +#else
  7990. +# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
  7991. +#endif
  7992. +
  7993. +#ifdef CONFIG_PREEMPT_RT_FULL
  7994. +# include <linux/mutex_rt.h>
  7995. +#else
  7996. +
  7997. /*
  7998. * Simple, straightforward mutexes with strict semantics:
  7999. *
  8000. @@ -99,13 +110,6 @@
  8001. static inline void mutex_destroy(struct mutex *lock) {}
  8002. #endif
  8003. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  8004. -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
  8005. - , .dep_map = { .name = #lockname }
  8006. -#else
  8007. -# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
  8008. -#endif
  8009. -
  8010. #define __MUTEX_INITIALIZER(lockname) \
  8011. { .count = ATOMIC_INIT(1) \
  8012. , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
  8013. @@ -173,6 +177,8 @@
  8014. extern int mutex_trylock(struct mutex *lock);
  8015. extern void mutex_unlock(struct mutex *lock);
  8016. +#endif /* !PREEMPT_RT_FULL */
  8017. +
  8018. extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
  8019. #endif /* __LINUX_MUTEX_H */
  8020. diff -Nur linux-4.8.15.orig/include/linux/mutex_rt.h linux-4.8.15/include/linux/mutex_rt.h
  8021. --- linux-4.8.15.orig/include/linux/mutex_rt.h 1970-01-01 01:00:00.000000000 +0100
  8022. +++ linux-4.8.15/include/linux/mutex_rt.h 2017-01-01 17:07:15.791411498 +0100
  8023. @@ -0,0 +1,84 @@
  8024. +#ifndef __LINUX_MUTEX_RT_H
  8025. +#define __LINUX_MUTEX_RT_H
  8026. +
  8027. +#ifndef __LINUX_MUTEX_H
  8028. +#error "Please include mutex.h"
  8029. +#endif
  8030. +
  8031. +#include <linux/rtmutex.h>
  8032. +
  8033. +/* FIXME: Just for __lockfunc */
  8034. +#include <linux/spinlock.h>
  8035. +
  8036. +struct mutex {
  8037. + struct rt_mutex lock;
  8038. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  8039. + struct lockdep_map dep_map;
  8040. +#endif
  8041. +};
  8042. +
  8043. +#define __MUTEX_INITIALIZER(mutexname) \
  8044. + { \
  8045. + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \
  8046. + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \
  8047. + }
  8048. +
  8049. +#define DEFINE_MUTEX(mutexname) \
  8050. + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
  8051. +
  8052. +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key);
  8053. +extern void __lockfunc _mutex_lock(struct mutex *lock);
  8054. +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock);
  8055. +extern int __lockfunc _mutex_lock_killable(struct mutex *lock);
  8056. +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass);
  8057. +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
  8058. +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass);
  8059. +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass);
  8060. +extern int __lockfunc _mutex_trylock(struct mutex *lock);
  8061. +extern void __lockfunc _mutex_unlock(struct mutex *lock);
  8062. +
  8063. +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock)
  8064. +#define mutex_lock(l) _mutex_lock(l)
  8065. +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l)
  8066. +#define mutex_lock_killable(l) _mutex_lock_killable(l)
  8067. +#define mutex_trylock(l) _mutex_trylock(l)
  8068. +#define mutex_unlock(l) _mutex_unlock(l)
  8069. +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock)
  8070. +
  8071. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  8072. +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s)
  8073. +# define mutex_lock_interruptible_nested(l, s) \
  8074. + _mutex_lock_interruptible_nested(l, s)
  8075. +# define mutex_lock_killable_nested(l, s) \
  8076. + _mutex_lock_killable_nested(l, s)
  8077. +
  8078. +# define mutex_lock_nest_lock(lock, nest_lock) \
  8079. +do { \
  8080. + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
  8081. + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
  8082. +} while (0)
  8083. +
  8084. +#else
  8085. +# define mutex_lock_nested(l, s) _mutex_lock(l)
  8086. +# define mutex_lock_interruptible_nested(l, s) \
  8087. + _mutex_lock_interruptible(l)
  8088. +# define mutex_lock_killable_nested(l, s) \
  8089. + _mutex_lock_killable(l)
  8090. +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
  8091. +#endif
  8092. +
  8093. +# define mutex_init(mutex) \
  8094. +do { \
  8095. + static struct lock_class_key __key; \
  8096. + \
  8097. + rt_mutex_init(&(mutex)->lock); \
  8098. + __mutex_do_init((mutex), #mutex, &__key); \
  8099. +} while (0)
  8100. +
  8101. +# define __mutex_init(mutex, name, key) \
  8102. +do { \
  8103. + rt_mutex_init(&(mutex)->lock); \
  8104. + __mutex_do_init((mutex), name, key); \
  8105. +} while (0)
  8106. +
  8107. +#endif
  8108. diff -Nur linux-4.8.15.orig/include/linux/netdevice.h linux-4.8.15/include/linux/netdevice.h
  8109. --- linux-4.8.15.orig/include/linux/netdevice.h 2016-12-15 17:50:48.000000000 +0100
  8110. +++ linux-4.8.15/include/linux/netdevice.h 2017-01-01 17:07:15.807412535 +0100
  8111. @@ -395,7 +395,19 @@
  8112. typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
  8113. void __napi_schedule(struct napi_struct *n);
  8114. +
  8115. +/*
  8116. + * When PREEMPT_RT_FULL is defined, all device interrupt handlers
  8117. + * run as threads, and they can also be preempted (without PREEMPT_RT
  8118. + * interrupt threads can not be preempted). Which means that calling
  8119. + * __napi_schedule_irqoff() from an interrupt handler can be preempted
  8120. + * and can corrupt the napi->poll_list.
  8121. + */
  8122. +#ifdef CONFIG_PREEMPT_RT_FULL
  8123. +#define __napi_schedule_irqoff(n) __napi_schedule(n)
  8124. +#else
  8125. void __napi_schedule_irqoff(struct napi_struct *n);
  8126. +#endif
  8127. static inline bool napi_disable_pending(struct napi_struct *n)
  8128. {
  8129. @@ -2446,14 +2458,53 @@
  8130. void synchronize_net(void);
  8131. int init_dummy_netdev(struct net_device *dev);
  8132. -DECLARE_PER_CPU(int, xmit_recursion);
  8133. #define XMIT_RECURSION_LIMIT 10
  8134. +#ifdef CONFIG_PREEMPT_RT_FULL
  8135. +static inline int dev_recursion_level(void)
  8136. +{
  8137. + return current->xmit_recursion;
  8138. +}
  8139. +
  8140. +static inline int xmit_rec_read(void)
  8141. +{
  8142. + return current->xmit_recursion;
  8143. +}
  8144. +
  8145. +static inline void xmit_rec_inc(void)
  8146. +{
  8147. + current->xmit_recursion++;
  8148. +}
  8149. +
  8150. +static inline void xmit_rec_dec(void)
  8151. +{
  8152. + current->xmit_recursion--;
  8153. +}
  8154. +
  8155. +#else
  8156. +
  8157. +DECLARE_PER_CPU(int, xmit_recursion);
  8158. static inline int dev_recursion_level(void)
  8159. {
  8160. return this_cpu_read(xmit_recursion);
  8161. }
  8162. +static inline int xmit_rec_read(void)
  8163. +{
  8164. + return __this_cpu_read(xmit_recursion);
  8165. +}
  8166. +
  8167. +static inline void xmit_rec_inc(void)
  8168. +{
  8169. + __this_cpu_inc(xmit_recursion);
  8170. +}
  8171. +
  8172. +static inline void xmit_rec_dec(void)
  8173. +{
  8174. + __this_cpu_dec(xmit_recursion);
  8175. +}
  8176. +#endif
  8177. +
  8178. struct net_device *dev_get_by_index(struct net *net, int ifindex);
  8179. struct net_device *__dev_get_by_index(struct net *net, int ifindex);
  8180. struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
  8181. @@ -2831,6 +2882,7 @@
  8182. unsigned int dropped;
  8183. struct sk_buff_head input_pkt_queue;
  8184. struct napi_struct backlog;
  8185. + struct sk_buff_head tofree_queue;
  8186. };
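A hypothetical transmit-path fragment showing the recursion guard that the hunk above switches from a per-CPU counter to a per-task counter on RT; demo_xmit() and the -ELOOP return are illustrative only:

    #include <linux/errno.h>
    #include <linux/netdevice.h>

    static int demo_xmit(struct sk_buff *skb, struct net_device *dev)
    {
            if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT))
                    return -ELOOP;          /* real code would drop the skb here */

            xmit_rec_inc();
            /* ... hand the skb down to the device ... */
            xmit_rec_dec();
            return 0;
    }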
  8187. diff -Nur linux-4.8.15.orig/include/linux/netfilter/x_tables.h linux-4.8.15/include/linux/netfilter/x_tables.h
  8188. --- linux-4.8.15.orig/include/linux/netfilter/x_tables.h 2016-12-15 17:50:48.000000000 +0100
  8189. +++ linux-4.8.15/include/linux/netfilter/x_tables.h 2017-01-01 17:07:15.815413044 +0100
  8190. @@ -4,6 +4,7 @@
  8191. #include <linux/netdevice.h>
  8192. #include <linux/static_key.h>
  8193. +#include <linux/locallock.h>
  8194. #include <uapi/linux/netfilter/x_tables.h>
  8195. /* Test a struct->invflags and a boolean for inequality */
  8196. @@ -300,6 +301,8 @@
  8197. */
  8198. DECLARE_PER_CPU(seqcount_t, xt_recseq);
  8199. +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock);
  8200. +
  8201. /* xt_tee_enabled - true if x_tables needs to handle reentrancy
  8202. *
  8203. * Enabled if current ip(6)tables ruleset has at least one -j TEE rule.
  8204. @@ -320,6 +323,9 @@
  8205. {
  8206. unsigned int addend;
  8207. + /* RT protection */
  8208. + local_lock(xt_write_lock);
  8209. +
  8210. /*
  8211. * Low order bit of sequence is set if we already
  8212. * called xt_write_recseq_begin().
  8213. @@ -350,6 +356,7 @@
  8214. /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */
  8215. smp_wmb();
  8216. __this_cpu_add(xt_recseq.sequence, addend);
  8217. + local_unlock(xt_write_lock);
  8218. }
  8219. /*
  8220. diff -Nur linux-4.8.15.orig/include/linux/nfs_fs.h linux-4.8.15/include/linux/nfs_fs.h
  8221. --- linux-4.8.15.orig/include/linux/nfs_fs.h 2016-12-15 17:50:48.000000000 +0100
  8222. +++ linux-4.8.15/include/linux/nfs_fs.h 2017-01-01 17:07:15.815413044 +0100
  8223. @@ -165,7 +165,11 @@
  8224. /* Readers: in-flight sillydelete RPC calls */
  8225. /* Writers: rmdir */
  8226. +#ifdef CONFIG_PREEMPT_RT_BASE
  8227. + struct semaphore rmdir_sem;
  8228. +#else
  8229. struct rw_semaphore rmdir_sem;
  8230. +#endif
  8231. #if IS_ENABLED(CONFIG_NFS_V4)
  8232. struct nfs4_cached_acl *nfs4_acl;
  8233. diff -Nur linux-4.8.15.orig/include/linux/nfs_xdr.h linux-4.8.15/include/linux/nfs_xdr.h
  8234. --- linux-4.8.15.orig/include/linux/nfs_xdr.h 2016-12-15 17:50:48.000000000 +0100
  8235. +++ linux-4.8.15/include/linux/nfs_xdr.h 2017-01-01 17:07:15.831414081 +0100
  8236. @@ -1484,7 +1484,7 @@
  8237. struct nfs_removeargs args;
  8238. struct nfs_removeres res;
  8239. struct dentry *dentry;
  8240. - wait_queue_head_t wq;
  8241. + struct swait_queue_head wq;
  8242. struct rpc_cred *cred;
  8243. struct nfs_fattr dir_attr;
  8244. long timeout;
  8245. diff -Nur linux-4.8.15.orig/include/linux/notifier.h linux-4.8.15/include/linux/notifier.h
  8246. --- linux-4.8.15.orig/include/linux/notifier.h 2016-12-15 17:50:48.000000000 +0100
  8247. +++ linux-4.8.15/include/linux/notifier.h 2017-01-01 17:07:15.831414081 +0100
  8248. @@ -6,7 +6,7 @@
  8249. *
  8250. * Alan Cox <Alan.Cox@linux.org>
  8251. */
  8252. -
  8253. +
  8254. #ifndef _LINUX_NOTIFIER_H
  8255. #define _LINUX_NOTIFIER_H
  8256. #include <linux/errno.h>
  8257. @@ -42,9 +42,7 @@
  8258. * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
  8259. * As compensation, srcu_notifier_chain_unregister() is rather expensive.
  8260. * SRCU notifier chains should be used when the chain will be called very
  8261. - * often but notifier_blocks will seldom be removed. Also, SRCU notifier
  8262. - * chains are slightly more difficult to use because they require special
  8263. - * runtime initialization.
  8264. + * often but notifier_blocks will seldom be removed.
  8265. */
  8266. struct notifier_block;
  8267. @@ -90,7 +88,7 @@
  8268. (name)->head = NULL; \
  8269. } while (0)
  8270. -/* srcu_notifier_heads must be initialized and cleaned up dynamically */
  8271. +/* srcu_notifier_heads must be cleaned up dynamically */
  8272. extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
  8273. #define srcu_cleanup_notifier_head(name) \
  8274. cleanup_srcu_struct(&(name)->srcu);
  8275. @@ -103,7 +101,13 @@
  8276. .head = NULL }
  8277. #define RAW_NOTIFIER_INIT(name) { \
  8278. .head = NULL }
  8279. -/* srcu_notifier_heads cannot be initialized statically */
  8280. +
  8281. +#define SRCU_NOTIFIER_INIT(name, pcpu) \
  8282. + { \
  8283. + .mutex = __MUTEX_INITIALIZER(name.mutex), \
  8284. + .head = NULL, \
  8285. + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \
  8286. + }
  8287. #define ATOMIC_NOTIFIER_HEAD(name) \
  8288. struct atomic_notifier_head name = \
  8289. @@ -115,6 +119,18 @@
  8290. struct raw_notifier_head name = \
  8291. RAW_NOTIFIER_INIT(name)
  8292. +#define _SRCU_NOTIFIER_HEAD(name, mod) \
  8293. + static DEFINE_PER_CPU(struct srcu_struct_array, \
  8294. + name##_head_srcu_array); \
  8295. + mod struct srcu_notifier_head name = \
  8296. + SRCU_NOTIFIER_INIT(name, name##_head_srcu_array)
  8297. +
  8298. +#define SRCU_NOTIFIER_HEAD(name) \
  8299. + _SRCU_NOTIFIER_HEAD(name, )
  8300. +
  8301. +#define SRCU_NOTIFIER_HEAD_STATIC(name) \
  8302. + _SRCU_NOTIFIER_HEAD(name, static)
  8303. +
  8304. #ifdef __KERNEL__
  8305. extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
  8306. @@ -184,12 +200,12 @@
  8307. /*
  8308. * Declared notifiers so far. I can imagine quite a few more chains
  8309. - * over time (eg laptop power reset chains, reboot chain (to clean
  8310. + * over time (eg laptop power reset chains, reboot chain (to clean
  8311. * device units up), device [un]mount chain, module load/unload chain,
  8312. - * low memory chain, screenblank chain (for plug in modular screenblankers)
  8313. + * low memory chain, screenblank chain (for plug in modular screenblankers)
  8314. * VC switch chains (for loadable kernel svgalib VC switch helpers) etc...
  8315. */
  8316. -
  8317. +
  8318. /* CPU notfiers are defined in include/linux/cpu.h. */
  8319. /* netdevice notifiers are defined in include/linux/netdevice.h */
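
The SRCU_NOTIFIER_HEAD()/SRCU_NOTIFIER_HEAD_STATIC() macros added above let an SRCU notifier head be defined at build time, so srcu_init_notifier_head() is no longer required for such heads. A minimal usage sketch (not part of the patch; chain and callback names are invented):

#include <linux/notifier.h>

static SRCU_NOTIFIER_HEAD_STATIC(example_chain);

static int example_cb(struct notifier_block *nb, unsigned long action, void *data)
{
	/* react to 'action' here */
	return NOTIFY_OK;
}

static struct notifier_block example_nb = {
	.notifier_call = example_cb,
};

static void example_register_and_fire(void)
{
	srcu_notifier_chain_register(&example_chain, &example_nb);
	srcu_notifier_call_chain(&example_chain, 0, NULL);
}
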
  8320. diff -Nur linux-4.8.15.orig/include/linux/percpu.h linux-4.8.15/include/linux/percpu.h
  8321. --- linux-4.8.15.orig/include/linux/percpu.h 2016-12-15 17:50:48.000000000 +0100
  8322. +++ linux-4.8.15/include/linux/percpu.h 2017-01-01 17:07:15.843414849 +0100
  8323. @@ -18,6 +18,35 @@
  8324. #define PERCPU_MODULE_RESERVE 0
  8325. #endif
  8326. +#ifdef CONFIG_PREEMPT_RT_FULL
  8327. +
  8328. +#define get_local_var(var) (*({ \
  8329. + migrate_disable(); \
  8330. + this_cpu_ptr(&var); }))
  8331. +
  8332. +#define put_local_var(var) do { \
  8333. + (void)&(var); \
  8334. + migrate_enable(); \
  8335. +} while (0)
  8336. +
  8337. +# define get_local_ptr(var) ({ \
  8338. + migrate_disable(); \
  8339. + this_cpu_ptr(var); })
  8340. +
  8341. +# define put_local_ptr(var) do { \
  8342. + (void)(var); \
  8343. + migrate_enable(); \
  8344. +} while (0)
  8345. +
  8346. +#else
  8347. +
  8348. +#define get_local_var(var) get_cpu_var(var)
  8349. +#define put_local_var(var) put_cpu_var(var)
  8350. +#define get_local_ptr(var) get_cpu_ptr(var)
  8351. +#define put_local_ptr(var) put_cpu_ptr(var)
  8352. +
  8353. +#endif
  8354. +
  8355. /* minimum unit size, also is the maximum supported allocation size */
  8356. #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
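
A sketch of the get_local_var()/put_local_var() pair introduced above, using an invented per-CPU counter. On PREEMPT_RT_FULL only migration is disabled, so the section stays preemptible; on !RT it maps to get_cpu_var()/put_cpu_var():

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, example_events);

static void example_count_event(void)
{
	/* Pins the task to this CPU (RT) or disables preemption (!RT). */
	get_local_var(example_events)++;
	put_local_var(example_events);
}
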
  8357. diff -Nur linux-4.8.15.orig/include/linux/pid.h linux-4.8.15/include/linux/pid.h
  8358. --- linux-4.8.15.orig/include/linux/pid.h 2016-12-15 17:50:48.000000000 +0100
  8359. +++ linux-4.8.15/include/linux/pid.h 2017-01-01 17:07:15.843414849 +0100
  8360. @@ -2,6 +2,7 @@
  8361. #define _LINUX_PID_H
  8362. #include <linux/rcupdate.h>
  8363. +#include <linux/atomic.h>
  8364. enum pid_type
  8365. {
  8366. diff -Nur linux-4.8.15.orig/include/linux/preempt.h linux-4.8.15/include/linux/preempt.h
  8367. --- linux-4.8.15.orig/include/linux/preempt.h 2016-12-15 17:50:48.000000000 +0100
  8368. +++ linux-4.8.15/include/linux/preempt.h 2017-01-01 17:07:15.851415364 +0100
  8369. @@ -50,7 +50,11 @@
  8370. #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
  8371. #define NMI_OFFSET (1UL << NMI_SHIFT)
  8372. -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
  8373. +#ifndef CONFIG_PREEMPT_RT_FULL
  8374. +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
  8375. +#else
  8376. +# define SOFTIRQ_DISABLE_OFFSET (0)
  8377. +#endif
  8378. /* We use the MSB mostly because its available */
  8379. #define PREEMPT_NEED_RESCHED 0x80000000
  8380. @@ -59,9 +63,15 @@
  8381. #include <asm/preempt.h>
  8382. #define hardirq_count() (preempt_count() & HARDIRQ_MASK)
  8383. -#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
  8384. #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
  8385. | NMI_MASK))
  8386. +#ifndef CONFIG_PREEMPT_RT_FULL
  8387. +# define softirq_count() (preempt_count() & SOFTIRQ_MASK)
  8388. +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
  8389. +#else
  8390. +# define softirq_count() (0UL)
  8391. +extern int in_serving_softirq(void);
  8392. +#endif
  8393. /*
  8394. * Are we doing bottom half or hardware interrupt processing?
  8395. @@ -72,7 +82,6 @@
  8396. #define in_irq() (hardirq_count())
  8397. #define in_softirq() (softirq_count())
  8398. #define in_interrupt() (irq_count())
  8399. -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
  8400. /*
  8401. * Are we in NMI context?
  8402. @@ -91,7 +100,11 @@
  8403. /*
  8404. * The preempt_count offset after spin_lock()
  8405. */
  8406. +#if !defined(CONFIG_PREEMPT_RT_FULL)
  8407. #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
  8408. +#else
  8409. +#define PREEMPT_LOCK_OFFSET 0
  8410. +#endif
  8411. /*
  8412. * The preempt_count offset needed for things like:
  8413. @@ -140,6 +153,20 @@
  8414. #define preempt_count_inc() preempt_count_add(1)
  8415. #define preempt_count_dec() preempt_count_sub(1)
  8416. +#ifdef CONFIG_PREEMPT_LAZY
  8417. +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
  8418. +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
  8419. +#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
  8420. +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
  8421. +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
  8422. +#else
  8423. +#define add_preempt_lazy_count(val) do { } while (0)
  8424. +#define sub_preempt_lazy_count(val) do { } while (0)
  8425. +#define inc_preempt_lazy_count() do { } while (0)
  8426. +#define dec_preempt_lazy_count() do { } while (0)
  8427. +#define preempt_lazy_count() (0)
  8428. +#endif
  8429. +
  8430. #ifdef CONFIG_PREEMPT_COUNT
  8431. #define preempt_disable() \
  8432. @@ -148,13 +175,25 @@
  8433. barrier(); \
  8434. } while (0)
  8435. +#define preempt_lazy_disable() \
  8436. +do { \
  8437. + inc_preempt_lazy_count(); \
  8438. + barrier(); \
  8439. +} while (0)
  8440. +
  8441. #define sched_preempt_enable_no_resched() \
  8442. do { \
  8443. barrier(); \
  8444. preempt_count_dec(); \
  8445. } while (0)
  8446. -#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
  8447. +#ifdef CONFIG_PREEMPT_RT_BASE
  8448. +# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
  8449. +# define preempt_check_resched_rt() preempt_check_resched()
  8450. +#else
  8451. +# define preempt_enable_no_resched() preempt_enable()
  8452. +# define preempt_check_resched_rt() barrier();
  8453. +#endif
  8454. #define preemptible() (preempt_count() == 0 && !irqs_disabled())
  8455. @@ -179,6 +218,13 @@
  8456. __preempt_schedule(); \
  8457. } while (0)
  8458. +#define preempt_lazy_enable() \
  8459. +do { \
  8460. + dec_preempt_lazy_count(); \
  8461. + barrier(); \
  8462. + preempt_check_resched(); \
  8463. +} while (0)
  8464. +
  8465. #else /* !CONFIG_PREEMPT */
  8466. #define preempt_enable() \
  8467. do { \
  8468. @@ -224,6 +270,7 @@
  8469. #define preempt_disable_notrace() barrier()
  8470. #define preempt_enable_no_resched_notrace() barrier()
  8471. #define preempt_enable_notrace() barrier()
  8472. +#define preempt_check_resched_rt() barrier()
  8473. #define preemptible() 0
  8474. #endif /* CONFIG_PREEMPT_COUNT */
  8475. @@ -244,10 +291,31 @@
  8476. } while (0)
  8477. #define preempt_fold_need_resched() \
  8478. do { \
  8479. - if (tif_need_resched()) \
  8480. + if (tif_need_resched_now()) \
  8481. set_preempt_need_resched(); \
  8482. } while (0)
  8483. +#ifdef CONFIG_PREEMPT_RT_FULL
  8484. +# define preempt_disable_rt() preempt_disable()
  8485. +# define preempt_enable_rt() preempt_enable()
  8486. +# define preempt_disable_nort() barrier()
  8487. +# define preempt_enable_nort() barrier()
  8488. +# ifdef CONFIG_SMP
  8489. + extern void migrate_disable(void);
  8490. + extern void migrate_enable(void);
  8491. +# else /* CONFIG_SMP */
  8492. +# define migrate_disable() barrier()
  8493. +# define migrate_enable() barrier()
  8494. +# endif /* CONFIG_SMP */
  8495. +#else
  8496. +# define preempt_disable_rt() barrier()
  8497. +# define preempt_enable_rt() barrier()
  8498. +# define preempt_disable_nort() preempt_disable()
  8499. +# define preempt_enable_nort() preempt_enable()
  8500. +# define migrate_disable() preempt_disable()
  8501. +# define migrate_enable() preempt_enable()
  8502. +#endif
  8503. +
  8504. #ifdef CONFIG_PREEMPT_NOTIFIERS
  8505. struct preempt_notifier;
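
A sketch of the migrate_disable()/migrate_enable() pattern these definitions enable (function name invented): per-CPU accesses stay stable without making the section non-preemptible on RT, while !RT kernels fall back to preempt_disable()/preempt_enable():

#include <linux/preempt.h>
#include <linux/smp.h>

static void example_touch_this_cpu(void)
{
	int cpu;

	migrate_disable();		/* preempt_disable() on !RT kernels */
	cpu = smp_processor_id();	/* stable: the task cannot migrate away */
	/* ... work on per-CPU state of 'cpu'; may take sleeping locks on RT ... */
	migrate_enable();
}
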
  8506. diff -Nur linux-4.8.15.orig/include/linux/printk.h linux-4.8.15/include/linux/printk.h
  8507. --- linux-4.8.15.orig/include/linux/printk.h 2016-12-15 17:50:48.000000000 +0100
  8508. +++ linux-4.8.15/include/linux/printk.h 2017-01-01 17:07:15.855415618 +0100
  8509. @@ -125,9 +125,11 @@
  8510. #ifdef CONFIG_EARLY_PRINTK
  8511. extern asmlinkage __printf(1, 2)
  8512. void early_printk(const char *fmt, ...);
  8513. +extern void printk_kill(void);
  8514. #else
  8515. static inline __printf(1, 2) __cold
  8516. void early_printk(const char *s, ...) { }
  8517. +static inline void printk_kill(void) { }
  8518. #endif
  8519. #ifdef CONFIG_PRINTK_NMI
  8520. diff -Nur linux-4.8.15.orig/include/linux/radix-tree.h linux-4.8.15/include/linux/radix-tree.h
  8521. --- linux-4.8.15.orig/include/linux/radix-tree.h 2016-12-15 17:50:48.000000000 +0100
  8522. +++ linux-4.8.15/include/linux/radix-tree.h 2017-01-01 17:07:15.859415876 +0100
  8523. @@ -289,9 +289,19 @@
  8524. unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
  8525. void ***results, unsigned long *indices,
  8526. unsigned long first_index, unsigned int max_items);
  8527. +#ifdef CONFIG_PREEMPT_RT_FULL
  8528. +static inline int radix_tree_preload(gfp_t gm) { return 0; }
  8529. +static inline int radix_tree_maybe_preload(gfp_t gfp_mask) { return 0; }
  8530. +static inline int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
  8531. +{
  8532. + return 0;
  8533. +};
  8534. +
  8535. +#else
  8536. int radix_tree_preload(gfp_t gfp_mask);
  8537. int radix_tree_maybe_preload(gfp_t gfp_mask);
  8538. int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order);
  8539. +#endif
  8540. void radix_tree_init(void);
  8541. void *radix_tree_tag_set(struct radix_tree_root *root,
  8542. unsigned long index, unsigned int tag);
  8543. @@ -316,7 +326,7 @@
  8544. static inline void radix_tree_preload_end(void)
  8545. {
  8546. - preempt_enable();
  8547. + preempt_enable_nort();
  8548. }
  8549. /**
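
With the change above, radix_tree_preload() compiles to a stub returning 0 on PREEMPT_RT_FULL and radix_tree_preload_end() becomes preempt_enable_nort(), so the usual preload-then-insert pattern keeps building on both configurations. A sketch with invented tree and lock names:

#include <linux/radix-tree.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>

static RADIX_TREE(example_tree, GFP_KERNEL);
static DEFINE_SPINLOCK(example_tree_lock);

static int example_insert(unsigned long index, void *item)
{
	int err;

	err = radix_tree_preload(GFP_KERNEL);	/* no-op on RT */
	if (err)
		return err;
	spin_lock(&example_tree_lock);
	err = radix_tree_insert(&example_tree, index, item);
	spin_unlock(&example_tree_lock);
	radix_tree_preload_end();		/* preempt_enable_nort() */
	return err;
}
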
  8550. diff -Nur linux-4.8.15.orig/include/linux/random.h linux-4.8.15/include/linux/random.h
  8551. --- linux-4.8.15.orig/include/linux/random.h 2016-12-15 17:50:48.000000000 +0100
  8552. +++ linux-4.8.15/include/linux/random.h 2017-01-01 17:07:15.859415876 +0100
  8553. @@ -20,7 +20,7 @@
  8554. extern void add_device_randomness(const void *, unsigned int);
  8555. extern void add_input_randomness(unsigned int type, unsigned int code,
  8556. unsigned int value);
  8557. -extern void add_interrupt_randomness(int irq, int irq_flags);
  8558. +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip);
  8559. extern void get_random_bytes(void *buf, int nbytes);
  8560. extern int add_random_ready_callback(struct random_ready_callback *rdy);
  8561. diff -Nur linux-4.8.15.orig/include/linux/rbtree_augmented.h linux-4.8.15/include/linux/rbtree_augmented.h
  8562. --- linux-4.8.15.orig/include/linux/rbtree_augmented.h 2016-12-15 17:50:48.000000000 +0100
  8563. +++ linux-4.8.15/include/linux/rbtree_augmented.h 2017-01-01 17:07:15.859415876 +0100
  8564. @@ -26,6 +26,7 @@
  8565. #include <linux/compiler.h>
  8566. #include <linux/rbtree.h>
  8567. +#include <linux/rcupdate.h>
  8568. /*
  8569. * Please note - only struct rb_augment_callbacks and the prototypes for
  8570. diff -Nur linux-4.8.15.orig/include/linux/rbtree.h linux-4.8.15/include/linux/rbtree.h
  8571. --- linux-4.8.15.orig/include/linux/rbtree.h 2016-12-15 17:50:48.000000000 +0100
  8572. +++ linux-4.8.15/include/linux/rbtree.h 2017-01-01 17:07:15.859415876 +0100
  8573. @@ -31,7 +31,7 @@
  8574. #include <linux/kernel.h>
  8575. #include <linux/stddef.h>
  8576. -#include <linux/rcupdate.h>
  8577. +#include <linux/rcu_assign_pointer.h>
  8578. struct rb_node {
  8579. unsigned long __rb_parent_color;
  8580. diff -Nur linux-4.8.15.orig/include/linux/rcu_assign_pointer.h linux-4.8.15/include/linux/rcu_assign_pointer.h
  8581. --- linux-4.8.15.orig/include/linux/rcu_assign_pointer.h 1970-01-01 01:00:00.000000000 +0100
  8582. +++ linux-4.8.15/include/linux/rcu_assign_pointer.h 2017-01-01 17:07:15.859415876 +0100
  8583. @@ -0,0 +1,54 @@
  8584. +#ifndef __LINUX_RCU_ASSIGN_POINTER_H__
  8585. +#define __LINUX_RCU_ASSIGN_POINTER_H__
  8586. +#include <linux/compiler.h>
  8587. +#include <asm/barrier.h>
  8588. +
  8589. +/**
  8590. + * RCU_INITIALIZER() - statically initialize an RCU-protected global variable
  8591. + * @v: The value to statically initialize with.
  8592. + */
  8593. +#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v)
  8594. +
  8595. +/**
  8596. + * rcu_assign_pointer() - assign to RCU-protected pointer
  8597. + * @p: pointer to assign to
  8598. + * @v: value to assign (publish)
  8599. + *
  8600. + * Assigns the specified value to the specified RCU-protected
  8601. + * pointer, ensuring that any concurrent RCU readers will see
  8602. + * any prior initialization.
  8603. + *
  8604. + * Inserts memory barriers on architectures that require them
  8605. + * (which is most of them), and also prevents the compiler from
  8606. + * reordering the code that initializes the structure after the pointer
  8607. + * assignment. More importantly, this call documents which pointers
  8608. + * will be dereferenced by RCU read-side code.
  8609. + *
  8610. + * In some special cases, you may use RCU_INIT_POINTER() instead
  8611. + * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due
  8612. + * to the fact that it does not constrain either the CPU or the compiler.
  8613. + * That said, using RCU_INIT_POINTER() when you should have used
  8614. + * rcu_assign_pointer() is a very bad thing that results in
  8615. + * impossible-to-diagnose memory corruption. So please be careful.
  8616. + * See the RCU_INIT_POINTER() comment header for details.
  8617. + *
  8618. + * Note that rcu_assign_pointer() evaluates each of its arguments only
  8619. + * once, appearances notwithstanding. One of the "extra" evaluations
  8620. + * is in typeof() and the other visible only to sparse (__CHECKER__),
  8621. + * neither of which actually execute the argument. As with most cpp
  8622. + * macros, this execute-arguments-only-once property is important, so
  8623. + * please be careful when making changes to rcu_assign_pointer() and the
  8624. + * other macros that it invokes.
  8625. + */
  8626. +#define rcu_assign_pointer(p, v) \
  8627. +({ \
  8628. + uintptr_t _r_a_p__v = (uintptr_t)(v); \
  8629. + \
  8630. + if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \
  8631. + WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \
  8632. + else \
  8633. + smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \
  8634. + _r_a_p__v; \
  8635. +})
  8636. +
  8637. +#endif
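
The new header only moves RCU_INITIALIZER() and rcu_assign_pointer() out of rcupdate.h so that rbtree.h can use them without pulling in all of RCU; their semantics are unchanged. A small publish/read sketch under those semantics (struct and variable names invented; freeing the old object after a grace period is omitted):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_cfg {
	int threshold;
};

static struct example_cfg __rcu *example_active;

static int example_publish(int threshold)
{
	struct example_cfg *new = kzalloc(sizeof(*new), GFP_KERNEL);

	if (!new)
		return -ENOMEM;
	new->threshold = threshold;
	/* Readers that see the new pointer also see ->threshold. */
	rcu_assign_pointer(example_active, new);
	return 0;
}

static int example_read(void)
{
	struct example_cfg *cfg;
	int val = 0;

	rcu_read_lock();
	cfg = rcu_dereference(example_active);
	if (cfg)
		val = cfg->threshold;
	rcu_read_unlock();
	return val;
}
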
  8638. diff -Nur linux-4.8.15.orig/include/linux/rcupdate.h linux-4.8.15/include/linux/rcupdate.h
  8639. --- linux-4.8.15.orig/include/linux/rcupdate.h 2016-12-15 17:50:48.000000000 +0100
  8640. +++ linux-4.8.15/include/linux/rcupdate.h 2017-01-01 17:07:15.867416395 +0100
  8641. @@ -46,6 +46,7 @@
  8642. #include <linux/compiler.h>
  8643. #include <linux/ktime.h>
  8644. #include <linux/irqflags.h>
  8645. +#include <linux/rcu_assign_pointer.h>
  8646. #include <asm/barrier.h>
  8647. @@ -178,6 +179,9 @@
  8648. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  8649. +#ifdef CONFIG_PREEMPT_RT_FULL
  8650. +#define call_rcu_bh call_rcu
  8651. +#else
  8652. /**
  8653. * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
  8654. * @head: structure to be used for queueing the RCU updates.
  8655. @@ -201,6 +205,7 @@
  8656. */
  8657. void call_rcu_bh(struct rcu_head *head,
  8658. rcu_callback_t func);
  8659. +#endif
  8660. /**
  8661. * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
  8662. @@ -301,6 +306,11 @@
  8663. * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
  8664. */
  8665. #define rcu_preempt_depth() (current->rcu_read_lock_nesting)
  8666. +#ifndef CONFIG_PREEMPT_RT_FULL
  8667. +#define sched_rcu_preempt_depth() rcu_preempt_depth()
  8668. +#else
  8669. +static inline int sched_rcu_preempt_depth(void) { return 0; }
  8670. +#endif
  8671. #else /* #ifdef CONFIG_PREEMPT_RCU */
  8672. @@ -326,6 +336,8 @@
  8673. return 0;
  8674. }
  8675. +#define sched_rcu_preempt_depth() rcu_preempt_depth()
  8676. +
  8677. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  8678. /* Internal to kernel */
  8679. @@ -500,7 +512,14 @@
  8680. int debug_lockdep_rcu_enabled(void);
  8681. int rcu_read_lock_held(void);
  8682. +#ifdef CONFIG_PREEMPT_RT_FULL
  8683. +static inline int rcu_read_lock_bh_held(void)
  8684. +{
  8685. + return rcu_read_lock_held();
  8686. +}
  8687. +#else
  8688. int rcu_read_lock_bh_held(void);
  8689. +#endif
  8690. /**
  8691. * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
  8692. @@ -621,54 +640,6 @@
  8693. })
  8694. /**
  8695. - * RCU_INITIALIZER() - statically initialize an RCU-protected global variable
  8696. - * @v: The value to statically initialize with.
  8697. - */
  8698. -#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v)
  8699. -
  8700. -/**
  8701. - * rcu_assign_pointer() - assign to RCU-protected pointer
  8702. - * @p: pointer to assign to
  8703. - * @v: value to assign (publish)
  8704. - *
  8705. - * Assigns the specified value to the specified RCU-protected
  8706. - * pointer, ensuring that any concurrent RCU readers will see
  8707. - * any prior initialization.
  8708. - *
  8709. - * Inserts memory barriers on architectures that require them
  8710. - * (which is most of them), and also prevents the compiler from
  8711. - * reordering the code that initializes the structure after the pointer
  8712. - * assignment. More importantly, this call documents which pointers
  8713. - * will be dereferenced by RCU read-side code.
  8714. - *
  8715. - * In some special cases, you may use RCU_INIT_POINTER() instead
  8716. - * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due
  8717. - * to the fact that it does not constrain either the CPU or the compiler.
  8718. - * That said, using RCU_INIT_POINTER() when you should have used
  8719. - * rcu_assign_pointer() is a very bad thing that results in
  8720. - * impossible-to-diagnose memory corruption. So please be careful.
  8721. - * See the RCU_INIT_POINTER() comment header for details.
  8722. - *
  8723. - * Note that rcu_assign_pointer() evaluates each of its arguments only
  8724. - * once, appearances notwithstanding. One of the "extra" evaluations
  8725. - * is in typeof() and the other visible only to sparse (__CHECKER__),
  8726. - * neither of which actually execute the argument. As with most cpp
  8727. - * macros, this execute-arguments-only-once property is important, so
  8728. - * please be careful when making changes to rcu_assign_pointer() and the
  8729. - * other macros that it invokes.
  8730. - */
  8731. -#define rcu_assign_pointer(p, v) \
  8732. -({ \
  8733. - uintptr_t _r_a_p__v = (uintptr_t)(v); \
  8734. - \
  8735. - if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \
  8736. - WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \
  8737. - else \
  8738. - smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \
  8739. - _r_a_p__v; \
  8740. -})
  8741. -
  8742. -/**
  8743. * rcu_access_pointer() - fetch RCU pointer with no dereferencing
  8744. * @p: The pointer to read
  8745. *
  8746. @@ -946,10 +917,14 @@
  8747. static inline void rcu_read_lock_bh(void)
  8748. {
  8749. local_bh_disable();
  8750. +#ifdef CONFIG_PREEMPT_RT_FULL
  8751. + rcu_read_lock();
  8752. +#else
  8753. __acquire(RCU_BH);
  8754. rcu_lock_acquire(&rcu_bh_lock_map);
  8755. RCU_LOCKDEP_WARN(!rcu_is_watching(),
  8756. "rcu_read_lock_bh() used illegally while idle");
  8757. +#endif
  8758. }
  8759. /*
  8760. @@ -959,10 +934,14 @@
  8761. */
  8762. static inline void rcu_read_unlock_bh(void)
  8763. {
  8764. +#ifdef CONFIG_PREEMPT_RT_FULL
  8765. + rcu_read_unlock();
  8766. +#else
  8767. RCU_LOCKDEP_WARN(!rcu_is_watching(),
  8768. "rcu_read_unlock_bh() used illegally while idle");
  8769. rcu_lock_release(&rcu_bh_lock_map);
  8770. __release(RCU_BH);
  8771. +#endif
  8772. local_bh_enable();
  8773. }
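
On PREEMPT_RT_FULL the RCU-bh flavour collapses onto plain RCU (call_rcu_bh becomes call_rcu, and rcu_read_lock_bh() becomes local_bh_disable() plus rcu_read_lock()), so callers keep the same source-level pattern on both configurations. A trivial sketch:

#include <linux/rcupdate.h>

static void example_walk_bh_protected(void)
{
	rcu_read_lock_bh();
	/* ... iterate a structure updated via call_rcu_bh()/call_rcu() ... */
	rcu_read_unlock_bh();
}
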
  8774. diff -Nur linux-4.8.15.orig/include/linux/rcutree.h linux-4.8.15/include/linux/rcutree.h
  8775. --- linux-4.8.15.orig/include/linux/rcutree.h 2016-12-15 17:50:48.000000000 +0100
  8776. +++ linux-4.8.15/include/linux/rcutree.h 2017-01-01 17:07:15.867416395 +0100
  8777. @@ -44,7 +44,11 @@
  8778. rcu_note_context_switch();
  8779. }
  8780. +#ifdef CONFIG_PREEMPT_RT_FULL
  8781. +# define synchronize_rcu_bh synchronize_rcu
  8782. +#else
  8783. void synchronize_rcu_bh(void);
  8784. +#endif
  8785. void synchronize_sched_expedited(void);
  8786. void synchronize_rcu_expedited(void);
  8787. @@ -72,7 +76,11 @@
  8788. }
  8789. void rcu_barrier(void);
  8790. +#ifdef CONFIG_PREEMPT_RT_FULL
  8791. +# define rcu_barrier_bh rcu_barrier
  8792. +#else
  8793. void rcu_barrier_bh(void);
  8794. +#endif
  8795. void rcu_barrier_sched(void);
  8796. unsigned long get_state_synchronize_rcu(void);
  8797. void cond_synchronize_rcu(unsigned long oldstate);
  8798. @@ -82,17 +90,14 @@
  8799. extern unsigned long rcutorture_testseq;
  8800. extern unsigned long rcutorture_vernum;
  8801. unsigned long rcu_batches_started(void);
  8802. -unsigned long rcu_batches_started_bh(void);
  8803. unsigned long rcu_batches_started_sched(void);
  8804. unsigned long rcu_batches_completed(void);
  8805. -unsigned long rcu_batches_completed_bh(void);
  8806. unsigned long rcu_batches_completed_sched(void);
  8807. unsigned long rcu_exp_batches_completed(void);
  8808. unsigned long rcu_exp_batches_completed_sched(void);
  8809. void show_rcu_gp_kthreads(void);
  8810. void rcu_force_quiescent_state(void);
  8811. -void rcu_bh_force_quiescent_state(void);
  8812. void rcu_sched_force_quiescent_state(void);
  8813. void rcu_idle_enter(void);
  8814. @@ -109,6 +114,16 @@
  8815. bool rcu_is_watching(void);
  8816. +#ifndef CONFIG_PREEMPT_RT_FULL
  8817. +void rcu_bh_force_quiescent_state(void);
  8818. +unsigned long rcu_batches_started_bh(void);
  8819. +unsigned long rcu_batches_completed_bh(void);
  8820. +#else
  8821. +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state
  8822. +# define rcu_batches_completed_bh rcu_batches_completed
  8823. +# define rcu_batches_started_bh rcu_batches_completed
  8824. +#endif
  8825. +
  8826. void rcu_all_qs(void);
  8827. /* RCUtree hotplug events */
  8828. diff -Nur linux-4.8.15.orig/include/linux/rtmutex.h linux-4.8.15/include/linux/rtmutex.h
  8829. --- linux-4.8.15.orig/include/linux/rtmutex.h 2016-12-15 17:50:48.000000000 +0100
  8830. +++ linux-4.8.15/include/linux/rtmutex.h 2017-01-01 17:07:15.875416916 +0100
  8831. @@ -13,11 +13,15 @@
  8832. #define __LINUX_RT_MUTEX_H
  8833. #include <linux/linkage.h>
  8834. +#include <linux/spinlock_types_raw.h>
  8835. #include <linux/rbtree.h>
  8836. -#include <linux/spinlock_types.h>
  8837. extern int max_lock_depth; /* for sysctl */
  8838. +#ifdef CONFIG_DEBUG_MUTEXES
  8839. +#include <linux/debug_locks.h>
  8840. +#endif
  8841. +
  8842. /**
  8843. * The rt_mutex structure
  8844. *
  8845. @@ -31,8 +35,8 @@
  8846. struct rb_root waiters;
  8847. struct rb_node *waiters_leftmost;
  8848. struct task_struct *owner;
  8849. -#ifdef CONFIG_DEBUG_RT_MUTEXES
  8850. int save_state;
  8851. +#ifdef CONFIG_DEBUG_RT_MUTEXES
  8852. const char *name, *file;
  8853. int line;
  8854. void *magic;
  8855. @@ -55,22 +59,33 @@
  8856. # define rt_mutex_debug_check_no_locks_held(task) do { } while (0)
  8857. #endif
  8858. +# define rt_mutex_init(mutex) \
  8859. + do { \
  8860. + raw_spin_lock_init(&(mutex)->wait_lock); \
  8861. + __rt_mutex_init(mutex, #mutex); \
  8862. + } while (0)
  8863. +
  8864. #ifdef CONFIG_DEBUG_RT_MUTEXES
  8865. # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
  8866. , .name = #mutexname, .file = __FILE__, .line = __LINE__
  8867. -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__)
  8868. extern void rt_mutex_debug_task_free(struct task_struct *tsk);
  8869. #else
  8870. # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
  8871. -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL)
  8872. # define rt_mutex_debug_task_free(t) do { } while (0)
  8873. #endif
  8874. -#define __RT_MUTEX_INITIALIZER(mutexname) \
  8875. - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
  8876. +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
  8877. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
  8878. , .waiters = RB_ROOT \
  8879. , .owner = NULL \
  8880. - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
  8881. + __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
  8882. +
  8883. +#define __RT_MUTEX_INITIALIZER(mutexname) \
  8884. + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) }
  8885. +
  8886. +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
  8887. + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
  8888. + , .save_state = 1 }
  8889. #define DEFINE_RT_MUTEX(mutexname) \
  8890. struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
  8891. @@ -91,6 +106,7 @@
  8892. extern void rt_mutex_lock(struct rt_mutex *lock);
  8893. extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
  8894. +extern int rt_mutex_lock_killable(struct rt_mutex *lock);
  8895. extern int rt_mutex_timed_lock(struct rt_mutex *lock,
  8896. struct hrtimer_sleeper *timeout);
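
rt_mutex_init() now unconditionally initializes the wait_lock, and rt_mutex_lock_killable() is exported alongside the interruptible variant. A usage sketch with an invented mutex name; the killable variant is assumed to return 0 on success and a negative error if a fatal signal interrupts the wait:

#include <linux/rtmutex.h>

static DEFINE_RT_MUTEX(example_rtm);

static int example_do_work(void)
{
	int ret;

	ret = rt_mutex_lock_killable(&example_rtm);
	if (ret)
		return ret;	/* fatal signal while blocking */
	/* ... critical section, preemptible and PI-aware ... */
	rt_mutex_unlock(&example_rtm);
	return 0;
}
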
  8897. diff -Nur linux-4.8.15.orig/include/linux/rwlock_rt.h linux-4.8.15/include/linux/rwlock_rt.h
  8898. --- linux-4.8.15.orig/include/linux/rwlock_rt.h 1970-01-01 01:00:00.000000000 +0100
  8899. +++ linux-4.8.15/include/linux/rwlock_rt.h 2017-01-01 17:07:15.875416916 +0100
  8900. @@ -0,0 +1,99 @@
  8901. +#ifndef __LINUX_RWLOCK_RT_H
  8902. +#define __LINUX_RWLOCK_RT_H
  8903. +
  8904. +#ifndef __LINUX_SPINLOCK_H
  8905. +#error Do not include directly. Use spinlock.h
  8906. +#endif
  8907. +
  8908. +#define rwlock_init(rwl) \
  8909. +do { \
  8910. + static struct lock_class_key __key; \
  8911. + \
  8912. + rt_mutex_init(&(rwl)->lock); \
  8913. + __rt_rwlock_init(rwl, #rwl, &__key); \
  8914. +} while (0)
  8915. +
  8916. +extern void __lockfunc rt_write_lock(rwlock_t *rwlock);
  8917. +extern void __lockfunc rt_read_lock(rwlock_t *rwlock);
  8918. +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock);
  8919. +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags);
  8920. +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock);
  8921. +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock);
  8922. +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock);
  8923. +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock);
  8924. +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock);
  8925. +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key);
  8926. +
  8927. +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock))
  8928. +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock))
  8929. +
  8930. +#define write_trylock_irqsave(lock, flags) \
  8931. + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags))
  8932. +
  8933. +#define read_lock_irqsave(lock, flags) \
  8934. + do { \
  8935. + typecheck(unsigned long, flags); \
  8936. + flags = rt_read_lock_irqsave(lock); \
  8937. + } while (0)
  8938. +
  8939. +#define write_lock_irqsave(lock, flags) \
  8940. + do { \
  8941. + typecheck(unsigned long, flags); \
  8942. + flags = rt_write_lock_irqsave(lock); \
  8943. + } while (0)
  8944. +
  8945. +#define read_lock(lock) rt_read_lock(lock)
  8946. +
  8947. +#define read_lock_bh(lock) \
  8948. + do { \
  8949. + local_bh_disable(); \
  8950. + rt_read_lock(lock); \
  8951. + } while (0)
  8952. +
  8953. +#define read_lock_irq(lock) read_lock(lock)
  8954. +
  8955. +#define write_lock(lock) rt_write_lock(lock)
  8956. +
  8957. +#define write_lock_bh(lock) \
  8958. + do { \
  8959. + local_bh_disable(); \
  8960. + rt_write_lock(lock); \
  8961. + } while (0)
  8962. +
  8963. +#define write_lock_irq(lock) write_lock(lock)
  8964. +
  8965. +#define read_unlock(lock) rt_read_unlock(lock)
  8966. +
  8967. +#define read_unlock_bh(lock) \
  8968. + do { \
  8969. + rt_read_unlock(lock); \
  8970. + local_bh_enable(); \
  8971. + } while (0)
  8972. +
  8973. +#define read_unlock_irq(lock) read_unlock(lock)
  8974. +
  8975. +#define write_unlock(lock) rt_write_unlock(lock)
  8976. +
  8977. +#define write_unlock_bh(lock) \
  8978. + do { \
  8979. + rt_write_unlock(lock); \
  8980. + local_bh_enable(); \
  8981. + } while (0)
  8982. +
  8983. +#define write_unlock_irq(lock) write_unlock(lock)
  8984. +
  8985. +#define read_unlock_irqrestore(lock, flags) \
  8986. + do { \
  8987. + typecheck(unsigned long, flags); \
  8988. + (void) flags; \
  8989. + rt_read_unlock(lock); \
  8990. + } while (0)
  8991. +
  8992. +#define write_unlock_irqrestore(lock, flags) \
  8993. + do { \
  8994. + typecheck(unsigned long, flags); \
  8995. + (void) flags; \
  8996. + rt_write_unlock(lock); \
  8997. + } while (0)
  8998. +
  8999. +#endif
  9000. diff -Nur linux-4.8.15.orig/include/linux/rwlock_types.h linux-4.8.15/include/linux/rwlock_types.h
  9001. --- linux-4.8.15.orig/include/linux/rwlock_types.h 2016-12-15 17:50:48.000000000 +0100
  9002. +++ linux-4.8.15/include/linux/rwlock_types.h 2017-01-01 17:07:15.875416916 +0100
  9003. @@ -1,6 +1,10 @@
  9004. #ifndef __LINUX_RWLOCK_TYPES_H
  9005. #define __LINUX_RWLOCK_TYPES_H
  9006. +#if !defined(__LINUX_SPINLOCK_TYPES_H)
  9007. +# error "Do not include directly, include spinlock_types.h"
  9008. +#endif
  9009. +
  9010. /*
  9011. * include/linux/rwlock_types.h - generic rwlock type definitions
  9012. * and initializers
  9013. diff -Nur linux-4.8.15.orig/include/linux/rwlock_types_rt.h linux-4.8.15/include/linux/rwlock_types_rt.h
  9014. --- linux-4.8.15.orig/include/linux/rwlock_types_rt.h 1970-01-01 01:00:00.000000000 +0100
  9015. +++ linux-4.8.15/include/linux/rwlock_types_rt.h 2017-01-01 17:07:15.875416916 +0100
  9016. @@ -0,0 +1,33 @@
  9017. +#ifndef __LINUX_RWLOCK_TYPES_RT_H
  9018. +#define __LINUX_RWLOCK_TYPES_RT_H
  9019. +
  9020. +#ifndef __LINUX_SPINLOCK_TYPES_H
  9021. +#error "Do not include directly. Include spinlock_types.h instead"
  9022. +#endif
  9023. +
  9024. +/*
  9025. + * rwlocks - rtmutex which allows single reader recursion
  9026. + */
  9027. +typedef struct {
  9028. + struct rt_mutex lock;
  9029. + int read_depth;
  9030. + unsigned int break_lock;
  9031. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9032. + struct lockdep_map dep_map;
  9033. +#endif
  9034. +} rwlock_t;
  9035. +
  9036. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9037. +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  9038. +#else
  9039. +# define RW_DEP_MAP_INIT(lockname)
  9040. +#endif
  9041. +
  9042. +#define __RW_LOCK_UNLOCKED(name) \
  9043. + { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \
  9044. + RW_DEP_MAP_INIT(name) }
  9045. +
  9046. +#define DEFINE_RWLOCK(name) \
  9047. + rwlock_t name = __RW_LOCK_UNLOCKED(name)
  9048. +
  9049. +#endif
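
With the two headers above, rwlock_t becomes an rtmutex on PREEMPT_RT_FULL: read_lock() may sleep, only one reader holds the lock at a time (with recursion for that reader), and the _irq/_irqsave variants no longer disable interrupts. Generic code keeps the usual calls; a sketch with invented names:

#include <linux/spinlock.h>	/* pulls in rwlock_rt.h on RT */

static DEFINE_RWLOCK(example_rwlock);
static int example_shared;

static int example_get(void)
{
	int v;

	read_lock(&example_rwlock);	/* rt_read_lock() on RT */
	v = example_shared;
	read_unlock(&example_rwlock);
	return v;
}

static void example_set(int v)
{
	write_lock(&example_rwlock);	/* rt_write_lock() on RT */
	example_shared = v;
	write_unlock(&example_rwlock);
}
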
  9050. diff -Nur linux-4.8.15.orig/include/linux/rwsem.h linux-4.8.15/include/linux/rwsem.h
  9051. --- linux-4.8.15.orig/include/linux/rwsem.h 2016-12-15 17:50:48.000000000 +0100
  9052. +++ linux-4.8.15/include/linux/rwsem.h 2017-01-01 17:07:15.875416916 +0100
  9053. @@ -19,6 +19,10 @@
  9054. #include <linux/osq_lock.h>
  9055. #endif
  9056. +#ifdef CONFIG_PREEMPT_RT_FULL
  9057. +#include <linux/rwsem_rt.h>
  9058. +#else /* PREEMPT_RT_FULL */
  9059. +
  9060. struct rw_semaphore;
  9061. #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
  9062. @@ -184,4 +188,6 @@
  9063. # define up_read_non_owner(sem) up_read(sem)
  9064. #endif
  9065. +#endif /* !PREEMPT_RT_FULL */
  9066. +
  9067. #endif /* _LINUX_RWSEM_H */
  9068. diff -Nur linux-4.8.15.orig/include/linux/rwsem_rt.h linux-4.8.15/include/linux/rwsem_rt.h
  9069. --- linux-4.8.15.orig/include/linux/rwsem_rt.h 1970-01-01 01:00:00.000000000 +0100
  9070. +++ linux-4.8.15/include/linux/rwsem_rt.h 2017-01-01 17:07:15.875416916 +0100
  9071. @@ -0,0 +1,167 @@
  9072. +#ifndef _LINUX_RWSEM_RT_H
  9073. +#define _LINUX_RWSEM_RT_H
  9074. +
  9075. +#ifndef _LINUX_RWSEM_H
  9076. +#error "Include rwsem.h"
  9077. +#endif
  9078. +
  9079. +/*
  9080. + * RW-semaphores are a spinlock plus a reader-depth count.
  9081. + *
  9082. + * Note that the semantics are different from the usual
  9083. + * Linux rw-sems, in PREEMPT_RT mode we do not allow
  9084. + * multiple readers to hold the lock at once, we only allow
  9085. + * a read-lock owner to read-lock recursively. This is
  9086. + * better for latency, makes the implementation inherently
  9087. + * fair and makes it simpler as well.
  9088. + */
  9089. +
  9090. +#include <linux/rtmutex.h>
  9091. +
  9092. +struct rw_semaphore {
  9093. + struct rt_mutex lock;
  9094. + int read_depth;
  9095. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9096. + struct lockdep_map dep_map;
  9097. +#endif
  9098. +};
  9099. +
  9100. +#define __RWSEM_INITIALIZER(name) \
  9101. + { .lock = __RT_MUTEX_INITIALIZER(name.lock), \
  9102. + RW_DEP_MAP_INIT(name) }
  9103. +
  9104. +#define DECLARE_RWSEM(lockname) \
  9105. + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
  9106. +
  9107. +extern void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
  9108. + struct lock_class_key *key);
  9109. +
  9110. +#define __rt_init_rwsem(sem, name, key) \
  9111. + do { \
  9112. + rt_mutex_init(&(sem)->lock); \
  9113. + __rt_rwsem_init((sem), (name), (key));\
  9114. + } while (0)
  9115. +
  9116. +#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key)
  9117. +
  9118. +# define rt_init_rwsem(sem) \
  9119. +do { \
  9120. + static struct lock_class_key __key; \
  9121. + \
  9122. + __rt_init_rwsem((sem), #sem, &__key); \
  9123. +} while (0)
  9124. +
  9125. +extern void rt_down_write(struct rw_semaphore *rwsem);
  9126. +extern int rt_down_write_killable(struct rw_semaphore *rwsem);
  9127. +extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass);
  9128. +extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass);
  9129. +extern int rt_down_write_killable_nested(struct rw_semaphore *rwsem,
  9130. + int subclass);
  9131. +extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
  9132. + struct lockdep_map *nest);
  9133. +extern void rt__down_read(struct rw_semaphore *rwsem);
  9134. +extern void rt_down_read(struct rw_semaphore *rwsem);
  9135. +extern int rt_down_write_trylock(struct rw_semaphore *rwsem);
  9136. +extern int rt__down_read_trylock(struct rw_semaphore *rwsem);
  9137. +extern int rt_down_read_trylock(struct rw_semaphore *rwsem);
  9138. +extern void __rt_up_read(struct rw_semaphore *rwsem);
  9139. +extern void rt_up_read(struct rw_semaphore *rwsem);
  9140. +extern void rt_up_write(struct rw_semaphore *rwsem);
  9141. +extern void rt_downgrade_write(struct rw_semaphore *rwsem);
  9142. +
  9143. +#define init_rwsem(sem) rt_init_rwsem(sem)
  9144. +#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock)
  9145. +
  9146. +static inline int rwsem_is_contended(struct rw_semaphore *sem)
  9147. +{
  9148. + /* rt_mutex_has_waiters() */
  9149. + return !RB_EMPTY_ROOT(&sem->lock.waiters);
  9150. +}
  9151. +
  9152. +static inline void __down_read(struct rw_semaphore *sem)
  9153. +{
  9154. + rt__down_read(sem);
  9155. +}
  9156. +
  9157. +static inline void down_read(struct rw_semaphore *sem)
  9158. +{
  9159. + rt_down_read(sem);
  9160. +}
  9161. +
  9162. +static inline int __down_read_trylock(struct rw_semaphore *sem)
  9163. +{
  9164. + return rt__down_read_trylock(sem);
  9165. +}
  9166. +
  9167. +static inline int down_read_trylock(struct rw_semaphore *sem)
  9168. +{
  9169. + return rt_down_read_trylock(sem);
  9170. +}
  9171. +
  9172. +static inline void down_write(struct rw_semaphore *sem)
  9173. +{
  9174. + rt_down_write(sem);
  9175. +}
  9176. +
  9177. +static inline int down_write_killable(struct rw_semaphore *sem)
  9178. +{
  9179. + return rt_down_write_killable(sem);
  9180. +}
  9181. +
  9182. +static inline int down_write_trylock(struct rw_semaphore *sem)
  9183. +{
  9184. + return rt_down_write_trylock(sem);
  9185. +}
  9186. +
  9187. +static inline void __up_read(struct rw_semaphore *sem)
  9188. +{
  9189. + __rt_up_read(sem);
  9190. +}
  9191. +
  9192. +static inline void up_read(struct rw_semaphore *sem)
  9193. +{
  9194. + rt_up_read(sem);
  9195. +}
  9196. +
  9197. +static inline void up_write(struct rw_semaphore *sem)
  9198. +{
  9199. + rt_up_write(sem);
  9200. +}
  9201. +
  9202. +static inline void downgrade_write(struct rw_semaphore *sem)
  9203. +{
  9204. + rt_downgrade_write(sem);
  9205. +}
  9206. +
  9207. +static inline void down_read_nested(struct rw_semaphore *sem, int subclass)
  9208. +{
  9209. + return rt_down_read_nested(sem, subclass);
  9210. +}
  9211. +
  9212. +static inline void down_write_nested(struct rw_semaphore *sem, int subclass)
  9213. +{
  9214. + rt_down_write_nested(sem, subclass);
  9215. +}
  9216. +
  9217. +static inline int down_write_killable_nested(struct rw_semaphore *sem,
  9218. + int subclass)
  9219. +{
  9220. + return rt_down_write_killable_nested(sem, subclass);
  9221. +}
  9222. +
  9223. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  9224. +static inline void down_write_nest_lock(struct rw_semaphore *sem,
  9225. + struct rw_semaphore *nest_lock)
  9226. +{
  9227. + rt_down_write_nested_lock(sem, &nest_lock->dep_map);
  9228. +}
  9229. +
  9230. +#else
  9231. +
  9232. +static inline void down_write_nest_lock(struct rw_semaphore *sem,
  9233. + struct rw_semaphore *nest_lock)
  9234. +{
  9235. + rt_down_write_nested_lock(sem, NULL);
  9236. +}
  9237. +#endif
  9238. +#endif
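
The same substitution, now for rw_semaphores: on RT a rw_semaphore is an rtmutex plus a read_depth, so only one reader can hold it (recursively) and writers benefit from priority inheritance. A sketch with invented names:

#include <linux/rwsem.h>
#include <linux/list.h>

static DECLARE_RWSEM(example_sem);
static LIST_HEAD(example_list);

static void example_reader(void)
{
	down_read(&example_sem);	/* rt_down_read() on RT */
	/* ... walk example_list ... */
	up_read(&example_sem);
}

static void example_writer(struct list_head *node)
{
	down_write(&example_sem);
	list_add(node, &example_list);
	up_write(&example_sem);
}
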
  9239. diff -Nur linux-4.8.15.orig/include/linux/sched.h linux-4.8.15/include/linux/sched.h
  9240. --- linux-4.8.15.orig/include/linux/sched.h 2016-12-15 17:50:48.000000000 +0100
  9241. +++ linux-4.8.15/include/linux/sched.h 2017-01-01 17:07:15.887417685 +0100
  9242. @@ -26,6 +26,7 @@
  9243. #include <linux/nodemask.h>
  9244. #include <linux/mm_types.h>
  9245. #include <linux/preempt.h>
  9246. +#include <asm/kmap_types.h>
  9247. #include <asm/page.h>
  9248. #include <asm/ptrace.h>
  9249. @@ -243,10 +244,7 @@
  9250. TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
  9251. __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
  9252. -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
  9253. #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
  9254. -#define task_is_stopped_or_traced(task) \
  9255. - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
  9256. #define task_contributes_to_load(task) \
  9257. ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
  9258. (task->flags & PF_FROZEN) == 0 && \
  9259. @@ -312,6 +310,11 @@
  9260. #endif
  9261. +#define __set_current_state_no_track(state_value) \
  9262. + do { current->state = (state_value); } while (0)
  9263. +#define set_current_state_no_track(state_value) \
  9264. + set_mb(current->state, (state_value))
  9265. +
  9266. /* Task command name length */
  9267. #define TASK_COMM_LEN 16
  9268. @@ -1009,8 +1012,18 @@
  9269. struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
  9270. extern void wake_q_add(struct wake_q_head *head,
  9271. - struct task_struct *task);
  9272. -extern void wake_up_q(struct wake_q_head *head);
  9273. + struct task_struct *task);
  9274. +extern void __wake_up_q(struct wake_q_head *head, bool sleeper);
  9275. +
  9276. +static inline void wake_up_q(struct wake_q_head *head)
  9277. +{
  9278. + __wake_up_q(head, false);
  9279. +}
  9280. +
  9281. +static inline void wake_up_q_sleeper(struct wake_q_head *head)
  9282. +{
  9283. + __wake_up_q(head, true);
  9284. +}
  9285. /*
  9286. * sched-domains (multiprocessor balancing) declarations:
  9287. @@ -1459,6 +1472,7 @@
  9288. struct task_struct {
  9289. volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
  9290. + volatile long saved_state; /* saved state for "spinlock sleepers" */
  9291. void *stack;
  9292. atomic_t usage;
  9293. unsigned int flags; /* per process flags, defined below */
  9294. @@ -1495,6 +1509,12 @@
  9295. #endif
  9296. unsigned int policy;
  9297. +#ifdef CONFIG_PREEMPT_RT_FULL
  9298. + int migrate_disable;
  9299. +# ifdef CONFIG_SCHED_DEBUG
  9300. + int migrate_disable_atomic;
  9301. +# endif
  9302. +#endif
  9303. int nr_cpus_allowed;
  9304. cpumask_t cpus_allowed;
  9305. @@ -1629,6 +1649,9 @@
  9306. struct task_cputime cputime_expires;
  9307. struct list_head cpu_timers[3];
  9308. +#ifdef CONFIG_PREEMPT_RT_BASE
  9309. + struct task_struct *posix_timer_list;
  9310. +#endif
  9311. /* process credentials */
  9312. const struct cred __rcu *real_cred; /* objective and real subjective task
  9313. @@ -1659,10 +1682,15 @@
  9314. /* signal handlers */
  9315. struct signal_struct *signal;
  9316. struct sighand_struct *sighand;
  9317. + struct sigqueue *sigqueue_cache;
  9318. sigset_t blocked, real_blocked;
  9319. sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
  9320. struct sigpending pending;
  9321. +#ifdef CONFIG_PREEMPT_RT_FULL
  9322. + /* TODO: move me into ->restart_block ? */
  9323. + struct siginfo forced_info;
  9324. +#endif
  9325. unsigned long sas_ss_sp;
  9326. size_t sas_ss_size;
  9327. @@ -1891,6 +1919,12 @@
  9328. /* bitmask and counter of trace recursion */
  9329. unsigned long trace_recursion;
  9330. #endif /* CONFIG_TRACING */
  9331. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  9332. + u64 preempt_timestamp_hist;
  9333. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  9334. + long timer_offset;
  9335. +#endif
  9336. +#endif
  9337. #ifdef CONFIG_KCOV
  9338. /* Coverage collection mode enabled for this task (0 if disabled). */
  9339. enum kcov_mode kcov_mode;
  9340. @@ -1916,9 +1950,23 @@
  9341. unsigned int sequential_io;
  9342. unsigned int sequential_io_avg;
  9343. #endif
  9344. +#ifdef CONFIG_PREEMPT_RT_BASE
  9345. + struct rcu_head put_rcu;
  9346. + int softirq_nestcnt;
  9347. + unsigned int softirqs_raised;
  9348. +#endif
  9349. +#ifdef CONFIG_PREEMPT_RT_FULL
  9350. +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32
  9351. + int kmap_idx;
  9352. + pte_t kmap_pte[KM_TYPE_NR];
  9353. +# endif
  9354. +#endif
  9355. #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  9356. unsigned long task_state_change;
  9357. #endif
  9358. +#ifdef CONFIG_PREEMPT_RT_FULL
  9359. + int xmit_recursion;
  9360. +#endif
  9361. int pagefault_disabled;
  9362. #ifdef CONFIG_MMU
  9363. struct task_struct *oom_reaper_list;
  9364. @@ -1939,14 +1987,6 @@
  9365. # define arch_task_struct_size (sizeof(struct task_struct))
  9366. #endif
  9367. -/* Future-safe accessor for struct task_struct's cpus_allowed. */
  9368. -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
  9369. -
  9370. -static inline int tsk_nr_cpus_allowed(struct task_struct *p)
  9371. -{
  9372. - return p->nr_cpus_allowed;
  9373. -}
  9374. -
  9375. #define TNF_MIGRATED 0x01
  9376. #define TNF_NO_GROUP 0x02
  9377. #define TNF_SHARED 0x04
  9378. @@ -2162,6 +2202,15 @@
  9379. extern void free_task(struct task_struct *tsk);
  9380. #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
  9381. +#ifdef CONFIG_PREEMPT_RT_BASE
  9382. +extern void __put_task_struct_cb(struct rcu_head *rhp);
  9383. +
  9384. +static inline void put_task_struct(struct task_struct *t)
  9385. +{
  9386. + if (atomic_dec_and_test(&t->usage))
  9387. + call_rcu(&t->put_rcu, __put_task_struct_cb);
  9388. +}
  9389. +#else
  9390. extern void __put_task_struct(struct task_struct *t);
  9391. static inline void put_task_struct(struct task_struct *t)
  9392. @@ -2169,6 +2218,7 @@
  9393. if (atomic_dec_and_test(&t->usage))
  9394. __put_task_struct(t);
  9395. }
  9396. +#endif
  9397. struct task_struct *task_rcu_dereference(struct task_struct **ptask);
  9398. struct task_struct *try_get_task_struct(struct task_struct **ptask);
  9399. @@ -2210,6 +2260,7 @@
  9400. /*
  9401. * Per process flags
  9402. */
  9403. +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */
  9404. #define PF_EXITING 0x00000004 /* getting shut down */
  9405. #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
  9406. #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
  9407. @@ -2378,6 +2429,10 @@
  9408. extern int set_cpus_allowed_ptr(struct task_struct *p,
  9409. const struct cpumask *new_mask);
  9410. +int migrate_me(void);
  9411. +void tell_sched_cpu_down_begin(int cpu);
  9412. +void tell_sched_cpu_down_done(int cpu);
  9413. +
  9414. #else
  9415. static inline void do_set_cpus_allowed(struct task_struct *p,
  9416. const struct cpumask *new_mask)
  9417. @@ -2390,6 +2445,9 @@
  9418. return -EINVAL;
  9419. return 0;
  9420. }
  9421. +static inline int migrate_me(void) { return 0; }
  9422. +static inline void tell_sched_cpu_down_begin(int cpu) { }
  9423. +static inline void tell_sched_cpu_down_done(int cpu) { }
  9424. #endif
  9425. #ifdef CONFIG_NO_HZ_COMMON
  9426. @@ -2624,6 +2682,7 @@
  9427. extern int wake_up_state(struct task_struct *tsk, unsigned int state);
  9428. extern int wake_up_process(struct task_struct *tsk);
  9429. +extern int wake_up_lock_sleeper(struct task_struct * tsk);
  9430. extern void wake_up_new_task(struct task_struct *tsk);
  9431. #ifdef CONFIG_SMP
  9432. extern void kick_process(struct task_struct *tsk);
  9433. @@ -2832,6 +2891,17 @@
  9434. __mmdrop(mm);
  9435. }
  9436. +#ifdef CONFIG_PREEMPT_RT_BASE
  9437. +extern void __mmdrop_delayed(struct rcu_head *rhp);
  9438. +static inline void mmdrop_delayed(struct mm_struct *mm)
  9439. +{
  9440. + if (atomic_dec_and_test(&mm->mm_count))
  9441. + call_rcu(&mm->delayed_drop, __mmdrop_delayed);
  9442. +}
  9443. +#else
  9444. +# define mmdrop_delayed(mm) mmdrop(mm)
  9445. +#endif
  9446. +
  9447. static inline bool mmget_not_zero(struct mm_struct *mm)
  9448. {
  9449. return atomic_inc_not_zero(&mm->mm_users);
  9450. @@ -3168,6 +3238,43 @@
  9451. return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
  9452. }
  9453. +#ifdef CONFIG_PREEMPT_LAZY
  9454. +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
  9455. +{
  9456. + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
  9457. +}
  9458. +
  9459. +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
  9460. +{
  9461. + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
  9462. +}
  9463. +
  9464. +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
  9465. +{
  9466. + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
  9467. +}
  9468. +
  9469. +static inline int need_resched_lazy(void)
  9470. +{
  9471. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  9472. +}
  9473. +
  9474. +static inline int need_resched_now(void)
  9475. +{
  9476. + return test_thread_flag(TIF_NEED_RESCHED);
  9477. +}
  9478. +
  9479. +#else
  9480. +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
  9481. +static inline int need_resched_lazy(void) { return 0; }
  9482. +
  9483. +static inline int need_resched_now(void)
  9484. +{
  9485. + return test_thread_flag(TIF_NEED_RESCHED);
  9486. +}
  9487. +
  9488. +#endif
  9489. +
  9490. static inline int restart_syscall(void)
  9491. {
  9492. set_tsk_thread_flag(current, TIF_SIGPENDING);
  9493. @@ -3199,6 +3306,51 @@
  9494. return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
  9495. }
  9496. +static inline bool __task_is_stopped_or_traced(struct task_struct *task)
  9497. +{
  9498. + if (task->state & (__TASK_STOPPED | __TASK_TRACED))
  9499. + return true;
  9500. +#ifdef CONFIG_PREEMPT_RT_FULL
  9501. + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
  9502. + return true;
  9503. +#endif
  9504. + return false;
  9505. +}
  9506. +
  9507. +static inline bool task_is_stopped_or_traced(struct task_struct *task)
  9508. +{
  9509. + bool traced_stopped;
  9510. +
  9511. +#ifdef CONFIG_PREEMPT_RT_FULL
  9512. + unsigned long flags;
  9513. +
  9514. + raw_spin_lock_irqsave(&task->pi_lock, flags);
  9515. + traced_stopped = __task_is_stopped_or_traced(task);
  9516. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  9517. +#else
  9518. + traced_stopped = __task_is_stopped_or_traced(task);
  9519. +#endif
  9520. + return traced_stopped;
  9521. +}
  9522. +
  9523. +static inline bool task_is_traced(struct task_struct *task)
  9524. +{
  9525. + bool traced = false;
  9526. +
  9527. + if (task->state & __TASK_TRACED)
  9528. + return true;
  9529. +#ifdef CONFIG_PREEMPT_RT_FULL
  9530. + /* in case the task is sleeping on tasklist_lock */
  9531. + raw_spin_lock_irq(&task->pi_lock);
  9532. + if (task->state & __TASK_TRACED)
  9533. + traced = true;
  9534. + else if (task->saved_state & __TASK_TRACED)
  9535. + traced = true;
  9536. + raw_spin_unlock_irq(&task->pi_lock);
  9537. +#endif
  9538. + return traced;
  9539. +}
  9540. +
  9541. /*
  9542. * cond_resched() and cond_resched_lock(): latency reduction via
  9543. * explicit rescheduling in places that are safe. The return
  9544. @@ -3220,12 +3372,16 @@
  9545. __cond_resched_lock(lock); \
  9546. })
  9547. +#ifndef CONFIG_PREEMPT_RT_FULL
  9548. extern int __cond_resched_softirq(void);
  9549. #define cond_resched_softirq() ({ \
  9550. ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
  9551. __cond_resched_softirq(); \
  9552. })
  9553. +#else
  9554. +# define cond_resched_softirq() cond_resched()
  9555. +#endif
  9556. static inline void cond_resched_rcu(void)
  9557. {
  9558. @@ -3387,6 +3543,31 @@
  9559. #endif /* CONFIG_SMP */
  9560. +static inline int __migrate_disabled(struct task_struct *p)
  9561. +{
  9562. +#ifdef CONFIG_PREEMPT_RT_FULL
  9563. + return p->migrate_disable;
  9564. +#else
  9565. + return 0;
  9566. +#endif
  9567. +}
  9568. +
  9569. +/* Future-safe accessor for struct task_struct's cpus_allowed. */
  9570. +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
  9571. +{
  9572. + if (__migrate_disabled(p))
  9573. + return cpumask_of(task_cpu(p));
  9574. +
  9575. + return &p->cpus_allowed;
  9576. +}
  9577. +
  9578. +static inline int tsk_nr_cpus_allowed(struct task_struct *p)
  9579. +{
  9580. + if (__migrate_disabled(p))
  9581. + return 1;
  9582. + return p->nr_cpus_allowed;
  9583. +}
  9584. +
  9585. extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
  9586. extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
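
tsk_cpus_allowed() and tsk_nr_cpus_allowed() change from simple field accessors into helpers that honour migrate_disable(): a migrate-disabled task reports only its current CPU as allowed. A sketch of how a placement check might consult them (function name invented):

#include <linux/sched.h>
#include <linux/cpumask.h>

static bool example_can_place(struct task_struct *p, int cpu)
{
	/* For a migrate-disabled task this is true only for task_cpu(p). */
	return cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
}
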
  9587. diff -Nur linux-4.8.15.orig/include/linux/seqlock.h linux-4.8.15/include/linux/seqlock.h
  9588. --- linux-4.8.15.orig/include/linux/seqlock.h 2016-12-15 17:50:48.000000000 +0100
  9589. +++ linux-4.8.15/include/linux/seqlock.h 2017-01-01 17:07:15.899418461 +0100
  9590. @@ -220,20 +220,30 @@
  9591. return __read_seqcount_retry(s, start);
  9592. }
  9593. -
  9594. -
  9595. -static inline void raw_write_seqcount_begin(seqcount_t *s)
  9596. +static inline void __raw_write_seqcount_begin(seqcount_t *s)
  9597. {
  9598. s->sequence++;
  9599. smp_wmb();
  9600. }
  9601. -static inline void raw_write_seqcount_end(seqcount_t *s)
  9602. +static inline void raw_write_seqcount_begin(seqcount_t *s)
  9603. +{
  9604. + preempt_disable_rt();
  9605. + __raw_write_seqcount_begin(s);
  9606. +}
  9607. +
  9608. +static inline void __raw_write_seqcount_end(seqcount_t *s)
  9609. {
  9610. smp_wmb();
  9611. s->sequence++;
  9612. }
  9613. +static inline void raw_write_seqcount_end(seqcount_t *s)
  9614. +{
  9615. + __raw_write_seqcount_end(s);
  9616. + preempt_enable_rt();
  9617. +}
  9618. +
  9619. /**
  9620. * raw_write_seqcount_barrier - do a seq write barrier
  9621. * @s: pointer to seqcount_t
  9622. @@ -428,10 +438,32 @@
  9623. /*
  9624. * Read side functions for starting and finalizing a read side section.
  9625. */
  9626. +#ifndef CONFIG_PREEMPT_RT_FULL
  9627. static inline unsigned read_seqbegin(const seqlock_t *sl)
  9628. {
  9629. return read_seqcount_begin(&sl->seqcount);
  9630. }
  9631. +#else
  9632. +/*
  9633. + * Starvation safe read side for RT
  9634. + */
  9635. +static inline unsigned read_seqbegin(seqlock_t *sl)
  9636. +{
  9637. + unsigned ret;
  9638. +
  9639. +repeat:
  9640. + ret = ACCESS_ONCE(sl->seqcount.sequence);
  9641. + if (unlikely(ret & 1)) {
  9642. + /*
  9643. + * Take the lock and let the writer proceed (i.e. evtl
  9644. + * boost it), otherwise we could loop here forever.
  9645. + */
  9646. + spin_unlock_wait(&sl->lock);
  9647. + goto repeat;
  9648. + }
  9649. + return ret;
  9650. +}
  9651. +#endif
  9652. static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
  9653. {
  9654. @@ -446,36 +478,45 @@
  9655. static inline void write_seqlock(seqlock_t *sl)
  9656. {
  9657. spin_lock(&sl->lock);
  9658. - write_seqcount_begin(&sl->seqcount);
  9659. + __raw_write_seqcount_begin(&sl->seqcount);
  9660. +}
  9661. +
  9662. +static inline int try_write_seqlock(seqlock_t *sl)
  9663. +{
  9664. + if (spin_trylock(&sl->lock)) {
  9665. + __raw_write_seqcount_begin(&sl->seqcount);
  9666. + return 1;
  9667. + }
  9668. + return 0;
  9669. }
  9670. static inline void write_sequnlock(seqlock_t *sl)
  9671. {
  9672. - write_seqcount_end(&sl->seqcount);
  9673. + __raw_write_seqcount_end(&sl->seqcount);
  9674. spin_unlock(&sl->lock);
  9675. }
  9676. static inline void write_seqlock_bh(seqlock_t *sl)
  9677. {
  9678. spin_lock_bh(&sl->lock);
  9679. - write_seqcount_begin(&sl->seqcount);
  9680. + __raw_write_seqcount_begin(&sl->seqcount);
  9681. }
  9682. static inline void write_sequnlock_bh(seqlock_t *sl)
  9683. {
  9684. - write_seqcount_end(&sl->seqcount);
  9685. + __raw_write_seqcount_end(&sl->seqcount);
  9686. spin_unlock_bh(&sl->lock);
  9687. }
  9688. static inline void write_seqlock_irq(seqlock_t *sl)
  9689. {
  9690. spin_lock_irq(&sl->lock);
  9691. - write_seqcount_begin(&sl->seqcount);
  9692. + __raw_write_seqcount_begin(&sl->seqcount);
  9693. }
  9694. static inline void write_sequnlock_irq(seqlock_t *sl)
  9695. {
  9696. - write_seqcount_end(&sl->seqcount);
  9697. + __raw_write_seqcount_end(&sl->seqcount);
  9698. spin_unlock_irq(&sl->lock);
  9699. }
  9700. @@ -484,7 +525,7 @@
  9701. unsigned long flags;
  9702. spin_lock_irqsave(&sl->lock, flags);
  9703. - write_seqcount_begin(&sl->seqcount);
  9704. + __raw_write_seqcount_begin(&sl->seqcount);
  9705. return flags;
  9706. }
  9707. @@ -494,7 +535,7 @@
  9708. static inline void
  9709. write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
  9710. {
  9711. - write_seqcount_end(&sl->seqcount);
  9712. + __raw_write_seqcount_end(&sl->seqcount);
  9713. spin_unlock_irqrestore(&sl->lock, flags);
  9714. }
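
Because write_seqlock() now uses __raw_write_seqcount_begin() under the spinlock and the RT read_seqbegin() waits on that spinlock instead of spinning on the sequence, the classic writer/reader pattern stays starvation-safe on RT. A sketch with invented statistics fields:

#include <linux/seqlock.h>
#include <linux/types.h>

static DEFINE_SEQLOCK(example_stats_lock);
static u64 example_packets, example_bytes;

static void example_stats_update(u64 bytes)
{
	write_seqlock(&example_stats_lock);	/* spinlock + seqcount */
	example_packets++;
	example_bytes += bytes;
	write_sequnlock(&example_stats_lock);
}

static void example_stats_read(u64 *packets, u64 *bytes)
{
	unsigned int seq;

	do {
		seq = read_seqbegin(&example_stats_lock);
		*packets = example_packets;
		*bytes = example_bytes;
	} while (read_seqretry(&example_stats_lock, seq));
}
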
  9715. diff -Nur linux-4.8.15.orig/include/linux/signal.h linux-4.8.15/include/linux/signal.h
  9716. --- linux-4.8.15.orig/include/linux/signal.h 2016-12-15 17:50:48.000000000 +0100
  9717. +++ linux-4.8.15/include/linux/signal.h 2017-01-01 17:07:15.899418461 +0100
  9718. @@ -233,6 +233,7 @@
  9719. }
  9720. extern void flush_sigqueue(struct sigpending *queue);
  9721. +extern void flush_task_sigqueue(struct task_struct *tsk);
  9722. /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
  9723. static inline int valid_signal(unsigned long sig)
  9724. diff -Nur linux-4.8.15.orig/include/linux/skbuff.h linux-4.8.15/include/linux/skbuff.h
  9725. --- linux-4.8.15.orig/include/linux/skbuff.h 2016-12-15 17:50:48.000000000 +0100
  9726. +++ linux-4.8.15/include/linux/skbuff.h 2017-01-01 17:07:15.915419486 +0100
  9727. @@ -284,6 +284,7 @@
  9728. __u32 qlen;
  9729. spinlock_t lock;
  9730. + raw_spinlock_t raw_lock;
  9731. };
  9732. struct sk_buff;
  9733. @@ -1565,6 +1566,12 @@
  9734. __skb_queue_head_init(list);
  9735. }
  9736. +static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
  9737. +{
  9738. + raw_spin_lock_init(&list->raw_lock);
  9739. + __skb_queue_head_init(list);
  9740. +}
  9741. +
  9742. static inline void skb_queue_head_init_class(struct sk_buff_head *list,
  9743. struct lock_class_key *class)
  9744. {
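
The raw_lock member gives RT a queue variant whose lock never sleeps; users are expected to take list->raw_lock themselves and manipulate the list with the lockless __skb_* helpers. A minimal sketch, assuming a hypothetical queue (my_rawq is not from the patch):

    static struct sk_buff_head my_rawq;

    static void my_rawq_init(void)
    {
            skb_queue_head_init_raw(&my_rawq);      /* initializes raw_lock, not lock */
    }

    static void my_rawq_add(struct sk_buff *skb)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&my_rawq.raw_lock, flags);
            __skb_queue_tail(&my_rawq, skb);        /* lockless helper; caller holds raw_lock */
            raw_spin_unlock_irqrestore(&my_rawq.raw_lock, flags);
    }
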
  9745. diff -Nur linux-4.8.15.orig/include/linux/smp.h linux-4.8.15/include/linux/smp.h
  9746. --- linux-4.8.15.orig/include/linux/smp.h 2016-12-15 17:50:48.000000000 +0100
  9747. +++ linux-4.8.15/include/linux/smp.h 2017-01-01 17:07:15.915419486 +0100
  9748. @@ -185,6 +185,9 @@
  9749. #define get_cpu() ({ preempt_disable(); smp_processor_id(); })
  9750. #define put_cpu() preempt_enable()
  9751. +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
  9752. +#define put_cpu_light() migrate_enable()
  9753. +
  9754. /*
  9755. * Callback to arch code if there's nosmp or maxcpus=0 on the
  9756. * boot command line:
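
get_cpu_light() only disables migration, so the section can still be preempted but is guaranteed to stay on the same CPU; data shared with other tasks on that CPU still needs its own serialization. A small sketch of the pairing (the function is illustrative):

    static void report_current_cpu(void)
    {
            int cpu = get_cpu_light();      /* migrate_disable() + smp_processor_id() */

            pr_info("running on CPU %d\n", cpu);
            /* preemption stays enabled here, but the task cannot migrate */
            put_cpu_light();                /* migrate_enable() */
    }
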
  9757. diff -Nur linux-4.8.15.orig/include/linux/spinlock_api_smp.h linux-4.8.15/include/linux/spinlock_api_smp.h
  9758. --- linux-4.8.15.orig/include/linux/spinlock_api_smp.h 2016-12-15 17:50:48.000000000 +0100
  9759. +++ linux-4.8.15/include/linux/spinlock_api_smp.h 2017-01-01 17:07:15.919419743 +0100
  9760. @@ -189,6 +189,8 @@
  9761. return 0;
  9762. }
  9763. -#include <linux/rwlock_api_smp.h>
  9764. +#ifndef CONFIG_PREEMPT_RT_FULL
  9765. +# include <linux/rwlock_api_smp.h>
  9766. +#endif
  9767. #endif /* __LINUX_SPINLOCK_API_SMP_H */
  9768. diff -Nur linux-4.8.15.orig/include/linux/spinlock.h linux-4.8.15/include/linux/spinlock.h
  9769. --- linux-4.8.15.orig/include/linux/spinlock.h 2016-12-15 17:50:48.000000000 +0100
  9770. +++ linux-4.8.15/include/linux/spinlock.h 2017-01-01 17:07:15.919419743 +0100
  9771. @@ -271,7 +271,11 @@
  9772. #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock))
  9773. /* Include rwlock functions */
  9774. -#include <linux/rwlock.h>
  9775. +#ifdef CONFIG_PREEMPT_RT_FULL
  9776. +# include <linux/rwlock_rt.h>
  9777. +#else
  9778. +# include <linux/rwlock.h>
  9779. +#endif
  9780. /*
  9781. * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
  9782. @@ -282,6 +286,10 @@
  9783. # include <linux/spinlock_api_up.h>
  9784. #endif
  9785. +#ifdef CONFIG_PREEMPT_RT_FULL
  9786. +# include <linux/spinlock_rt.h>
  9787. +#else /* PREEMPT_RT_FULL */
  9788. +
  9789. /*
  9790. * Map the spin_lock functions to the raw variants for PREEMPT_RT=n
  9791. */
  9792. @@ -347,6 +355,12 @@
  9793. raw_spin_unlock(&lock->rlock);
  9794. }
  9795. +static __always_inline int spin_unlock_no_deboost(spinlock_t *lock)
  9796. +{
  9797. + raw_spin_unlock(&lock->rlock);
  9798. + return 0;
  9799. +}
  9800. +
  9801. static __always_inline void spin_unlock_bh(spinlock_t *lock)
  9802. {
  9803. raw_spin_unlock_bh(&lock->rlock);
  9804. @@ -416,4 +430,6 @@
  9805. #define atomic_dec_and_lock(atomic, lock) \
  9806. __cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
  9807. +#endif /* !PREEMPT_RT_FULL */
  9808. +
  9809. #endif /* __LINUX_SPINLOCK_H */
  9810. diff -Nur linux-4.8.15.orig/include/linux/spinlock_rt.h linux-4.8.15/include/linux/spinlock_rt.h
  9811. --- linux-4.8.15.orig/include/linux/spinlock_rt.h 1970-01-01 01:00:00.000000000 +0100
  9812. +++ linux-4.8.15/include/linux/spinlock_rt.h 2017-01-01 17:07:15.923420007 +0100
  9813. @@ -0,0 +1,165 @@
  9814. +#ifndef __LINUX_SPINLOCK_RT_H
  9815. +#define __LINUX_SPINLOCK_RT_H
  9816. +
  9817. +#ifndef __LINUX_SPINLOCK_H
  9818. +#error Do not include directly. Use spinlock.h
  9819. +#endif
  9820. +
  9821. +#include <linux/bug.h>
  9822. +
  9823. +extern void
  9824. +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key);
  9825. +
  9826. +#define spin_lock_init(slock) \
  9827. +do { \
  9828. + static struct lock_class_key __key; \
  9829. + \
  9830. + rt_mutex_init(&(slock)->lock); \
  9831. + __rt_spin_lock_init(slock, #slock, &__key); \
  9832. +} while (0)
  9833. +
  9834. +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock);
  9835. +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock);
  9836. +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock);
  9837. +
  9838. +extern void __lockfunc rt_spin_lock(spinlock_t *lock);
  9839. +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
  9840. +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
  9841. +extern void __lockfunc rt_spin_unlock(spinlock_t *lock);
  9842. +extern int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock);
  9843. +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock);
  9844. +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags);
  9845. +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock);
  9846. +extern int __lockfunc rt_spin_trylock(spinlock_t *lock);
  9847. +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock);
  9848. +
  9849. +/*
  9850. + * lockdep-less calls, for derived types like rwlock:
  9851. + * (for trylock they can use rt_mutex_trylock() directly.
  9852. + */
  9853. +extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock);
  9854. +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
  9855. +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
  9856. +extern int __lockfunc __rt_spin_trylock(struct rt_mutex *lock);
  9857. +
  9858. +#define spin_lock(lock) rt_spin_lock(lock)
  9859. +
  9860. +#define spin_lock_bh(lock) \
  9861. + do { \
  9862. + local_bh_disable(); \
  9863. + rt_spin_lock(lock); \
  9864. + } while (0)
  9865. +
  9866. +#define spin_lock_irq(lock) spin_lock(lock)
  9867. +
  9868. +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock))
  9869. +
  9870. +#define spin_trylock(lock) \
  9871. +({ \
  9872. + int __locked; \
  9873. + __locked = spin_do_trylock(lock); \
  9874. + __locked; \
  9875. +})
  9876. +
  9877. +#ifdef CONFIG_LOCKDEP
  9878. +# define spin_lock_nested(lock, subclass) \
  9879. + do { \
  9880. + rt_spin_lock_nested(lock, subclass); \
  9881. + } while (0)
  9882. +
  9883. +#define spin_lock_bh_nested(lock, subclass) \
  9884. + do { \
  9885. + local_bh_disable(); \
  9886. + rt_spin_lock_nested(lock, subclass); \
  9887. + } while (0)
  9888. +
  9889. +# define spin_lock_irqsave_nested(lock, flags, subclass) \
  9890. + do { \
  9891. + typecheck(unsigned long, flags); \
  9892. + flags = 0; \
  9893. + rt_spin_lock_nested(lock, subclass); \
  9894. + } while (0)
  9895. +#else
  9896. +# define spin_lock_nested(lock, subclass) spin_lock(lock)
  9897. +# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock)
  9898. +
  9899. +# define spin_lock_irqsave_nested(lock, flags, subclass) \
  9900. + do { \
  9901. + typecheck(unsigned long, flags); \
  9902. + flags = 0; \
  9903. + spin_lock(lock); \
  9904. + } while (0)
  9905. +#endif
  9906. +
  9907. +#define spin_lock_irqsave(lock, flags) \
  9908. + do { \
  9909. + typecheck(unsigned long, flags); \
  9910. + flags = 0; \
  9911. + spin_lock(lock); \
  9912. + } while (0)
  9913. +
  9914. +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock)
  9915. +{
  9916. + unsigned long flags = 0;
  9917. +#ifdef CONFIG_TRACE_IRQFLAGS
  9918. + flags = rt_spin_lock_trace_flags(lock);
  9919. +#else
  9920. + spin_lock(lock); /* lock_local */
  9921. +#endif
  9922. + return flags;
  9923. +}
  9924. +
  9925. +/* FIXME: we need rt_spin_lock_nest_lock */
  9926. +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0)
  9927. +
  9928. +#define spin_unlock(lock) rt_spin_unlock(lock)
  9929. +#define spin_unlock_no_deboost(lock) rt_spin_unlock_no_deboost(lock)
  9930. +
  9931. +#define spin_unlock_bh(lock) \
  9932. + do { \
  9933. + rt_spin_unlock(lock); \
  9934. + local_bh_enable(); \
  9935. + } while (0)
  9936. +
  9937. +#define spin_unlock_irq(lock) spin_unlock(lock)
  9938. +
  9939. +#define spin_unlock_irqrestore(lock, flags) \
  9940. + do { \
  9941. + typecheck(unsigned long, flags); \
  9942. + (void) flags; \
  9943. + spin_unlock(lock); \
  9944. + } while (0)
  9945. +
  9946. +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock))
  9947. +#define spin_trylock_irq(lock) spin_trylock(lock)
  9948. +
  9949. +#define spin_trylock_irqsave(lock, flags) \
  9950. + rt_spin_trylock_irqsave(lock, &(flags))
  9951. +
  9952. +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock)
  9953. +
  9954. +#ifdef CONFIG_GENERIC_LOCKBREAK
  9955. +# define spin_is_contended(lock) ((lock)->break_lock)
  9956. +#else
  9957. +# define spin_is_contended(lock) (((void)(lock), 0))
  9958. +#endif
  9959. +
  9960. +static inline int spin_can_lock(spinlock_t *lock)
  9961. +{
  9962. + return !rt_mutex_is_locked(&lock->lock);
  9963. +}
  9964. +
  9965. +static inline int spin_is_locked(spinlock_t *lock)
  9966. +{
  9967. + return rt_mutex_is_locked(&lock->lock);
  9968. +}
  9969. +
  9970. +static inline void assert_spin_locked(spinlock_t *lock)
  9971. +{
  9972. + BUG_ON(!spin_is_locked(lock));
  9973. +}
  9974. +
  9975. +#define atomic_dec_and_lock(atomic, lock) \
  9976. + atomic_dec_and_spin_lock(atomic, lock)
  9977. +
  9978. +#endif
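
On PREEMPT_RT_FULL every spinlock_t is an rtmutex, so spin_lock_irqsave() above neither disables hard interrupts nor saves any flags (flags is simply set to 0), and the critical section may sleep. A hedged sketch of what that means for callers (lock and counter are hypothetical):

    static DEFINE_SPINLOCK(stats_lock);     /* rt_mutex based when PREEMPT_RT_FULL=y */
    static unsigned long stats_count;

    static void stats_inc(void)
    {
            unsigned long flags;

            spin_lock_irqsave(&stats_lock, flags);  /* on RT: rt_spin_lock(), flags = 0 */
            stats_count++;                          /* preemption/sleeping allowed here on RT */
            spin_unlock_irqrestore(&stats_lock, flags);
    }

Code that genuinely needs hard interrupts disabled must use raw_spinlock_t (see spinlock_types_raw.h below) or local_irq_save() explicitly.
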
  9979. diff -Nur linux-4.8.15.orig/include/linux/spinlock_types.h linux-4.8.15/include/linux/spinlock_types.h
  9980. --- linux-4.8.15.orig/include/linux/spinlock_types.h 2016-12-15 17:50:48.000000000 +0100
  9981. +++ linux-4.8.15/include/linux/spinlock_types.h 2017-01-01 17:07:15.923420007 +0100
  9982. @@ -9,80 +9,15 @@
  9983. * Released under the General Public License (GPL).
  9984. */
  9985. -#if defined(CONFIG_SMP)
  9986. -# include <asm/spinlock_types.h>
  9987. -#else
  9988. -# include <linux/spinlock_types_up.h>
  9989. -#endif
  9990. -
  9991. -#include <linux/lockdep.h>
  9992. -
  9993. -typedef struct raw_spinlock {
  9994. - arch_spinlock_t raw_lock;
  9995. -#ifdef CONFIG_GENERIC_LOCKBREAK
  9996. - unsigned int break_lock;
  9997. -#endif
  9998. -#ifdef CONFIG_DEBUG_SPINLOCK
  9999. - unsigned int magic, owner_cpu;
  10000. - void *owner;
  10001. -#endif
  10002. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  10003. - struct lockdep_map dep_map;
  10004. -#endif
  10005. -} raw_spinlock_t;
  10006. -
  10007. -#define SPINLOCK_MAGIC 0xdead4ead
  10008. -
  10009. -#define SPINLOCK_OWNER_INIT ((void *)-1L)
  10010. -
  10011. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  10012. -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  10013. -#else
  10014. -# define SPIN_DEP_MAP_INIT(lockname)
  10015. -#endif
  10016. +#include <linux/spinlock_types_raw.h>
  10017. -#ifdef CONFIG_DEBUG_SPINLOCK
  10018. -# define SPIN_DEBUG_INIT(lockname) \
  10019. - .magic = SPINLOCK_MAGIC, \
  10020. - .owner_cpu = -1, \
  10021. - .owner = SPINLOCK_OWNER_INIT,
  10022. +#ifndef CONFIG_PREEMPT_RT_FULL
  10023. +# include <linux/spinlock_types_nort.h>
  10024. +# include <linux/rwlock_types.h>
  10025. #else
  10026. -# define SPIN_DEBUG_INIT(lockname)
  10027. +# include <linux/rtmutex.h>
  10028. +# include <linux/spinlock_types_rt.h>
  10029. +# include <linux/rwlock_types_rt.h>
  10030. #endif
  10031. -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
  10032. - { \
  10033. - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
  10034. - SPIN_DEBUG_INIT(lockname) \
  10035. - SPIN_DEP_MAP_INIT(lockname) }
  10036. -
  10037. -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
  10038. - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
  10039. -
  10040. -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
  10041. -
  10042. -typedef struct spinlock {
  10043. - union {
  10044. - struct raw_spinlock rlock;
  10045. -
  10046. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  10047. -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
  10048. - struct {
  10049. - u8 __padding[LOCK_PADSIZE];
  10050. - struct lockdep_map dep_map;
  10051. - };
  10052. -#endif
  10053. - };
  10054. -} spinlock_t;
  10055. -
  10056. -#define __SPIN_LOCK_INITIALIZER(lockname) \
  10057. - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
  10058. -
  10059. -#define __SPIN_LOCK_UNLOCKED(lockname) \
  10060. - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
  10061. -
  10062. -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
  10063. -
  10064. -#include <linux/rwlock_types.h>
  10065. -
  10066. #endif /* __LINUX_SPINLOCK_TYPES_H */
  10067. diff -Nur linux-4.8.15.orig/include/linux/spinlock_types_nort.h linux-4.8.15/include/linux/spinlock_types_nort.h
  10068. --- linux-4.8.15.orig/include/linux/spinlock_types_nort.h 1970-01-01 01:00:00.000000000 +0100
  10069. +++ linux-4.8.15/include/linux/spinlock_types_nort.h 2017-01-01 17:07:15.923420007 +0100
  10070. @@ -0,0 +1,33 @@
  10071. +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H
  10072. +#define __LINUX_SPINLOCK_TYPES_NORT_H
  10073. +
  10074. +#ifndef __LINUX_SPINLOCK_TYPES_H
  10075. +#error "Do not include directly. Include spinlock_types.h instead"
  10076. +#endif
  10077. +
  10078. +/*
  10079. + * The non RT version maps spinlocks to raw_spinlocks
  10080. + */
  10081. +typedef struct spinlock {
  10082. + union {
  10083. + struct raw_spinlock rlock;
  10084. +
  10085. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  10086. +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
  10087. + struct {
  10088. + u8 __padding[LOCK_PADSIZE];
  10089. + struct lockdep_map dep_map;
  10090. + };
  10091. +#endif
  10092. + };
  10093. +} spinlock_t;
  10094. +
  10095. +#define __SPIN_LOCK_INITIALIZER(lockname) \
  10096. + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
  10097. +
  10098. +#define __SPIN_LOCK_UNLOCKED(lockname) \
  10099. + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
  10100. +
  10101. +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
  10102. +
  10103. +#endif
  10104. diff -Nur linux-4.8.15.orig/include/linux/spinlock_types_raw.h linux-4.8.15/include/linux/spinlock_types_raw.h
  10105. --- linux-4.8.15.orig/include/linux/spinlock_types_raw.h 1970-01-01 01:00:00.000000000 +0100
  10106. +++ linux-4.8.15/include/linux/spinlock_types_raw.h 2017-01-01 17:07:15.923420007 +0100
  10107. @@ -0,0 +1,56 @@
  10108. +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  10109. +#define __LINUX_SPINLOCK_TYPES_RAW_H
  10110. +
  10111. +#if defined(CONFIG_SMP)
  10112. +# include <asm/spinlock_types.h>
  10113. +#else
  10114. +# include <linux/spinlock_types_up.h>
  10115. +#endif
  10116. +
  10117. +#include <linux/lockdep.h>
  10118. +
  10119. +typedef struct raw_spinlock {
  10120. + arch_spinlock_t raw_lock;
  10121. +#ifdef CONFIG_GENERIC_LOCKBREAK
  10122. + unsigned int break_lock;
  10123. +#endif
  10124. +#ifdef CONFIG_DEBUG_SPINLOCK
  10125. + unsigned int magic, owner_cpu;
  10126. + void *owner;
  10127. +#endif
  10128. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  10129. + struct lockdep_map dep_map;
  10130. +#endif
  10131. +} raw_spinlock_t;
  10132. +
  10133. +#define SPINLOCK_MAGIC 0xdead4ead
  10134. +
  10135. +#define SPINLOCK_OWNER_INIT ((void *)-1L)
  10136. +
  10137. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  10138. +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  10139. +#else
  10140. +# define SPIN_DEP_MAP_INIT(lockname)
  10141. +#endif
  10142. +
  10143. +#ifdef CONFIG_DEBUG_SPINLOCK
  10144. +# define SPIN_DEBUG_INIT(lockname) \
  10145. + .magic = SPINLOCK_MAGIC, \
  10146. + .owner_cpu = -1, \
  10147. + .owner = SPINLOCK_OWNER_INIT,
  10148. +#else
  10149. +# define SPIN_DEBUG_INIT(lockname)
  10150. +#endif
  10151. +
  10152. +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
  10153. + { \
  10154. + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
  10155. + SPIN_DEBUG_INIT(lockname) \
  10156. + SPIN_DEP_MAP_INIT(lockname) }
  10157. +
  10158. +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
  10159. + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
  10160. +
  10161. +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
  10162. +
  10163. +#endif
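
raw_spinlock_t keeps the original spinning, IRQ-disabling semantics even on RT; splitting its definition into this header lets both the !RT spinlock_t and the RT rtmutex-based spinlock_t build on it. A sketch of the kind of section that must stay on a raw lock (names are illustrative):

    static DEFINE_RAW_SPINLOCK(hw_lock);    /* real spinning lock, even on RT */

    static void poke_hw(void __iomem *reg, u32 val)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&hw_lock, flags); /* hard IRQs off, sleeping forbidden */
            writel(val, reg);
            raw_spin_unlock_irqrestore(&hw_lock, flags);
    }
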
  10164. diff -Nur linux-4.8.15.orig/include/linux/spinlock_types_rt.h linux-4.8.15/include/linux/spinlock_types_rt.h
  10165. --- linux-4.8.15.orig/include/linux/spinlock_types_rt.h 1970-01-01 01:00:00.000000000 +0100
  10166. +++ linux-4.8.15/include/linux/spinlock_types_rt.h 2017-01-01 17:07:15.923420007 +0100
  10167. @@ -0,0 +1,48 @@
  10168. +#ifndef __LINUX_SPINLOCK_TYPES_RT_H
  10169. +#define __LINUX_SPINLOCK_TYPES_RT_H
  10170. +
  10171. +#ifndef __LINUX_SPINLOCK_TYPES_H
  10172. +#error "Do not include directly. Include spinlock_types.h instead"
  10173. +#endif
  10174. +
  10175. +#include <linux/cache.h>
  10176. +
  10177. +/*
  10178. + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field:
  10179. + */
  10180. +typedef struct spinlock {
  10181. + struct rt_mutex lock;
  10182. + unsigned int break_lock;
  10183. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  10184. + struct lockdep_map dep_map;
  10185. +#endif
  10186. +} spinlock_t;
  10187. +
  10188. +#ifdef CONFIG_DEBUG_RT_MUTEXES
  10189. +# define __RT_SPIN_INITIALIZER(name) \
  10190. + { \
  10191. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
  10192. + .save_state = 1, \
  10193. + .file = __FILE__, \
  10194. + .line = __LINE__ , \
  10195. + }
  10196. +#else
  10197. +# define __RT_SPIN_INITIALIZER(name) \
  10198. + { \
  10199. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
  10200. + .save_state = 1, \
  10201. + }
  10202. +#endif
  10203. +
  10204. +/*
  10205. +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock)
  10206. +*/
  10207. +
  10208. +#define __SPIN_LOCK_UNLOCKED(name) \
  10209. + { .lock = __RT_SPIN_INITIALIZER(name.lock), \
  10210. + SPIN_DEP_MAP_INIT(name) }
  10211. +
  10212. +#define DEFINE_SPINLOCK(name) \
  10213. + spinlock_t name = __SPIN_LOCK_UNLOCKED(name)
  10214. +
  10215. +#endif
  10216. diff -Nur linux-4.8.15.orig/include/linux/srcu.h linux-4.8.15/include/linux/srcu.h
  10217. --- linux-4.8.15.orig/include/linux/srcu.h 2016-12-15 17:50:48.000000000 +0100
  10218. +++ linux-4.8.15/include/linux/srcu.h 2017-01-01 17:07:15.923420007 +0100
  10219. @@ -84,10 +84,10 @@
  10220. void process_srcu(struct work_struct *work);
  10221. -#define __SRCU_STRUCT_INIT(name) \
  10222. +#define __SRCU_STRUCT_INIT(name, pcpu_name) \
  10223. { \
  10224. .completed = -300, \
  10225. - .per_cpu_ref = &name##_srcu_array, \
  10226. + .per_cpu_ref = &pcpu_name, \
  10227. .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \
  10228. .running = false, \
  10229. .batch_queue = RCU_BATCH_INIT(name.batch_queue), \
  10230. @@ -119,7 +119,7 @@
  10231. */
  10232. #define __DEFINE_SRCU(name, is_static) \
  10233. static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
  10234. - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
  10235. + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array)
  10236. #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
  10237. #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
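
The extra pcpu_name parameter only changes how the initializer names the per-CPU array; DEFINE_SRCU()/DEFINE_STATIC_SRCU() users are unaffected. For reference, a minimal sketch of the unchanged usage (my_srcu and the reader are illustrative):

    DEFINE_STATIC_SRCU(my_srcu);    /* now expands via __SRCU_STRUCT_INIT(my_srcu, my_srcu_srcu_array) */

    static void my_reader(void)
    {
            int idx = srcu_read_lock(&my_srcu);
            /* ... dereference SRCU-protected data ... */
            srcu_read_unlock(&my_srcu, idx);
    }
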
  10238. diff -Nur linux-4.8.15.orig/include/linux/suspend.h linux-4.8.15/include/linux/suspend.h
  10239. --- linux-4.8.15.orig/include/linux/suspend.h 2016-12-15 17:50:48.000000000 +0100
  10240. +++ linux-4.8.15/include/linux/suspend.h 2017-01-01 17:07:15.927420258 +0100
  10241. @@ -193,6 +193,12 @@
  10242. void (*end)(void);
  10243. };
  10244. +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
  10245. +extern bool pm_in_action;
  10246. +#else
  10247. +# define pm_in_action false
  10248. +#endif
  10249. +
  10250. #ifdef CONFIG_SUSPEND
  10251. /**
  10252. * suspend_set_ops - set platform dependent suspend operations
  10253. diff -Nur linux-4.8.15.orig/include/linux/swait.h linux-4.8.15/include/linux/swait.h
  10254. --- linux-4.8.15.orig/include/linux/swait.h 2016-12-15 17:50:48.000000000 +0100
  10255. +++ linux-4.8.15/include/linux/swait.h 2017-01-01 17:07:15.931420510 +0100
  10256. @@ -87,6 +87,7 @@
  10257. extern void swake_up(struct swait_queue_head *q);
  10258. extern void swake_up_all(struct swait_queue_head *q);
  10259. extern void swake_up_locked(struct swait_queue_head *q);
  10260. +extern void swake_up_all_locked(struct swait_queue_head *q);
  10261. extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
  10262. extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
  10263. diff -Nur linux-4.8.15.orig/include/linux/swap.h linux-4.8.15/include/linux/swap.h
  10264. --- linux-4.8.15.orig/include/linux/swap.h 2016-12-15 17:50:48.000000000 +0100
  10265. +++ linux-4.8.15/include/linux/swap.h 2017-01-01 17:07:15.931420510 +0100
  10266. @@ -11,6 +11,7 @@
  10267. #include <linux/fs.h>
  10268. #include <linux/atomic.h>
  10269. #include <linux/page-flags.h>
  10270. +#include <linux/locallock.h>
  10271. #include <asm/page.h>
  10272. struct notifier_block;
  10273. @@ -243,7 +244,8 @@
  10274. void *workingset_eviction(struct address_space *mapping, struct page *page);
  10275. bool workingset_refault(void *shadow);
  10276. void workingset_activation(struct page *page);
  10277. -extern struct list_lru workingset_shadow_nodes;
  10278. +extern struct list_lru __workingset_shadow_nodes;
  10279. +DECLARE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
  10280. static inline unsigned int workingset_node_pages(struct radix_tree_node *node)
  10281. {
  10282. @@ -288,6 +290,7 @@
  10283. /* linux/mm/swap.c */
  10284. +DECLARE_LOCAL_IRQ_LOCK(swapvec_lock);
  10285. extern void lru_cache_add(struct page *);
  10286. extern void lru_cache_add_anon(struct page *page);
  10287. extern void lru_cache_add_file(struct page *page);
  10288. diff -Nur linux-4.8.15.orig/include/linux/swork.h linux-4.8.15/include/linux/swork.h
  10289. --- linux-4.8.15.orig/include/linux/swork.h 1970-01-01 01:00:00.000000000 +0100
  10290. +++ linux-4.8.15/include/linux/swork.h 2017-01-01 17:07:15.931420510 +0100
  10291. @@ -0,0 +1,24 @@
  10292. +#ifndef _LINUX_SWORK_H
  10293. +#define _LINUX_SWORK_H
  10294. +
  10295. +#include <linux/list.h>
  10296. +
  10297. +struct swork_event {
  10298. + struct list_head item;
  10299. + unsigned long flags;
  10300. + void (*func)(struct swork_event *);
  10301. +};
  10302. +
  10303. +static inline void INIT_SWORK(struct swork_event *event,
  10304. + void (*func)(struct swork_event *))
  10305. +{
  10306. + event->flags = 0;
  10307. + event->func = func;
  10308. +}
  10309. +
  10310. +bool swork_queue(struct swork_event *sev);
  10311. +
  10312. +int swork_get(void);
  10313. +void swork_put(void);
  10314. +
  10315. +#endif /* _LINUX_SWORK_H */
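
swork is the patch's simple, RT-friendly deferral mechanism: events are queued and later run in a dedicated kthread's process context. A hedged sketch of the intended call sequence (the event and handlers are hypothetical):

    static struct swork_event my_event;

    static void my_event_fn(struct swork_event *sev)
    {
            /* runs in the swork kthread, process context, may sleep */
    }

    static int my_setup(void)
    {
            int err = swork_get();          /* create/reference the worker thread */

            if (err)
                    return err;
            INIT_SWORK(&my_event, my_event_fn);
            return 0;
    }

    static void my_trigger(void)
    {
            swork_queue(&my_event);         /* defer; the handler runs later in the swork thread */
    }

    static void my_teardown(void)
    {
            swork_put();                    /* drop the worker reference */
    }
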
  10316. diff -Nur linux-4.8.15.orig/include/linux/thread_info.h linux-4.8.15/include/linux/thread_info.h
  10317. --- linux-4.8.15.orig/include/linux/thread_info.h 2016-12-15 17:50:48.000000000 +0100
  10318. +++ linux-4.8.15/include/linux/thread_info.h 2017-01-01 17:07:15.931420510 +0100
  10319. @@ -103,7 +103,17 @@
  10320. #define test_thread_flag(flag) \
  10321. test_ti_thread_flag(current_thread_info(), flag)
  10322. -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
  10323. +#ifdef CONFIG_PREEMPT_LAZY
  10324. +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \
  10325. + test_thread_flag(TIF_NEED_RESCHED_LAZY))
  10326. +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED))
  10327. +#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY)
  10328. +
  10329. +#else
  10330. +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
  10331. +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED)
  10332. +#define tif_need_resched_lazy() 0
  10333. +#endif
  10334. #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
  10335. static inline int arch_within_stack_frames(const void * const stack,
  10336. diff -Nur linux-4.8.15.orig/include/linux/timer.h linux-4.8.15/include/linux/timer.h
  10337. --- linux-4.8.15.orig/include/linux/timer.h 2016-12-15 17:50:48.000000000 +0100
  10338. +++ linux-4.8.15/include/linux/timer.h 2017-01-01 17:07:15.935420777 +0100
  10339. @@ -241,7 +241,7 @@
  10340. extern int try_to_del_timer_sync(struct timer_list *timer);
  10341. -#ifdef CONFIG_SMP
  10342. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  10343. extern int del_timer_sync(struct timer_list *timer);
  10344. #else
  10345. # define del_timer_sync(t) del_timer(t)
  10346. diff -Nur linux-4.8.15.orig/include/linux/trace_events.h linux-4.8.15/include/linux/trace_events.h
  10347. --- linux-4.8.15.orig/include/linux/trace_events.h 2016-12-15 17:50:48.000000000 +0100
  10348. +++ linux-4.8.15/include/linux/trace_events.h 2017-01-01 17:07:15.935420777 +0100
  10349. @@ -56,6 +56,9 @@
  10350. unsigned char flags;
  10351. unsigned char preempt_count;
  10352. int pid;
  10353. + unsigned short migrate_disable;
  10354. + unsigned short padding;
  10355. + unsigned char preempt_lazy_count;
  10356. };
  10357. #define TRACE_EVENT_TYPE_MAX \
  10358. diff -Nur linux-4.8.15.orig/include/linux/uaccess.h linux-4.8.15/include/linux/uaccess.h
  10359. --- linux-4.8.15.orig/include/linux/uaccess.h 2016-12-15 17:50:48.000000000 +0100
  10360. +++ linux-4.8.15/include/linux/uaccess.h 2017-01-01 17:07:15.935420777 +0100
  10361. @@ -24,6 +24,7 @@
  10362. */
  10363. static inline void pagefault_disable(void)
  10364. {
  10365. + migrate_disable();
  10366. pagefault_disabled_inc();
  10367. /*
  10368. * make sure to have issued the store before a pagefault
  10369. @@ -40,6 +41,7 @@
  10370. */
  10371. barrier();
  10372. pagefault_disabled_dec();
  10373. + migrate_enable();
  10374. }
  10375. /*
  10376. diff -Nur linux-4.8.15.orig/include/linux/uprobes.h linux-4.8.15/include/linux/uprobes.h
  10377. --- linux-4.8.15.orig/include/linux/uprobes.h 2016-12-15 17:50:48.000000000 +0100
  10378. +++ linux-4.8.15/include/linux/uprobes.h 2017-01-01 17:07:15.939421032 +0100
  10379. @@ -27,6 +27,7 @@
  10380. #include <linux/errno.h>
  10381. #include <linux/rbtree.h>
  10382. #include <linux/types.h>
  10383. +#include <linux/wait.h>
  10384. struct vm_area_struct;
  10385. struct mm_struct;
  10386. diff -Nur linux-4.8.15.orig/include/linux/vmstat.h linux-4.8.15/include/linux/vmstat.h
  10387. --- linux-4.8.15.orig/include/linux/vmstat.h 2016-12-15 17:50:48.000000000 +0100
  10388. +++ linux-4.8.15/include/linux/vmstat.h 2017-01-01 17:07:15.951421804 +0100
  10389. @@ -33,7 +33,9 @@
  10390. */
  10391. static inline void __count_vm_event(enum vm_event_item item)
  10392. {
  10393. + preempt_disable_rt();
  10394. raw_cpu_inc(vm_event_states.event[item]);
  10395. + preempt_enable_rt();
  10396. }
  10397. static inline void count_vm_event(enum vm_event_item item)
  10398. @@ -43,7 +45,9 @@
  10399. static inline void __count_vm_events(enum vm_event_item item, long delta)
  10400. {
  10401. + preempt_disable_rt();
  10402. raw_cpu_add(vm_event_states.event[item], delta);
  10403. + preempt_enable_rt();
  10404. }
  10405. static inline void count_vm_events(enum vm_event_item item, long delta)
  10406. diff -Nur linux-4.8.15.orig/include/linux/wait.h linux-4.8.15/include/linux/wait.h
  10407. --- linux-4.8.15.orig/include/linux/wait.h 2016-12-15 17:50:48.000000000 +0100
  10408. +++ linux-4.8.15/include/linux/wait.h 2017-01-01 17:07:15.955422055 +0100
  10409. @@ -8,6 +8,7 @@
  10410. #include <linux/spinlock.h>
  10411. #include <asm/current.h>
  10412. #include <uapi/linux/wait.h>
  10413. +#include <linux/atomic.h>
  10414. typedef struct __wait_queue wait_queue_t;
  10415. typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
  10416. diff -Nur linux-4.8.15.orig/include/net/dst.h linux-4.8.15/include/net/dst.h
  10417. --- linux-4.8.15.orig/include/net/dst.h 2016-12-15 17:50:48.000000000 +0100
  10418. +++ linux-4.8.15/include/net/dst.h 2017-01-01 17:07:15.963422578 +0100
  10419. @@ -446,7 +446,7 @@
  10420. static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
  10421. struct sk_buff *skb)
  10422. {
  10423. - const struct hh_cache *hh;
  10424. + struct hh_cache *hh;
  10425. if (dst->pending_confirm) {
  10426. unsigned long now = jiffies;
  10427. diff -Nur linux-4.8.15.orig/include/net/gen_stats.h linux-4.8.15/include/net/gen_stats.h
  10428. --- linux-4.8.15.orig/include/net/gen_stats.h 2016-12-15 17:50:48.000000000 +0100
  10429. +++ linux-4.8.15/include/net/gen_stats.h 2017-01-01 17:07:15.963422578 +0100
  10430. @@ -5,6 +5,7 @@
  10431. #include <linux/socket.h>
  10432. #include <linux/rtnetlink.h>
  10433. #include <linux/pkt_sched.h>
  10434. +#include <net/net_seq_lock.h>
  10435. struct gnet_stats_basic_cpu {
  10436. struct gnet_stats_basic_packed bstats;
  10437. @@ -33,11 +34,11 @@
  10438. spinlock_t *lock, struct gnet_dump *d,
  10439. int padattr);
  10440. -int gnet_stats_copy_basic(const seqcount_t *running,
  10441. +int gnet_stats_copy_basic(net_seqlock_t *running,
  10442. struct gnet_dump *d,
  10443. struct gnet_stats_basic_cpu __percpu *cpu,
  10444. struct gnet_stats_basic_packed *b);
  10445. -void __gnet_stats_copy_basic(const seqcount_t *running,
  10446. +void __gnet_stats_copy_basic(net_seqlock_t *running,
  10447. struct gnet_stats_basic_packed *bstats,
  10448. struct gnet_stats_basic_cpu __percpu *cpu,
  10449. struct gnet_stats_basic_packed *b);
  10450. @@ -55,14 +56,14 @@
  10451. struct gnet_stats_basic_cpu __percpu *cpu_bstats,
  10452. struct gnet_stats_rate_est64 *rate_est,
  10453. spinlock_t *stats_lock,
  10454. - seqcount_t *running, struct nlattr *opt);
  10455. + net_seqlock_t *running, struct nlattr *opt);
  10456. void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
  10457. struct gnet_stats_rate_est64 *rate_est);
  10458. int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
  10459. struct gnet_stats_basic_cpu __percpu *cpu_bstats,
  10460. struct gnet_stats_rate_est64 *rate_est,
  10461. spinlock_t *stats_lock,
  10462. - seqcount_t *running, struct nlattr *opt);
  10463. + net_seqlock_t *running, struct nlattr *opt);
  10464. bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
  10465. const struct gnet_stats_rate_est64 *rate_est);
  10466. #endif
  10467. diff -Nur linux-4.8.15.orig/include/net/neighbour.h linux-4.8.15/include/net/neighbour.h
  10468. --- linux-4.8.15.orig/include/net/neighbour.h 2016-12-15 17:50:48.000000000 +0100
  10469. +++ linux-4.8.15/include/net/neighbour.h 2017-01-01 17:07:15.971423099 +0100
  10470. @@ -446,7 +446,7 @@
  10471. }
  10472. #endif
  10473. -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
  10474. +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
  10475. {
  10476. unsigned int seq;
  10477. int hh_len;
  10478. @@ -501,7 +501,7 @@
  10479. #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
  10480. -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
  10481. +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n,
  10482. const struct net_device *dev)
  10483. {
  10484. unsigned int seq;
  10485. diff -Nur linux-4.8.15.orig/include/net/netns/ipv4.h linux-4.8.15/include/net/netns/ipv4.h
  10486. --- linux-4.8.15.orig/include/net/netns/ipv4.h 2016-12-15 17:50:48.000000000 +0100
  10487. +++ linux-4.8.15/include/net/netns/ipv4.h 2017-01-01 17:07:15.975423350 +0100
  10488. @@ -70,6 +70,7 @@
  10489. int sysctl_icmp_echo_ignore_all;
  10490. int sysctl_icmp_echo_ignore_broadcasts;
  10491. + int sysctl_icmp_echo_sysrq;
  10492. int sysctl_icmp_ignore_bogus_error_responses;
  10493. int sysctl_icmp_ratelimit;
  10494. int sysctl_icmp_ratemask;
  10495. diff -Nur linux-4.8.15.orig/include/net/net_seq_lock.h linux-4.8.15/include/net/net_seq_lock.h
  10496. --- linux-4.8.15.orig/include/net/net_seq_lock.h 1970-01-01 01:00:00.000000000 +0100
  10497. +++ linux-4.8.15/include/net/net_seq_lock.h 2017-01-01 17:07:15.971423099 +0100
  10498. @@ -0,0 +1,15 @@
  10499. +#ifndef __NET_NET_SEQ_LOCK_H__
  10500. +#define __NET_NET_SEQ_LOCK_H__
  10501. +
  10502. +#ifdef CONFIG_PREEMPT_RT_BASE
  10503. +# define net_seqlock_t seqlock_t
  10504. +# define net_seq_begin(__r) read_seqbegin(__r)
  10505. +# define net_seq_retry(__r, __s) read_seqretry(__r, __s)
  10506. +
  10507. +#else
  10508. +# define net_seqlock_t seqcount_t
  10509. +# define net_seq_begin(__r) read_seqcount_begin(__r)
  10510. +# define net_seq_retry(__r, __s) read_seqcount_retry(__r, __s)
  10511. +#endif
  10512. +
  10513. +#endif
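
net_seqlock_t hides the RT/!RT difference from the statistics readers: on RT the qdisc's running member becomes a full seqlock (so a preempted writer can be boosted), otherwise it stays a plain seqcount. A hedged sketch of the reader pattern that gen_stats uses, with simplified fields:

    static void snapshot_bstats(net_seqlock_t *running,
                                const struct gnet_stats_basic_packed *b,
                                struct gnet_stats_basic_packed *out)
    {
            unsigned int start;

            do {
                    start = net_seq_begin(running);  /* seqlock or seqcount begin */
                    out->bytes   = b->bytes;
                    out->packets = b->packets;
            } while (net_seq_retry(running, start));
    }
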
  10514. diff -Nur linux-4.8.15.orig/include/net/sch_generic.h linux-4.8.15/include/net/sch_generic.h
  10515. --- linux-4.8.15.orig/include/net/sch_generic.h 2016-12-15 17:50:48.000000000 +0100
  10516. +++ linux-4.8.15/include/net/sch_generic.h 2017-01-01 17:07:15.987424124 +0100
  10517. @@ -10,6 +10,7 @@
  10518. #include <linux/dynamic_queue_limits.h>
  10519. #include <net/gen_stats.h>
  10520. #include <net/rtnetlink.h>
  10521. +#include <net/net_seq_lock.h>
  10522. struct Qdisc_ops;
  10523. struct qdisc_walker;
  10524. @@ -78,7 +79,7 @@
  10525. struct sk_buff *gso_skb ____cacheline_aligned_in_smp;
  10526. struct sk_buff_head q;
  10527. struct gnet_stats_basic_packed bstats;
  10528. - seqcount_t running;
  10529. + net_seqlock_t running;
  10530. struct gnet_stats_queue qstats;
  10531. unsigned long state;
  10532. struct Qdisc *next_sched;
  10533. @@ -90,13 +91,22 @@
  10534. spinlock_t busylock ____cacheline_aligned_in_smp;
  10535. };
  10536. -static inline bool qdisc_is_running(const struct Qdisc *qdisc)
  10537. +static inline bool qdisc_is_running(struct Qdisc *qdisc)
  10538. {
  10539. +#ifdef CONFIG_PREEMPT_RT_BASE
  10540. + return spin_is_locked(&qdisc->running.lock) ? true : false;
  10541. +#else
  10542. return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
  10543. +#endif
  10544. }
  10545. static inline bool qdisc_run_begin(struct Qdisc *qdisc)
  10546. {
  10547. +#ifdef CONFIG_PREEMPT_RT_BASE
  10548. + if (try_write_seqlock(&qdisc->running))
  10549. + return true;
  10550. + return false;
  10551. +#else
  10552. if (qdisc_is_running(qdisc))
  10553. return false;
  10554. /* Variant of write_seqcount_begin() telling lockdep a trylock
  10555. @@ -105,11 +115,16 @@
  10556. raw_write_seqcount_begin(&qdisc->running);
  10557. seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
  10558. return true;
  10559. +#endif
  10560. }
  10561. static inline void qdisc_run_end(struct Qdisc *qdisc)
  10562. {
  10563. +#ifdef CONFIG_PREEMPT_RT_BASE
  10564. + write_sequnlock(&qdisc->running);
  10565. +#else
  10566. write_seqcount_end(&qdisc->running);
  10567. +#endif
  10568. }
  10569. static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
  10570. @@ -300,7 +315,7 @@
  10571. return qdisc_lock(root);
  10572. }
  10573. -static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
  10574. +static inline net_seqlock_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
  10575. {
  10576. struct Qdisc *root = qdisc_root_sleeping(qdisc);
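
The qdisc dispatch path keeps its shape; only the begin/end primitives change underneath. A simplified sketch of the caller pattern (modeled loosely on __qdisc_run(), not copied from it):

    static void my_qdisc_run(struct Qdisc *q)
    {
            if (!qdisc_run_begin(q))        /* RT: try_write_seqlock(); !RT: raw seqcount begin */
                    return;                 /* another CPU is already servicing this qdisc */

            /* ... dequeue and transmit packets, as __qdisc_run() does ... */

            qdisc_run_end(q);
    }
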
  10577. diff -Nur linux-4.8.15.orig/include/trace/events/hist.h linux-4.8.15/include/trace/events/hist.h
  10578. --- linux-4.8.15.orig/include/trace/events/hist.h 1970-01-01 01:00:00.000000000 +0100
  10579. +++ linux-4.8.15/include/trace/events/hist.h 2017-01-01 17:07:15.987424124 +0100
  10580. @@ -0,0 +1,73 @@
  10581. +#undef TRACE_SYSTEM
  10582. +#define TRACE_SYSTEM hist
  10583. +
  10584. +#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ)
  10585. +#define _TRACE_HIST_H
  10586. +
  10587. +#include "latency_hist.h"
  10588. +#include <linux/tracepoint.h>
  10589. +
  10590. +#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST)
  10591. +#define trace_preemptirqsoff_hist(a, b)
  10592. +#define trace_preemptirqsoff_hist_rcuidle(a, b)
  10593. +#else
  10594. +TRACE_EVENT(preemptirqsoff_hist,
  10595. +
  10596. + TP_PROTO(int reason, int starthist),
  10597. +
  10598. + TP_ARGS(reason, starthist),
  10599. +
  10600. + TP_STRUCT__entry(
  10601. + __field(int, reason)
  10602. + __field(int, starthist)
  10603. + ),
  10604. +
  10605. + TP_fast_assign(
  10606. + __entry->reason = reason;
  10607. + __entry->starthist = starthist;
  10608. + ),
  10609. +
  10610. + TP_printk("reason=%s starthist=%s", getaction(__entry->reason),
  10611. + __entry->starthist ? "start" : "stop")
  10612. +);
  10613. +#endif
  10614. +
  10615. +#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST
  10616. +#define trace_hrtimer_interrupt(a, b, c, d)
  10617. +#else
  10618. +TRACE_EVENT(hrtimer_interrupt,
  10619. +
  10620. + TP_PROTO(int cpu, long long offset, struct task_struct *curr,
  10621. + struct task_struct *task),
  10622. +
  10623. + TP_ARGS(cpu, offset, curr, task),
  10624. +
  10625. + TP_STRUCT__entry(
  10626. + __field(int, cpu)
  10627. + __field(long long, offset)
  10628. + __array(char, ccomm, TASK_COMM_LEN)
  10629. + __field(int, cprio)
  10630. + __array(char, tcomm, TASK_COMM_LEN)
  10631. + __field(int, tprio)
  10632. + ),
  10633. +
  10634. + TP_fast_assign(
  10635. + __entry->cpu = cpu;
  10636. + __entry->offset = offset;
  10637. + memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN);
  10638. + __entry->cprio = curr->prio;
  10639. + memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>",
  10640. + task != NULL ? TASK_COMM_LEN : 7);
  10641. + __entry->tprio = task != NULL ? task->prio : -1;
  10642. + ),
  10643. +
  10644. + TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]",
  10645. + __entry->cpu, __entry->offset, __entry->ccomm,
  10646. + __entry->cprio, __entry->tcomm, __entry->tprio)
  10647. +);
  10648. +#endif
  10649. +
  10650. +#endif /* _TRACE_HIST_H */
  10651. +
  10652. +/* This part must be outside protection */
  10653. +#include <trace/define_trace.h>
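
Both tracepoints compile to empty macros unless the corresponding *_OFF_HIST / MISSED_TIMER_OFFSETS_HIST options are enabled. A hedged sketch of how the latency tracers elsewhere in this patch invoke the first one (the surrounding hook is illustrative):

    #include <trace/events/hist.h>

    static void example_irqs_off_section(void)
    {
            trace_preemptirqsoff_hist(IRQS_OFF, 1);  /* start an irqs-off latency sample */
            /* ... critical section being measured ... */
            trace_preemptirqsoff_hist(IRQS_ON, 0);   /* stop the sample */
    }
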
  10654. diff -Nur linux-4.8.15.orig/include/trace/events/latency_hist.h linux-4.8.15/include/trace/events/latency_hist.h
  10655. --- linux-4.8.15.orig/include/trace/events/latency_hist.h 1970-01-01 01:00:00.000000000 +0100
  10656. +++ linux-4.8.15/include/trace/events/latency_hist.h 2017-01-01 17:07:15.987424124 +0100
  10657. @@ -0,0 +1,29 @@
  10658. +#ifndef _LATENCY_HIST_H
  10659. +#define _LATENCY_HIST_H
  10660. +
  10661. +enum hist_action {
  10662. + IRQS_ON,
  10663. + PREEMPT_ON,
  10664. + TRACE_STOP,
  10665. + IRQS_OFF,
  10666. + PREEMPT_OFF,
  10667. + TRACE_START,
  10668. +};
  10669. +
  10670. +static char *actions[] = {
  10671. + "IRQS_ON",
  10672. + "PREEMPT_ON",
  10673. + "TRACE_STOP",
  10674. + "IRQS_OFF",
  10675. + "PREEMPT_OFF",
  10676. + "TRACE_START",
  10677. +};
  10678. +
  10679. +static inline char *getaction(int action)
  10680. +{
  10681. + if (action >= 0 && action < sizeof(actions)/sizeof(actions[0]))
  10682. + return actions[action];
  10683. + return "unknown";
  10684. +}
  10685. +
  10686. +#endif /* _LATENCY_HIST_H */
  10687. diff -Nur linux-4.8.15.orig/init/Kconfig linux-4.8.15/init/Kconfig
  10688. --- linux-4.8.15.orig/init/Kconfig 2016-12-15 17:50:48.000000000 +0100
  10689. +++ linux-4.8.15/init/Kconfig 2017-01-01 17:07:15.987424124 +0100
  10690. @@ -496,7 +496,7 @@
  10691. config RCU_EXPERT
  10692. bool "Make expert-level adjustments to RCU configuration"
  10693. - default n
  10694. + default y if PREEMPT_RT_FULL
  10695. help
  10696. This option needs to be enabled if you wish to make
  10697. expert-level adjustments to RCU configuration. By default,
  10698. @@ -613,7 +613,7 @@
  10699. config RCU_FAST_NO_HZ
  10700. bool "Accelerate last non-dyntick-idle CPU's grace periods"
  10701. - depends on NO_HZ_COMMON && SMP && RCU_EXPERT
  10702. + depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL
  10703. default n
  10704. help
  10705. This option permits CPUs to enter dynticks-idle state even if
  10706. @@ -640,7 +640,7 @@
  10707. config RCU_BOOST
  10708. bool "Enable RCU priority boosting"
  10709. depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
  10710. - default n
  10711. + default y if PREEMPT_RT_FULL
  10712. help
  10713. This option boosts the priority of preempted RCU readers that
  10714. block the current preemptible RCU grace period for too long.
  10715. @@ -771,19 +771,6 @@
  10716. endchoice
  10717. -config RCU_EXPEDITE_BOOT
  10718. - bool
  10719. - default n
  10720. - help
  10721. - This option enables expedited grace periods at boot time,
  10722. - as if rcu_expedite_gp() had been invoked early in boot.
  10723. - The corresponding rcu_unexpedite_gp() is invoked from
  10724. - rcu_end_inkernel_boot(), which is intended to be invoked
  10725. - at the end of the kernel-only boot sequence, just before
  10726. - init is exec'ed.
  10727. -
  10728. - Accept the default if unsure.
  10729. -
  10730. endmenu # "RCU Subsystem"
  10731. config BUILD_BIN2C
  10732. @@ -1054,6 +1041,7 @@
  10733. config RT_GROUP_SCHED
  10734. bool "Group scheduling for SCHED_RR/FIFO"
  10735. depends on CGROUP_SCHED
  10736. + depends on !PREEMPT_RT_FULL
  10737. default n
  10738. help
  10739. This feature lets you explicitly allocate real CPU bandwidth
  10740. @@ -1761,6 +1749,7 @@
  10741. config SLAB
  10742. bool "SLAB"
  10743. + depends on !PREEMPT_RT_FULL
  10744. select HAVE_HARDENED_USERCOPY_ALLOCATOR
  10745. help
  10746. The regular slab allocator that is established and known to work
  10747. @@ -1781,6 +1770,7 @@
  10748. config SLOB
  10749. depends on EXPERT
  10750. bool "SLOB (Simple Allocator)"
  10751. + depends on !PREEMPT_RT_FULL
  10752. help
  10753. SLOB replaces the stock allocator with a drastically simpler
  10754. allocator. SLOB is generally more space efficient but
  10755. @@ -1799,7 +1789,7 @@
  10756. config SLUB_CPU_PARTIAL
  10757. default y
  10758. - depends on SLUB && SMP
  10759. + depends on SLUB && SMP && !PREEMPT_RT_FULL
  10760. bool "SLUB per cpu partial cache"
  10761. help
  10762. Per cpu partial caches accellerate objects allocation and freeing
  10763. diff -Nur linux-4.8.15.orig/init/main.c linux-4.8.15/init/main.c
  10764. --- linux-4.8.15.orig/init/main.c 2016-12-15 17:50:48.000000000 +0100
  10765. +++ linux-4.8.15/init/main.c 2017-01-01 17:07:15.991424381 +0100
  10766. @@ -507,6 +507,7 @@
  10767. setup_command_line(command_line);
  10768. setup_nr_cpu_ids();
  10769. setup_per_cpu_areas();
  10770. + softirq_early_init();
  10771. boot_cpu_state_init();
  10772. smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
  10773. diff -Nur linux-4.8.15.orig/init/Makefile linux-4.8.15/init/Makefile
  10774. --- linux-4.8.15.orig/init/Makefile 2016-12-15 17:50:48.000000000 +0100
  10775. +++ linux-4.8.15/init/Makefile 2017-01-01 17:07:15.991424381 +0100
  10776. @@ -33,4 +33,4 @@
  10777. include/generated/compile.h: FORCE
  10778. @$($(quiet)chk_compile.h)
  10779. $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
  10780. - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
  10781. + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)"
  10782. diff -Nur linux-4.8.15.orig/ipc/msg.c linux-4.8.15/ipc/msg.c
  10783. --- linux-4.8.15.orig/ipc/msg.c 2016-12-15 17:50:48.000000000 +0100
  10784. +++ linux-4.8.15/ipc/msg.c 2017-01-01 17:07:15.991424381 +0100
  10785. @@ -183,20 +183,14 @@
  10786. }
  10787. }
  10788. -static void expunge_all(struct msg_queue *msq, int res)
  10789. +static void expunge_all(struct msg_queue *msq, int res,
  10790. + struct wake_q_head *wake_q)
  10791. {
  10792. struct msg_receiver *msr, *t;
  10793. list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
  10794. - msr->r_msg = NULL; /* initialize expunge ordering */
  10795. - wake_up_process(msr->r_tsk);
  10796. - /*
  10797. - * Ensure that the wakeup is visible before setting r_msg as
  10798. - * the receiving end depends on it: either spinning on a nil,
  10799. - * or dealing with -EAGAIN cases. See lockless receive part 1
  10800. - * and 2 in do_msgrcv().
  10801. - */
  10802. - smp_wmb(); /* barrier (B) */
  10803. +
  10804. + wake_q_add(wake_q, msr->r_tsk);
  10805. msr->r_msg = ERR_PTR(res);
  10806. }
  10807. }
  10808. @@ -213,11 +207,13 @@
  10809. {
  10810. struct msg_msg *msg, *t;
  10811. struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
  10812. + WAKE_Q(wake_q);
  10813. - expunge_all(msq, -EIDRM);
  10814. + expunge_all(msq, -EIDRM, &wake_q);
  10815. ss_wakeup(&msq->q_senders, 1);
  10816. msg_rmid(ns, msq);
  10817. ipc_unlock_object(&msq->q_perm);
  10818. + wake_up_q(&wake_q);
  10819. rcu_read_unlock();
  10820. list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
  10821. @@ -342,6 +338,7 @@
  10822. struct kern_ipc_perm *ipcp;
  10823. struct msqid64_ds uninitialized_var(msqid64);
  10824. struct msg_queue *msq;
  10825. + WAKE_Q(wake_q);
  10826. int err;
  10827. if (cmd == IPC_SET) {
  10828. @@ -389,7 +386,7 @@
  10829. /* sleeping receivers might be excluded by
  10830. * stricter permissions.
  10831. */
  10832. - expunge_all(msq, -EAGAIN);
  10833. + expunge_all(msq, -EAGAIN, &wake_q);
  10834. /* sleeping senders might be able to send
  10835. * due to a larger queue size.
  10836. */
  10837. @@ -402,6 +399,7 @@
  10838. out_unlock0:
  10839. ipc_unlock_object(&msq->q_perm);
  10840. + wake_up_q(&wake_q);
  10841. out_unlock1:
  10842. rcu_read_unlock();
  10843. out_up:
  10844. @@ -566,7 +564,8 @@
  10845. return 0;
  10846. }
  10847. -static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
  10848. +static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
  10849. + struct wake_q_head *wake_q)
  10850. {
  10851. struct msg_receiver *msr, *t;
  10852. @@ -577,27 +576,13 @@
  10853. list_del(&msr->r_list);
  10854. if (msr->r_maxsize < msg->m_ts) {
  10855. - /* initialize pipelined send ordering */
  10856. - msr->r_msg = NULL;
  10857. - wake_up_process(msr->r_tsk);
  10858. - /* barrier (B) see barrier comment below */
  10859. - smp_wmb();
  10860. + wake_q_add(wake_q, msr->r_tsk);
  10861. msr->r_msg = ERR_PTR(-E2BIG);
  10862. } else {
  10863. - msr->r_msg = NULL;
  10864. msq->q_lrpid = task_pid_vnr(msr->r_tsk);
  10865. msq->q_rtime = get_seconds();
  10866. - wake_up_process(msr->r_tsk);
  10867. - /*
  10868. - * Ensure that the wakeup is visible before
  10869. - * setting r_msg, as the receiving can otherwise
  10870. - * exit - once r_msg is set, the receiver can
  10871. - * continue. See lockless receive part 1 and 2
  10872. - * in do_msgrcv(). Barrier (B).
  10873. - */
  10874. - smp_wmb();
  10875. + wake_q_add(wake_q, msr->r_tsk);
  10876. msr->r_msg = msg;
  10877. -
  10878. return 1;
  10879. }
  10880. }
  10881. @@ -613,6 +598,7 @@
  10882. struct msg_msg *msg;
  10883. int err;
  10884. struct ipc_namespace *ns;
  10885. + WAKE_Q(wake_q);
  10886. ns = current->nsproxy->ipc_ns;
  10887. @@ -698,7 +684,7 @@
  10888. msq->q_lspid = task_tgid_vnr(current);
  10889. msq->q_stime = get_seconds();
  10890. - if (!pipelined_send(msq, msg)) {
  10891. + if (!pipelined_send(msq, msg, &wake_q)) {
  10892. /* no one is waiting for this message, enqueue it */
  10893. list_add_tail(&msg->m_list, &msq->q_messages);
  10894. msq->q_cbytes += msgsz;
  10895. @@ -712,6 +698,7 @@
  10896. out_unlock0:
  10897. ipc_unlock_object(&msq->q_perm);
  10898. + wake_up_q(&wake_q);
  10899. out_unlock1:
  10900. rcu_read_unlock();
  10901. if (msg != NULL)
  10902. @@ -932,57 +919,25 @@
  10903. rcu_read_lock();
  10904. /* Lockless receive, part 2:
  10905. - * Wait until pipelined_send or expunge_all are outside of
  10906. - * wake_up_process(). There is a race with exit(), see
  10907. - * ipc/mqueue.c for the details. The correct serialization
  10908. - * ensures that a receiver cannot continue without the wakeup
  10909. - * being visibible _before_ setting r_msg:
  10910. + * The work in pipelined_send() and expunge_all():
  10911. + * - Set pointer to message
  10912. + * - Queue the receiver task for later wakeup
  10913. + * - Wake up the process after the lock is dropped.
  10914. *
  10915. - * CPU 0 CPU 1
  10916. - * <loop receiver>
  10917. - * smp_rmb(); (A) <-- pair -. <waker thread>
  10918. - * <load ->r_msg> | msr->r_msg = NULL;
  10919. - * | wake_up_process();
  10920. - * <continue> `------> smp_wmb(); (B)
  10921. - * msr->r_msg = msg;
  10922. - *
  10923. - * Where (A) orders the message value read and where (B) orders
  10924. - * the write to the r_msg -- done in both pipelined_send and
  10925. - * expunge_all.
  10926. + * Should the process wake up before this wakeup (due to a
  10927. + * signal) it will either see the message and continue …
  10928. */
  10929. - for (;;) {
  10930. - /*
  10931. - * Pairs with writer barrier in pipelined_send
  10932. - * or expunge_all.
  10933. - */
  10934. - smp_rmb(); /* barrier (A) */
  10935. - msg = (struct msg_msg *)msr_d.r_msg;
  10936. - if (msg)
  10937. - break;
  10938. - /*
  10939. - * The cpu_relax() call is a compiler barrier
  10940. - * which forces everything in this loop to be
  10941. - * re-loaded.
  10942. - */
  10943. - cpu_relax();
  10944. - }
  10945. -
  10946. - /* Lockless receive, part 3:
  10947. - * If there is a message or an error then accept it without
  10948. - * locking.
  10949. - */
  10950. + msg = (struct msg_msg *)msr_d.r_msg;
  10951. if (msg != ERR_PTR(-EAGAIN))
  10952. goto out_unlock1;
  10953. - /* Lockless receive, part 3:
  10954. - * Acquire the queue spinlock.
  10955. - */
  10956. + /*
  10957. + * … or see -EAGAIN, acquire the lock to check the message
  10958. + * again.
  10959. + */
  10960. ipc_lock_object(&msq->q_perm);
  10961. - /* Lockless receive, part 4:
  10962. - * Repeat test after acquiring the spinlock.
  10963. - */
  10964. msg = (struct msg_msg *)msr_d.r_msg;
  10965. if (msg != ERR_PTR(-EAGAIN))
  10966. goto out_unlock0;
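
The ipc/msg.c conversion follows the standard wake_q pattern: wakeups are collected while the object lock is held and performed only after it is dropped, so a woken task never immediately blocks on the lock (important on RT, where the lock is a sleeping lock). A condensed sketch of that pattern (lock and waiter are placeholders):

    static void wake_one_waiter(spinlock_t *lock, struct task_struct *waiter)
    {
            WAKE_Q(wake_q);                 /* on-stack wake queue */

            spin_lock(lock);
            /* ... update queue state while still holding the lock ... */
            wake_q_add(&wake_q, waiter);    /* takes a task reference, no wakeup yet */
            spin_unlock(lock);

            wake_up_q(&wake_q);             /* real wakeups happen with no lock held */
    }
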
  10967. diff -Nur linux-4.8.15.orig/ipc/sem.c linux-4.8.15/ipc/sem.c
  10968. --- linux-4.8.15.orig/ipc/sem.c 2016-12-15 17:50:48.000000000 +0100
  10969. +++ linux-4.8.15/ipc/sem.c 2017-01-01 17:07:15.991424381 +0100
  10970. @@ -712,6 +712,13 @@
  10971. static void wake_up_sem_queue_prepare(struct list_head *pt,
  10972. struct sem_queue *q, int error)
  10973. {
  10974. +#ifdef CONFIG_PREEMPT_RT_BASE
  10975. + struct task_struct *p = q->sleeper;
  10976. + get_task_struct(p);
  10977. + q->status = error;
  10978. + wake_up_process(p);
  10979. + put_task_struct(p);
  10980. +#else
  10981. if (list_empty(pt)) {
  10982. /*
  10983. * Hold preempt off so that we don't get preempted and have the
  10984. @@ -723,6 +730,7 @@
  10985. q->pid = error;
  10986. list_add_tail(&q->list, pt);
  10987. +#endif
  10988. }
  10989. /**
  10990. @@ -736,6 +744,7 @@
  10991. */
  10992. static void wake_up_sem_queue_do(struct list_head *pt)
  10993. {
  10994. +#ifndef CONFIG_PREEMPT_RT_BASE
  10995. struct sem_queue *q, *t;
  10996. int did_something;
  10997. @@ -748,6 +757,7 @@
  10998. }
  10999. if (did_something)
  11000. preempt_enable();
  11001. +#endif
  11002. }
  11003. static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
  11004. diff -Nur linux-4.8.15.orig/kernel/cgroup.c linux-4.8.15/kernel/cgroup.c
  11005. --- linux-4.8.15.orig/kernel/cgroup.c 2016-12-15 17:50:48.000000000 +0100
  11006. +++ linux-4.8.15/kernel/cgroup.c 2017-01-01 17:07:15.999424895 +0100
  11007. @@ -5027,10 +5027,10 @@
  11008. queue_work(cgroup_destroy_wq, &css->destroy_work);
  11009. }
  11010. -static void css_release_work_fn(struct work_struct *work)
  11011. +static void css_release_work_fn(struct swork_event *sev)
  11012. {
  11013. struct cgroup_subsys_state *css =
  11014. - container_of(work, struct cgroup_subsys_state, destroy_work);
  11015. + container_of(sev, struct cgroup_subsys_state, destroy_swork);
  11016. struct cgroup_subsys *ss = css->ss;
  11017. struct cgroup *cgrp = css->cgroup;
  11018. @@ -5071,8 +5071,8 @@
  11019. struct cgroup_subsys_state *css =
  11020. container_of(ref, struct cgroup_subsys_state, refcnt);
  11021. - INIT_WORK(&css->destroy_work, css_release_work_fn);
  11022. - queue_work(cgroup_destroy_wq, &css->destroy_work);
  11023. + INIT_SWORK(&css->destroy_swork, css_release_work_fn);
  11024. + swork_queue(&css->destroy_swork);
  11025. }
  11026. static void init_and_link_css(struct cgroup_subsys_state *css,
  11027. @@ -5716,6 +5716,7 @@
  11028. */
  11029. cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
  11030. BUG_ON(!cgroup_destroy_wq);
  11031. + BUG_ON(swork_get());
  11032. /*
  11033. * Used to destroy pidlists and separate to serve as flush domain.
  11034. diff -Nur linux-4.8.15.orig/kernel/cpu.c linux-4.8.15/kernel/cpu.c
  11035. --- linux-4.8.15.orig/kernel/cpu.c 2016-12-15 17:50:48.000000000 +0100
  11036. +++ linux-4.8.15/kernel/cpu.c 2017-01-01 17:07:15.999424895 +0100
  11037. @@ -152,8 +152,8 @@
  11038. #endif
  11039. } cpu_hotplug = {
  11040. .active_writer = NULL,
  11041. - .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
  11042. .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
  11043. + .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
  11044. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  11045. .dep_map = {.name = "cpu_hotplug.lock" },
  11046. #endif
  11047. @@ -166,6 +166,289 @@
  11048. #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
  11049. #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
  11050. +/**
  11051. + * hotplug_pcp - per cpu hotplug descriptor
  11052. + * @unplug: set when pin_current_cpu() needs to sync tasks
  11053. + * @sync_tsk: the task that waits for tasks to finish pinned sections
  11054. + * @refcount: counter of tasks in pinned sections
  11055. + * @grab_lock: set when the tasks entering pinned sections should wait
  11056. + * @synced: notifier for @sync_tsk to tell cpu_down it's finished
  11057. + * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
  11058. + * @mutex_init: zero if the mutex hasn't been initialized yet.
  11059. + *
  11060. + * Although @unplug and @sync_tsk may point to the same task, the @unplug
  11061. + * is used as a flag and still exists after @sync_tsk has exited and
  11062. + * @sync_tsk has been set to NULL.
  11063. + */
  11064. +struct hotplug_pcp {
  11065. + struct task_struct *unplug;
  11066. + struct task_struct *sync_tsk;
  11067. + int refcount;
  11068. + int grab_lock;
  11069. + struct completion synced;
  11070. + struct completion unplug_wait;
  11071. +#ifdef CONFIG_PREEMPT_RT_FULL
  11072. + /*
  11073. + * Note, on PREEMPT_RT, the hotplug lock must save the state of
  11074. + * the task, otherwise the mutex will cause the task to fail
  11075. + * to sleep when required. (Because it's called from migrate_disable())
  11076. + *
  11077. + * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
  11078. + * state.
  11079. + */
  11080. + spinlock_t lock;
  11081. +#else
  11082. + struct mutex mutex;
  11083. +#endif
  11084. + int mutex_init;
  11085. +};
  11086. +
  11087. +#ifdef CONFIG_PREEMPT_RT_FULL
  11088. +# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock)
  11089. +# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock)
  11090. +#else
  11091. +# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
  11092. +# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
  11093. +#endif
  11094. +
  11095. +static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
  11096. +
  11097. +/**
  11098. + * pin_current_cpu - Prevent the current cpu from being unplugged
  11099. + *
  11100. + * Lightweight version of get_online_cpus() to prevent cpu from being
  11101. + * unplugged when code runs in a migration disabled region.
  11102. + *
  11103. + * Must be called with preemption disabled (preempt_count = 1)!
  11104. + */
  11105. +void pin_current_cpu(void)
  11106. +{
  11107. + struct hotplug_pcp *hp;
  11108. + int force = 0;
  11109. +
  11110. +retry:
  11111. + hp = this_cpu_ptr(&hotplug_pcp);
  11112. +
  11113. + if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
  11114. + hp->unplug == current) {
  11115. + hp->refcount++;
  11116. + return;
  11117. + }
  11118. + if (hp->grab_lock) {
  11119. + preempt_enable();
  11120. + hotplug_lock(hp);
  11121. + hotplug_unlock(hp);
  11122. + } else {
  11123. + preempt_enable();
  11124. + /*
  11125. + * Try to push this task off of this CPU.
  11126. + */
  11127. + if (!migrate_me()) {
  11128. + preempt_disable();
  11129. + hp = this_cpu_ptr(&hotplug_pcp);
  11130. + if (!hp->grab_lock) {
  11131. + /*
  11132. + * Just let it continue; it's already pinned
  11133. + * or about to sleep.
  11134. + */
  11135. + force = 1;
  11136. + goto retry;
  11137. + }
  11138. + preempt_enable();
  11139. + }
  11140. + }
  11141. + preempt_disable();
  11142. + goto retry;
  11143. +}
  11144. +
  11145. +/**
  11146. + * unpin_current_cpu - Allow unplug of current cpu
  11147. + *
  11148. + * Must be called with preemption or interrupts disabled!
  11149. + */
  11150. +void unpin_current_cpu(void)
  11151. +{
  11152. + struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
  11153. +
  11154. + WARN_ON(hp->refcount <= 0);
  11155. +
  11156. + /* This is safe. sync_unplug_thread is pinned to this cpu */
  11157. + if (!--hp->refcount && hp->unplug && hp->unplug != current)
  11158. + wake_up_process(hp->unplug);
  11159. +}
  11160. +
  11161. +static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
  11162. +{
  11163. + set_current_state(TASK_UNINTERRUPTIBLE);
  11164. + while (hp->refcount) {
  11165. + schedule_preempt_disabled();
  11166. + set_current_state(TASK_UNINTERRUPTIBLE);
  11167. + }
  11168. +}
  11169. +
  11170. +static int sync_unplug_thread(void *data)
  11171. +{
  11172. + struct hotplug_pcp *hp = data;
  11173. +
  11174. + wait_for_completion(&hp->unplug_wait);
  11175. + preempt_disable();
  11176. + hp->unplug = current;
  11177. + wait_for_pinned_cpus(hp);
  11178. +
  11179. + /*
  11180. + * This thread will synchronize the cpu_down() with threads
  11181. + * that have pinned the CPU. When the pinned CPU count reaches
  11182. + * zero, we inform the cpu_down code to continue to the next step.
  11183. + */
  11184. + set_current_state(TASK_UNINTERRUPTIBLE);
  11185. + preempt_enable();
  11186. + complete(&hp->synced);
  11187. +
  11188. + /*
  11189. + * If all succeeds, the next step will need tasks to wait till
  11190. + * the CPU is offline before continuing. To do this, the grab_lock
  11191. + * is set and tasks going into pin_current_cpu() will block on the
  11192. + * mutex. But we still need to wait for those that are already in
  11193. + * pinned CPU sections. If cpu_down() fails, kthread_should_stop()
  11194. + * will kick this thread out.
  11195. + */
  11196. + while (!hp->grab_lock && !kthread_should_stop()) {
  11197. + schedule();
  11198. + set_current_state(TASK_UNINTERRUPTIBLE);
  11199. + }
  11200. +
  11201. + /* Make sure grab_lock is seen before we see a stale completion */
  11202. + smp_mb();
  11203. +
  11204. + /*
  11205. + * Now just before cpu_down() enters stop machine, we need to make
  11206. + * sure all tasks that are in pinned CPU sections are out, and new
  11207. + * tasks will now grab the lock, keeping them from entering pinned
  11208. + * CPU sections.
  11209. + */
  11210. + if (!kthread_should_stop()) {
  11211. + preempt_disable();
  11212. + wait_for_pinned_cpus(hp);
  11213. + preempt_enable();
  11214. + complete(&hp->synced);
  11215. + }
  11216. +
  11217. + set_current_state(TASK_UNINTERRUPTIBLE);
  11218. + while (!kthread_should_stop()) {
  11219. + schedule();
  11220. + set_current_state(TASK_UNINTERRUPTIBLE);
  11221. + }
  11222. + set_current_state(TASK_RUNNING);
  11223. +
  11224. + /*
  11225. + * Force this thread off this CPU as it's going down and
  11226. + * we don't want any more work on this CPU.
  11227. + */
  11228. + current->flags &= ~PF_NO_SETAFFINITY;
  11229. + set_cpus_allowed_ptr(current, cpu_present_mask);
  11230. + migrate_me();
  11231. + return 0;
  11232. +}
  11233. +
  11234. +static void __cpu_unplug_sync(struct hotplug_pcp *hp)
  11235. +{
  11236. + wake_up_process(hp->sync_tsk);
  11237. + wait_for_completion(&hp->synced);
  11238. +}
  11239. +
  11240. +static void __cpu_unplug_wait(unsigned int cpu)
  11241. +{
  11242. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  11243. +
  11244. + complete(&hp->unplug_wait);
  11245. + wait_for_completion(&hp->synced);
  11246. +}
  11247. +
  11248. +/*
  11249. + * Start the sync_unplug_thread on the target cpu and wait for it to
  11250. + * complete.
  11251. + */
  11252. +static int cpu_unplug_begin(unsigned int cpu)
  11253. +{
  11254. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  11255. + int err;
  11256. +
  11257. + /* Protected by cpu_hotplug.lock */
  11258. + if (!hp->mutex_init) {
  11259. +#ifdef CONFIG_PREEMPT_RT_FULL
  11260. + spin_lock_init(&hp->lock);
  11261. +#else
  11262. + mutex_init(&hp->mutex);
  11263. +#endif
  11264. + hp->mutex_init = 1;
  11265. + }
  11266. +
  11267. + /* Inform the scheduler to migrate tasks off this CPU */
  11268. + tell_sched_cpu_down_begin(cpu);
  11269. +
  11270. + init_completion(&hp->synced);
  11271. + init_completion(&hp->unplug_wait);
  11272. +
  11273. + hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
  11274. + if (IS_ERR(hp->sync_tsk)) {
  11275. + err = PTR_ERR(hp->sync_tsk);
  11276. + hp->sync_tsk = NULL;
  11277. + return err;
  11278. + }
  11279. + kthread_bind(hp->sync_tsk, cpu);
  11280. +
  11281. + /*
  11282. + * Wait for tasks to get out of the pinned sections,
  11283. + * it's still OK if new tasks enter. Some CPU notifiers will
  11284. + * wait for tasks that are going to enter these sections and
  11285. + * we must not have them block.
  11286. + */
  11287. + wake_up_process(hp->sync_tsk);
  11288. + return 0;
  11289. +}
  11290. +
  11291. +static void cpu_unplug_sync(unsigned int cpu)
  11292. +{
  11293. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  11294. +
  11295. + init_completion(&hp->synced);
  11296. + /* The completion needs to be initialized before setting grab_lock */
  11297. + smp_wmb();
  11298. +
  11299. + /* Grab the mutex before setting grab_lock */
  11300. + hotplug_lock(hp);
  11301. + hp->grab_lock = 1;
  11302. +
  11303. + /*
  11304. + * The CPU notifiers have been completed.
  11305. + * Wait for tasks to get out of pinned CPU sections and have new
  11306. + * tasks block until the CPU is completely down.
  11307. + */
  11308. + __cpu_unplug_sync(hp);
  11309. +
  11310. + /* All done with the sync thread */
  11311. + kthread_stop(hp->sync_tsk);
  11312. + hp->sync_tsk = NULL;
  11313. +}
  11314. +
  11315. +static void cpu_unplug_done(unsigned int cpu)
  11316. +{
  11317. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  11318. +
  11319. + hp->unplug = NULL;
  11320. + /* Let all tasks know cpu unplug is finished before cleaning up */
  11321. + smp_wmb();
  11322. +
  11323. + if (hp->sync_tsk)
  11324. + kthread_stop(hp->sync_tsk);
  11325. +
  11326. + if (hp->grab_lock) {
  11327. + hotplug_unlock(hp);
  11328. + /* protected by cpu_hotplug.lock */
  11329. + hp->grab_lock = 0;
  11330. + }
  11331. + tell_sched_cpu_down_done(cpu);
  11332. +}
  11333. void get_online_cpus(void)
  11334. {
  11335. @@ -710,10 +993,14 @@
  11336. struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
  11337. int err;
  11338. + __cpu_unplug_wait(cpu);
  11339. /* Park the smpboot threads */
  11340. kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
  11341. smpboot_park_threads(cpu);
  11342. + /* Notifiers are done. Don't let any more tasks pin this CPU. */
  11343. + cpu_unplug_sync(cpu);
  11344. +
  11345. /*
  11346. * Prevent irq alloc/free while the dying cpu reorganizes the
  11347. * interrupt affinities.
  11348. @@ -799,6 +1086,9 @@
  11349. struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
  11350. int prev_state, ret = 0;
  11351. bool hasdied = false;
  11352. + int mycpu;
  11353. + cpumask_var_t cpumask;
  11354. + cpumask_var_t cpumask_org;
  11355. if (num_online_cpus() == 1)
  11356. return -EBUSY;
  11357. @@ -806,7 +1096,34 @@
  11358. if (!cpu_present(cpu))
  11359. return -EINVAL;
  11360. + /* Move the downtaker off the unplug cpu */
  11361. + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
  11362. + return -ENOMEM;
  11363. + if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) {
  11364. + free_cpumask_var(cpumask);
  11365. + return -ENOMEM;
  11366. + }
  11367. +
  11368. + cpumask_copy(cpumask_org, tsk_cpus_allowed(current));
  11369. + cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
  11370. + set_cpus_allowed_ptr(current, cpumask);
  11371. + free_cpumask_var(cpumask);
  11372. + migrate_disable();
  11373. + mycpu = smp_processor_id();
  11374. + if (mycpu == cpu) {
  11375. + printk(KERN_ERR "Yuck! Still on unplug CPU!\n");
  11376. + migrate_enable();
  11377. + ret = -EBUSY;
  11378. + goto restore_cpus;
  11379. + }
  11380. +
  11381. + migrate_enable();
  11382. cpu_hotplug_begin();
  11383. + ret = cpu_unplug_begin(cpu);
  11384. + if (ret) {
  11385. + printk("cpu_unplug_begin(%d) failed\n", cpu);
  11386. + goto out_cancel;
  11387. + }
  11388. cpuhp_tasks_frozen = tasks_frozen;
  11389. @@ -845,10 +1162,15 @@
  11390. hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
  11391. out:
  11392. + cpu_unplug_done(cpu);
  11393. +out_cancel:
  11394. cpu_hotplug_done();
  11395. /* This post dead nonsense must die */
  11396. if (!ret && hasdied)
  11397. cpu_notify_nofail(CPU_POST_DEAD, cpu);
  11398. +restore_cpus:
  11399. + set_cpus_allowed_ptr(current, cpumask_org);
  11400. + free_cpumask_var(cpumask_org);
  11401. return ret;
  11402. }
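The kernel/cpu.c hunks above add a per-CPU pin/unpin refcount and a sync_unplug thread that drains pinned sections before the CPU is torn down. As a rough illustration of that handshake only, here is a minimal userspace pthread sketch; pin_section(), unpin_section() and drain_pinned() are made-up names standing in for pin_current_cpu(), unpin_current_cpu() and wait_for_pinned_cpus(), and this is not kernel code.

/* Userspace sketch of the pin/refcount handshake added to kernel/cpu.c.
 * pin_section()/unpin_section() stand in for pin_current_cpu()/
 * unpin_current_cpu(); drain_pinned() stands in for the sync thread's
 * wait_for_pinned_cpus(). Illustrative only, not kernel code. */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  drained = PTHREAD_COND_INITIALIZER;
static int refcount;	/* tasks currently inside a pinned section */
static int unplug;	/* set once the "CPU" is being taken down  */

static void pin_section(void)
{
	pthread_mutex_lock(&lock);
	refcount++;
	pthread_mutex_unlock(&lock);
}

static void unpin_section(void)
{
	pthread_mutex_lock(&lock);
	/* last one out wakes the drain side, like wake_up_process(hp->unplug) */
	if (--refcount == 0 && unplug)
		pthread_cond_signal(&drained);
	pthread_mutex_unlock(&lock);
}

static void drain_pinned(void)
{
	pthread_mutex_lock(&lock);
	unplug = 1;
	while (refcount)	/* wait_for_pinned_cpus() */
		pthread_cond_wait(&drained, &lock);
	pthread_mutex_unlock(&lock);
}

static void *worker(void *arg)
{
	(void)arg;
	pin_section();
	usleep(10000);		/* work that must not race with the unplug */
	unpin_section();
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	usleep(1000);		/* demo only: let workers enter their sections */
	drain_pinned();
	printf("all pinned sections drained, safe to continue\n");
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}

In the real code, tasks that try to pin after grab_lock is set additionally block on the per-CPU hotplug lock (a sleeping spinlock on RT), which this sketch leaves out.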
  11403. diff -Nur linux-4.8.15.orig/kernel/debug/kdb/kdb_io.c linux-4.8.15/kernel/debug/kdb/kdb_io.c
  11404. --- linux-4.8.15.orig/kernel/debug/kdb/kdb_io.c 2016-12-15 17:50:48.000000000 +0100
  11405. +++ linux-4.8.15/kernel/debug/kdb/kdb_io.c 2017-01-01 17:07:16.003425149 +0100
  11406. @@ -554,7 +554,6 @@
  11407. int linecount;
  11408. int colcount;
  11409. int logging, saved_loglevel = 0;
  11410. - int saved_trap_printk;
  11411. int got_printf_lock = 0;
  11412. int retlen = 0;
  11413. int fnd, len;
  11414. @@ -565,8 +564,6 @@
  11415. unsigned long uninitialized_var(flags);
  11416. preempt_disable();
  11417. - saved_trap_printk = kdb_trap_printk;
  11418. - kdb_trap_printk = 0;
  11419. /* Serialize kdb_printf if multiple cpus try to write at once.
  11420. * But if any cpu goes recursive in kdb, just print the output,
  11421. @@ -855,7 +852,6 @@
  11422. } else {
  11423. __release(kdb_printf_lock);
  11424. }
  11425. - kdb_trap_printk = saved_trap_printk;
  11426. preempt_enable();
  11427. return retlen;
  11428. }
  11429. @@ -865,9 +861,11 @@
  11430. va_list ap;
  11431. int r;
  11432. + kdb_trap_printk++;
  11433. va_start(ap, fmt);
  11434. r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap);
  11435. va_end(ap);
  11436. + kdb_trap_printk--;
  11437. return r;
  11438. }
  11439. diff -Nur linux-4.8.15.orig/kernel/events/core.c linux-4.8.15/kernel/events/core.c
  11440. --- linux-4.8.15.orig/kernel/events/core.c 2016-12-15 17:50:48.000000000 +0100
  11441. +++ linux-4.8.15/kernel/events/core.c 2017-01-01 17:07:16.007425407 +0100
  11442. @@ -1042,6 +1042,7 @@
  11443. raw_spin_lock_init(&cpuctx->hrtimer_lock);
  11444. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
  11445. timer->function = perf_mux_hrtimer_handler;
  11446. + timer->irqsafe = 1;
  11447. }
  11448. static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
  11449. @@ -8217,6 +8218,7 @@
  11450. hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  11451. hwc->hrtimer.function = perf_swevent_hrtimer;
  11452. + hwc->hrtimer.irqsafe = 1;
  11453. /*
  11454. * Since hrtimers have a fixed rate, we can do a static freq->period
  11455. diff -Nur linux-4.8.15.orig/kernel/exit.c linux-4.8.15/kernel/exit.c
  11456. --- linux-4.8.15.orig/kernel/exit.c 2016-12-15 17:50:48.000000000 +0100
  11457. +++ linux-4.8.15/kernel/exit.c 2017-01-01 17:07:16.011425679 +0100
  11458. @@ -143,7 +143,7 @@
  11459. * Do this under ->siglock, we can race with another thread
  11460. * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
  11461. */
  11462. - flush_sigqueue(&tsk->pending);
  11463. + flush_task_sigqueue(tsk);
  11464. tsk->sighand = NULL;
  11465. spin_unlock(&sighand->siglock);
  11466. diff -Nur linux-4.8.15.orig/kernel/fork.c linux-4.8.15/kernel/fork.c
  11467. --- linux-4.8.15.orig/kernel/fork.c 2016-12-15 17:50:48.000000000 +0100
  11468. +++ linux-4.8.15/kernel/fork.c 2017-01-01 17:07:16.011425679 +0100
  11469. @@ -251,7 +251,9 @@
  11470. if (atomic_dec_and_test(&sig->sigcnt))
  11471. free_signal_struct(sig);
  11472. }
  11473. -
  11474. +#ifdef CONFIG_PREEMPT_RT_BASE
  11475. +static
  11476. +#endif
  11477. void __put_task_struct(struct task_struct *tsk)
  11478. {
  11479. WARN_ON(!tsk->exit_state);
  11480. @@ -268,7 +270,18 @@
  11481. if (!profile_handoff_task(tsk))
  11482. free_task(tsk);
  11483. }
  11484. +#ifndef CONFIG_PREEMPT_RT_BASE
  11485. EXPORT_SYMBOL_GPL(__put_task_struct);
  11486. +#else
  11487. +void __put_task_struct_cb(struct rcu_head *rhp)
  11488. +{
  11489. + struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu);
  11490. +
  11491. + __put_task_struct(tsk);
  11492. +
  11493. +}
  11494. +EXPORT_SYMBOL_GPL(__put_task_struct_cb);
  11495. +#endif
  11496. void __init __weak arch_task_cache_init(void) { }
  11497. @@ -702,6 +715,19 @@
  11498. }
  11499. EXPORT_SYMBOL_GPL(__mmdrop);
  11500. +#ifdef CONFIG_PREEMPT_RT_BASE
  11501. +/*
  11502. + * RCU callback for delayed mm drop. Not strictly rcu, but we don't
  11503. + * want another facility to make this work.
  11504. + */
  11505. +void __mmdrop_delayed(struct rcu_head *rhp)
  11506. +{
  11507. + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
  11508. +
  11509. + __mmdrop(mm);
  11510. +}
  11511. +#endif
  11512. +
  11513. static inline void __mmput(struct mm_struct *mm)
  11514. {
  11515. VM_BUG_ON(atomic_read(&mm->mm_users));
  11516. @@ -1274,6 +1300,9 @@
  11517. */
  11518. static void posix_cpu_timers_init(struct task_struct *tsk)
  11519. {
  11520. +#ifdef CONFIG_PREEMPT_RT_BASE
  11521. + tsk->posix_timer_list = NULL;
  11522. +#endif
  11523. tsk->cputime_expires.prof_exp = 0;
  11524. tsk->cputime_expires.virt_exp = 0;
  11525. tsk->cputime_expires.sched_exp = 0;
  11526. @@ -1399,6 +1428,7 @@
  11527. spin_lock_init(&p->alloc_lock);
  11528. init_sigpending(&p->pending);
  11529. + p->sigqueue_cache = NULL;
  11530. p->utime = p->stime = p->gtime = 0;
  11531. p->utimescaled = p->stimescaled = 0;
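The fork.c hunks route the final task_struct and mm_struct teardown through RCU-style callbacks (__put_task_struct_cb(), __mmdrop_delayed()) so the actual free can be deferred out of contexts that must not sleep on RT. Below is a minimal, purely illustrative userspace sketch of the container_of() wrapper shape those callbacks use; cb_head, task and put_task_cb are stand-ins, and the callback is invoked directly instead of via call_rcu().

/* Sketch of the callback-wrapper shape used by __put_task_struct_cb() and
 * __mmdrop_delayed(): the deferred callback only gets a pointer to an
 * embedded head member and uses container_of() to recover the object
 * before freeing it. All names here are illustrative stand-ins. */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct cb_head {			/* stand-in for struct rcu_head */
	void (*func)(struct cb_head *);
};

struct task {				/* stand-in for task_struct */
	int pid;
	struct cb_head put_rcu;		/* embedded callback head */
};

static void put_task_final(struct task *t)
{
	printf("freeing task %d\n", t->pid);
	free(t);
}

/* What the deferral machinery invokes later: head -> enclosing object. */
static void put_task_cb(struct cb_head *head)
{
	put_task_final(container_of(head, struct task, put_rcu));
}

int main(void)
{
	struct task *t = malloc(sizeof(*t));

	if (!t)
		return 1;
	t->pid = 42;
	t->put_rcu.func = put_task_cb;
	t->put_rcu.func(&t->put_rcu);	/* deferred in the kernel, direct here */
	return 0;
}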
  11532. diff -Nur linux-4.8.15.orig/kernel/futex.c linux-4.8.15/kernel/futex.c
  11533. --- linux-4.8.15.orig/kernel/futex.c 2016-12-15 17:50:48.000000000 +0100
  11534. +++ linux-4.8.15/kernel/futex.c 2017-01-01 17:07:16.015425924 +0100
  11535. @@ -895,7 +895,9 @@
  11536. * task still owns the PI-state:
  11537. */
  11538. if (head->next != next) {
  11539. + raw_spin_unlock_irq(&curr->pi_lock);
  11540. spin_unlock(&hb->lock);
  11541. + raw_spin_lock_irq(&curr->pi_lock);
  11542. continue;
  11543. }
  11544. @@ -1290,6 +1292,7 @@
  11545. struct futex_pi_state *pi_state = this->pi_state;
  11546. u32 uninitialized_var(curval), newval;
  11547. WAKE_Q(wake_q);
  11548. + WAKE_Q(wake_sleeper_q);
  11549. bool deboost;
  11550. int ret = 0;
  11551. @@ -1356,7 +1359,8 @@
  11552. raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
  11553. - deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
  11554. + deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
  11555. + &wake_sleeper_q);
  11556. /*
  11557. * First unlock HB so the waiter does not spin on it once he got woken
  11558. @@ -1364,8 +1368,9 @@
  11559. * deboost first (and lose our higher priority), then the task might get
  11560. * scheduled away before the wake up can take place.
  11561. */
  11562. - spin_unlock(&hb->lock);
  11563. + deboost |= spin_unlock_no_deboost(&hb->lock);
  11564. wake_up_q(&wake_q);
  11565. + wake_up_q_sleeper(&wake_sleeper_q);
  11566. if (deboost)
  11567. rt_mutex_adjust_prio(current);
  11568. @@ -1915,6 +1920,16 @@
  11569. requeue_pi_wake_futex(this, &key2, hb2);
  11570. drop_count++;
  11571. continue;
  11572. + } else if (ret == -EAGAIN) {
  11573. + /*
  11574. + * Waiter was woken by timeout or
  11575. + * signal and has set pi_blocked_on to
  11576. + * PI_WAKEUP_INPROGRESS before we
  11577. + * tried to enqueue it on the rtmutex.
  11578. + */
  11579. + this->pi_state = NULL;
  11580. + put_pi_state(pi_state);
  11581. + continue;
  11582. } else if (ret) {
  11583. /*
  11584. * rt_mutex_start_proxy_lock() detected a
  11585. @@ -2805,7 +2820,7 @@
  11586. struct hrtimer_sleeper timeout, *to = NULL;
  11587. struct rt_mutex_waiter rt_waiter;
  11588. struct rt_mutex *pi_mutex = NULL;
  11589. - struct futex_hash_bucket *hb;
  11590. + struct futex_hash_bucket *hb, *hb2;
  11591. union futex_key key2 = FUTEX_KEY_INIT;
  11592. struct futex_q q = futex_q_init;
  11593. int res, ret;
  11594. @@ -2830,10 +2845,7 @@
  11595. * The waiter is allocated on our stack, manipulated by the requeue
  11596. * code while we sleep on uaddr.
  11597. */
  11598. - debug_rt_mutex_init_waiter(&rt_waiter);
  11599. - RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
  11600. - RB_CLEAR_NODE(&rt_waiter.tree_entry);
  11601. - rt_waiter.task = NULL;
  11602. + rt_mutex_init_waiter(&rt_waiter, false);
  11603. ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
  11604. if (unlikely(ret != 0))
  11605. @@ -2864,20 +2876,55 @@
  11606. /* Queue the futex_q, drop the hb lock, wait for wakeup. */
  11607. futex_wait_queue_me(hb, &q, to);
  11608. - spin_lock(&hb->lock);
  11609. - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
  11610. - spin_unlock(&hb->lock);
  11611. - if (ret)
  11612. - goto out_put_keys;
  11613. + /*
  11614. + * On RT we must avoid races with requeue and trying to block
  11615. + * on two mutexes (hb->lock and uaddr2's rtmutex) by
  11616. + * serializing access to pi_blocked_on with pi_lock.
  11617. + */
  11618. + raw_spin_lock_irq(&current->pi_lock);
  11619. + if (current->pi_blocked_on) {
  11620. + /*
  11621. + * We have been requeued or are in the process of
  11622. + * being requeued.
  11623. + */
  11624. + raw_spin_unlock_irq(&current->pi_lock);
  11625. + } else {
  11626. + /*
  11627. + * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS
  11628. + * prevents a concurrent requeue from moving us to the
  11629. + * uaddr2 rtmutex. After that we can safely acquire
  11630. + * (and possibly block on) hb->lock.
  11631. + */
  11632. + current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
  11633. + raw_spin_unlock_irq(&current->pi_lock);
  11634. +
  11635. + spin_lock(&hb->lock);
  11636. +
  11637. + /*
  11638. + * Clean up pi_blocked_on. We might leak it otherwise
  11639. + * when we succeeded with the hb->lock in the fast
  11640. + * path.
  11641. + */
  11642. + raw_spin_lock_irq(&current->pi_lock);
  11643. + current->pi_blocked_on = NULL;
  11644. + raw_spin_unlock_irq(&current->pi_lock);
  11645. +
  11646. + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
  11647. + spin_unlock(&hb->lock);
  11648. + if (ret)
  11649. + goto out_put_keys;
  11650. + }
  11651. /*
  11652. - * In order for us to be here, we know our q.key == key2, and since
  11653. - * we took the hb->lock above, we also know that futex_requeue() has
  11654. - * completed and we no longer have to concern ourselves with a wakeup
  11655. - * race with the atomic proxy lock acquisition by the requeue code. The
  11656. - * futex_requeue dropped our key1 reference and incremented our key2
  11657. - * reference count.
  11658. + * In order to be here, we have either been requeued, are in
  11659. + * the process of being requeued, or requeue successfully
  11660. + * acquired uaddr2 on our behalf. If pi_blocked_on was
  11661. + * non-null above, we may be racing with a requeue. Do not
  11662. + * rely on q->lock_ptr to be hb2->lock until after blocking on
  11663. + * hb->lock or hb2->lock. The futex_requeue dropped our key1
  11664. + * reference and incremented our key2 reference count.
  11665. */
  11666. + hb2 = hash_futex(&key2);
  11667. /* Check if the requeue code acquired the second futex for us. */
  11668. if (!q.rt_waiter) {
  11669. @@ -2886,14 +2933,15 @@
  11670. * did a lock-steal - fix up the PI-state in that case.
  11671. */
  11672. if (q.pi_state && (q.pi_state->owner != current)) {
  11673. - spin_lock(q.lock_ptr);
  11674. + spin_lock(&hb2->lock);
  11675. + BUG_ON(&hb2->lock != q.lock_ptr);
  11676. ret = fixup_pi_state_owner(uaddr2, &q, current);
  11677. /*
  11678. * Drop the reference to the pi state which
  11679. * the requeue_pi() code acquired for us.
  11680. */
  11681. put_pi_state(q.pi_state);
  11682. - spin_unlock(q.lock_ptr);
  11683. + spin_unlock(&hb2->lock);
  11684. }
  11685. } else {
  11686. /*
  11687. @@ -2906,7 +2954,8 @@
  11688. ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
  11689. debug_rt_mutex_free_waiter(&rt_waiter);
  11690. - spin_lock(q.lock_ptr);
  11691. + spin_lock(&hb2->lock);
  11692. + BUG_ON(&hb2->lock != q.lock_ptr);
  11693. /*
  11694. * Fixup the pi_state owner and possibly acquire the lock if we
  11695. * haven't already.
  11696. diff -Nur linux-4.8.15.orig/kernel/irq/handle.c linux-4.8.15/kernel/irq/handle.c
  11697. --- linux-4.8.15.orig/kernel/irq/handle.c 2016-12-15 17:50:48.000000000 +0100
  11698. +++ linux-4.8.15/kernel/irq/handle.c 2017-01-01 17:07:16.015425924 +0100
  11699. @@ -181,10 +181,16 @@
  11700. {
  11701. irqreturn_t retval;
  11702. unsigned int flags = 0;
  11703. + struct pt_regs *regs = get_irq_regs();
  11704. + u64 ip = regs ? instruction_pointer(regs) : 0;
  11705. retval = __handle_irq_event_percpu(desc, &flags);
  11706. - add_interrupt_randomness(desc->irq_data.irq, flags);
  11707. +#ifdef CONFIG_PREEMPT_RT_FULL
  11708. + desc->random_ip = ip;
  11709. +#else
  11710. + add_interrupt_randomness(desc->irq_data.irq, flags, ip);
  11711. +#endif
  11712. if (!noirqdebug)
  11713. note_interrupt(desc, retval);
  11714. diff -Nur linux-4.8.15.orig/kernel/irq/manage.c linux-4.8.15/kernel/irq/manage.c
  11715. --- linux-4.8.15.orig/kernel/irq/manage.c 2016-12-15 17:50:48.000000000 +0100
  11716. +++ linux-4.8.15/kernel/irq/manage.c 2017-01-01 17:07:16.015425924 +0100
  11717. @@ -22,6 +22,7 @@
  11718. #include "internals.h"
  11719. #ifdef CONFIG_IRQ_FORCED_THREADING
  11720. +# ifndef CONFIG_PREEMPT_RT_BASE
  11721. __read_mostly bool force_irqthreads;
  11722. static int __init setup_forced_irqthreads(char *arg)
  11723. @@ -30,6 +31,7 @@
  11724. return 0;
  11725. }
  11726. early_param("threadirqs", setup_forced_irqthreads);
  11727. +# endif
  11728. #endif
  11729. static void __synchronize_hardirq(struct irq_desc *desc)
  11730. @@ -233,7 +235,12 @@
  11731. if (desc->affinity_notify) {
  11732. kref_get(&desc->affinity_notify->kref);
  11733. +
  11734. +#ifdef CONFIG_PREEMPT_RT_BASE
  11735. + swork_queue(&desc->affinity_notify->swork);
  11736. +#else
  11737. schedule_work(&desc->affinity_notify->work);
  11738. +#endif
  11739. }
  11740. irqd_set(data, IRQD_AFFINITY_SET);
  11741. @@ -271,10 +278,8 @@
  11742. }
  11743. EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
  11744. -static void irq_affinity_notify(struct work_struct *work)
  11745. +static void _irq_affinity_notify(struct irq_affinity_notify *notify)
  11746. {
  11747. - struct irq_affinity_notify *notify =
  11748. - container_of(work, struct irq_affinity_notify, work);
  11749. struct irq_desc *desc = irq_to_desc(notify->irq);
  11750. cpumask_var_t cpumask;
  11751. unsigned long flags;
  11752. @@ -296,6 +301,35 @@
  11753. kref_put(&notify->kref, notify->release);
  11754. }
  11755. +#ifdef CONFIG_PREEMPT_RT_BASE
  11756. +static void init_helper_thread(void)
  11757. +{
  11758. + static int init_sworker_once;
  11759. +
  11760. + if (init_sworker_once)
  11761. + return;
  11762. + if (WARN_ON(swork_get()))
  11763. + return;
  11764. + init_sworker_once = 1;
  11765. +}
  11766. +
  11767. +static void irq_affinity_notify(struct swork_event *swork)
  11768. +{
  11769. + struct irq_affinity_notify *notify =
  11770. + container_of(swork, struct irq_affinity_notify, swork);
  11771. + _irq_affinity_notify(notify);
  11772. +}
  11773. +
  11774. +#else
  11775. +
  11776. +static void irq_affinity_notify(struct work_struct *work)
  11777. +{
  11778. + struct irq_affinity_notify *notify =
  11779. + container_of(work, struct irq_affinity_notify, work);
  11780. + _irq_affinity_notify(notify);
  11781. +}
  11782. +#endif
  11783. +
  11784. /**
  11785. * irq_set_affinity_notifier - control notification of IRQ affinity changes
  11786. * @irq: Interrupt for which to enable/disable notification
  11787. @@ -324,7 +358,12 @@
  11788. if (notify) {
  11789. notify->irq = irq;
  11790. kref_init(&notify->kref);
  11791. +#ifdef CONFIG_PREEMPT_RT_BASE
  11792. + INIT_SWORK(&notify->swork, irq_affinity_notify);
  11793. + init_helper_thread();
  11794. +#else
  11795. INIT_WORK(&notify->work, irq_affinity_notify);
  11796. +#endif
  11797. }
  11798. raw_spin_lock_irqsave(&desc->lock, flags);
  11799. @@ -879,7 +918,15 @@
  11800. local_bh_disable();
  11801. ret = action->thread_fn(action->irq, action->dev_id);
  11802. irq_finalize_oneshot(desc, action);
  11803. - local_bh_enable();
  11804. + /*
  11805. + * Interrupts which have real time requirements can be set up
  11806. + * to avoid softirq processing in the thread handler. This is
  11807. + * safe as these interrupts do not raise soft interrupts.
  11808. + */
  11809. + if (irq_settings_no_softirq_call(desc))
  11810. + _local_bh_enable();
  11811. + else
  11812. + local_bh_enable();
  11813. return ret;
  11814. }
  11815. @@ -976,6 +1023,12 @@
  11816. if (action_ret == IRQ_WAKE_THREAD)
  11817. irq_wake_secondary(desc, action);
  11818. +#ifdef CONFIG_PREEMPT_RT_FULL
  11819. + migrate_disable();
  11820. + add_interrupt_randomness(action->irq, 0,
  11821. + desc->random_ip ^ (unsigned long) action);
  11822. + migrate_enable();
  11823. +#endif
  11824. wake_threads_waitq(desc);
  11825. }
  11826. @@ -1336,6 +1389,9 @@
  11827. irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
  11828. }
  11829. + if (new->flags & IRQF_NO_SOFTIRQ_CALL)
  11830. + irq_settings_set_no_softirq_call(desc);
  11831. +
  11832. /* Set default affinity mask once everything is setup */
  11833. setup_affinity(desc, mask);
  11834. @@ -2061,7 +2117,7 @@
  11835. * This call sets the internal irqchip state of an interrupt,
  11836. * depending on the value of @which.
  11837. *
  11838. - * This function should be called with preemption disabled if the
  11839. + * This function should be called with migration disabled if the
  11840. * interrupt controller has per-cpu registers.
  11841. */
  11842. int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
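The manage.c change splits the affinity notification into a common _irq_affinity_notify() body plus two thin wrappers, one for the regular workqueue path and one for the RT simple-work (swork) path, selected at build time. The following small sketch shows that refactoring shape with stand-in types instead of the kernel structures; build with -DUSE_SWORK to take the alternative branch.

/* Illustrative sketch of the "common body + per-backend thin wrapper"
 * split applied to irq_affinity_notify(). struct work/swork/notify are
 * stand-ins, not kernel types; USE_SWORK plays the role of
 * CONFIG_PREEMPT_RT_BASE. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work  { int cookie; };		/* stand-in for work_struct  */
struct swork { int cookie; };		/* stand-in for swork_event  */

struct notify {
	int irq;
#ifdef USE_SWORK
	struct swork swork;
#else
	struct work work;
#endif
};

static void do_notify(struct notify *n)	/* like _irq_affinity_notify() */
{
	printf("affinity changed on irq %d\n", n->irq);
}

#ifdef USE_SWORK
static void notify_fn(struct swork *ev)
{
	do_notify(container_of(ev, struct notify, swork));
}
#else
static void notify_fn(struct work *w)
{
	do_notify(container_of(w, struct notify, work));
}
#endif

int main(void)
{
	struct notify n = { .irq = 17 };

#ifdef USE_SWORK
	notify_fn(&n.swork);
#else
	notify_fn(&n.work);
#endif
	return 0;
}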
  11843. diff -Nur linux-4.8.15.orig/kernel/irq/settings.h linux-4.8.15/kernel/irq/settings.h
  11844. --- linux-4.8.15.orig/kernel/irq/settings.h 2016-12-15 17:50:48.000000000 +0100
  11845. +++ linux-4.8.15/kernel/irq/settings.h 2017-01-01 17:07:16.015425924 +0100
  11846. @@ -16,6 +16,7 @@
  11847. _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID,
  11848. _IRQ_IS_POLLED = IRQ_IS_POLLED,
  11849. _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY,
  11850. + _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL,
  11851. _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
  11852. };
  11853. @@ -30,6 +31,7 @@
  11854. #define IRQ_PER_CPU_DEVID GOT_YOU_MORON
  11855. #define IRQ_IS_POLLED GOT_YOU_MORON
  11856. #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON
  11857. +#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON
  11858. #undef IRQF_MODIFY_MASK
  11859. #define IRQF_MODIFY_MASK GOT_YOU_MORON
  11860. @@ -40,6 +42,16 @@
  11861. desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK);
  11862. }
  11863. +static inline bool irq_settings_no_softirq_call(struct irq_desc *desc)
  11864. +{
  11865. + return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL;
  11866. +}
  11867. +
  11868. +static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc)
  11869. +{
  11870. + desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL;
  11871. +}
  11872. +
  11873. static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
  11874. {
  11875. return desc->status_use_accessors & _IRQ_PER_CPU;
  11876. diff -Nur linux-4.8.15.orig/kernel/irq/spurious.c linux-4.8.15/kernel/irq/spurious.c
  11877. --- linux-4.8.15.orig/kernel/irq/spurious.c 2016-12-15 17:50:48.000000000 +0100
  11878. +++ linux-4.8.15/kernel/irq/spurious.c 2017-01-01 17:07:16.015425924 +0100
  11879. @@ -442,6 +442,10 @@
  11880. static int __init irqfixup_setup(char *str)
  11881. {
  11882. +#ifdef CONFIG_PREEMPT_RT_BASE
  11883. + pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
  11884. + return 1;
  11885. +#endif
  11886. irqfixup = 1;
  11887. printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
  11888. printk(KERN_WARNING "This may impact system performance.\n");
  11889. @@ -454,6 +458,10 @@
  11890. static int __init irqpoll_setup(char *str)
  11891. {
  11892. +#ifdef CONFIG_PREEMPT_RT_BASE
  11893. + pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
  11894. + return 1;
  11895. +#endif
  11896. irqfixup = 2;
  11897. printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
  11898. "enabled\n");
  11899. diff -Nur linux-4.8.15.orig/kernel/irq_work.c linux-4.8.15/kernel/irq_work.c
  11900. --- linux-4.8.15.orig/kernel/irq_work.c 2016-12-15 17:50:48.000000000 +0100
  11901. +++ linux-4.8.15/kernel/irq_work.c 2017-01-01 17:07:16.019426176 +0100
  11902. @@ -17,6 +17,7 @@
  11903. #include <linux/cpu.h>
  11904. #include <linux/notifier.h>
  11905. #include <linux/smp.h>
  11906. +#include <linux/interrupt.h>
  11907. #include <asm/processor.h>
  11908. @@ -65,6 +66,8 @@
  11909. */
  11910. bool irq_work_queue_on(struct irq_work *work, int cpu)
  11911. {
  11912. + struct llist_head *list;
  11913. +
  11914. /* All work should have been flushed before going offline */
  11915. WARN_ON_ONCE(cpu_is_offline(cpu));
  11916. @@ -75,7 +78,12 @@
  11917. if (!irq_work_claim(work))
  11918. return false;
  11919. - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
  11920. + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ))
  11921. + list = &per_cpu(lazy_list, cpu);
  11922. + else
  11923. + list = &per_cpu(raised_list, cpu);
  11924. +
  11925. + if (llist_add(&work->llnode, list))
  11926. arch_send_call_function_single_ipi(cpu);
  11927. return true;
  11928. @@ -86,6 +94,9 @@
  11929. /* Enqueue the irq work @work on the current CPU */
  11930. bool irq_work_queue(struct irq_work *work)
  11931. {
  11932. + struct llist_head *list;
  11933. + bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
  11934. +
  11935. /* Only queue if not already pending */
  11936. if (!irq_work_claim(work))
  11937. return false;
  11938. @@ -93,13 +104,15 @@
  11939. /* Queue the entry and raise the IPI if needed. */
  11940. preempt_disable();
  11941. - /* If the work is "lazy", handle it from next tick if any */
  11942. - if (work->flags & IRQ_WORK_LAZY) {
  11943. - if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
  11944. - tick_nohz_tick_stopped())
  11945. - arch_irq_work_raise();
  11946. - } else {
  11947. - if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
  11948. + lazy_work = work->flags & IRQ_WORK_LAZY;
  11949. +
  11950. + if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ)))
  11951. + list = this_cpu_ptr(&lazy_list);
  11952. + else
  11953. + list = this_cpu_ptr(&raised_list);
  11954. +
  11955. + if (llist_add(&work->llnode, list)) {
  11956. + if (!lazy_work || tick_nohz_tick_stopped())
  11957. arch_irq_work_raise();
  11958. }
  11959. @@ -116,9 +129,8 @@
  11960. raised = this_cpu_ptr(&raised_list);
  11961. lazy = this_cpu_ptr(&lazy_list);
  11962. - if (llist_empty(raised) || arch_irq_work_has_interrupt())
  11963. - if (llist_empty(lazy))
  11964. - return false;
  11965. + if (llist_empty(raised) && llist_empty(lazy))
  11966. + return false;
  11967. /* All work should have been flushed before going offline */
  11968. WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
  11969. @@ -132,7 +144,7 @@
  11970. struct irq_work *work;
  11971. struct llist_node *llnode;
  11972. - BUG_ON(!irqs_disabled());
  11973. + BUG_ON_NONRT(!irqs_disabled());
  11974. if (llist_empty(list))
  11975. return;
  11976. @@ -169,7 +181,16 @@
  11977. void irq_work_run(void)
  11978. {
  11979. irq_work_run_list(this_cpu_ptr(&raised_list));
  11980. - irq_work_run_list(this_cpu_ptr(&lazy_list));
  11981. + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) {
  11982. + /*
  11983. + * NOTE: we raise softirq via IPI for safety,
  11984. + * and execute in irq_work_tick() to move the
  11985. + * overhead from hard to soft irq context.
  11986. + */
  11987. + if (!llist_empty(this_cpu_ptr(&lazy_list)))
  11988. + raise_softirq(TIMER_SOFTIRQ);
  11989. + } else
  11990. + irq_work_run_list(this_cpu_ptr(&lazy_list));
  11991. }
  11992. EXPORT_SYMBOL_GPL(irq_work_run);
  11993. @@ -179,8 +200,17 @@
  11994. if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
  11995. irq_work_run_list(raised);
  11996. +
  11997. + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
  11998. + irq_work_run_list(this_cpu_ptr(&lazy_list));
  11999. +}
  12000. +
  12001. +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
  12002. +void irq_work_tick_soft(void)
  12003. +{
  12004. irq_work_run_list(this_cpu_ptr(&lazy_list));
  12005. }
  12006. +#endif
  12007. /*
  12008. * Synchronize against the irq_work @entry, ensures the entry is not
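The irq_work.c hunk changes where queued work lands: on PREEMPT_RT_FULL everything except IRQ_WORK_HARD_IRQ work goes to the lazy list (run from the timer softirq), while otherwise only IRQ_WORK_LAZY does. Here is a tiny self-contained sketch of that decision, with the routing condition lifted from the patch and asserted against both configurations; the flag values and the pick_list() helper are illustrative.

/* Sketch of the queue-selection logic irq_work_queue() uses after this
 * patch. Flag names mirror the patch, the numeric values and the helper
 * are illustrative only. */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define IRQ_WORK_LAZY		(1 << 0)
#define IRQ_WORK_HARD_IRQ	(1 << 1)

enum target { RAISED_LIST, LAZY_LIST };

static enum target pick_list(unsigned int flags, bool rt_full)
{
	bool lazy_work = flags & IRQ_WORK_LAZY;

	/* same condition as in the patched irq_work_queue() */
	if (lazy_work || (rt_full && !(flags & IRQ_WORK_HARD_IRQ)))
		return LAZY_LIST;
	return RAISED_LIST;
}

int main(void)
{
	/* !RT: only explicitly lazy work is deferred to the tick. */
	assert(pick_list(0, false) == RAISED_LIST);
	assert(pick_list(IRQ_WORK_LAZY, false) == LAZY_LIST);

	/* RT: default work is deferred, hard-irq work stays raised. */
	assert(pick_list(0, true) == LAZY_LIST);
	assert(pick_list(IRQ_WORK_HARD_IRQ, true) == RAISED_LIST);

	printf("irq_work routing matches the patch description\n");
	return 0;
}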
  12009. diff -Nur linux-4.8.15.orig/kernel/Kconfig.locks linux-4.8.15/kernel/Kconfig.locks
  12010. --- linux-4.8.15.orig/kernel/Kconfig.locks 2016-12-15 17:50:48.000000000 +0100
  12011. +++ linux-4.8.15/kernel/Kconfig.locks 2017-01-01 17:07:15.995424645 +0100
  12012. @@ -225,11 +225,11 @@
  12013. config MUTEX_SPIN_ON_OWNER
  12014. def_bool y
  12015. - depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
  12016. + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
  12017. config RWSEM_SPIN_ON_OWNER
  12018. def_bool y
  12019. - depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
  12020. + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
  12021. config LOCK_SPIN_ON_OWNER
  12022. def_bool y
  12023. diff -Nur linux-4.8.15.orig/kernel/Kconfig.preempt linux-4.8.15/kernel/Kconfig.preempt
  12024. --- linux-4.8.15.orig/kernel/Kconfig.preempt 2016-12-15 17:50:48.000000000 +0100
  12025. +++ linux-4.8.15/kernel/Kconfig.preempt 2017-01-01 17:07:15.995424645 +0100
  12026. @@ -1,3 +1,16 @@
  12027. +config PREEMPT
  12028. + bool
  12029. + select PREEMPT_COUNT
  12030. +
  12031. +config PREEMPT_RT_BASE
  12032. + bool
  12033. + select PREEMPT
  12034. +
  12035. +config HAVE_PREEMPT_LAZY
  12036. + bool
  12037. +
  12038. +config PREEMPT_LAZY
  12039. + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL
  12040. choice
  12041. prompt "Preemption Model"
  12042. @@ -33,9 +46,9 @@
  12043. Select this if you are building a kernel for a desktop system.
  12044. -config PREEMPT
  12045. +config PREEMPT__LL
  12046. bool "Preemptible Kernel (Low-Latency Desktop)"
  12047. - select PREEMPT_COUNT
  12048. + select PREEMPT
  12049. select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
  12050. help
  12051. This option reduces the latency of the kernel by making
  12052. @@ -52,6 +65,22 @@
  12053. embedded system with latency requirements in the milliseconds
  12054. range.
  12055. +config PREEMPT_RTB
  12056. + bool "Preemptible Kernel (Basic RT)"
  12057. + select PREEMPT_RT_BASE
  12058. + help
  12059. + This option is basically the same as (Low-Latency Desktop) but
  12060. + enables changes which are preliminary for the full preemptible
  12061. + RT kernel.
  12062. +
  12063. +config PREEMPT_RT_FULL
  12064. + bool "Fully Preemptible Kernel (RT)"
  12065. + depends on IRQ_FORCED_THREADING
  12066. + select PREEMPT_RT_BASE
  12067. + select PREEMPT_RCU
  12068. + help
  12069. + All and everything
  12070. +
  12071. endchoice
  12072. config PREEMPT_COUNT
  12073. diff -Nur linux-4.8.15.orig/kernel/ksysfs.c linux-4.8.15/kernel/ksysfs.c
  12074. --- linux-4.8.15.orig/kernel/ksysfs.c 2016-12-15 17:50:48.000000000 +0100
  12075. +++ linux-4.8.15/kernel/ksysfs.c 2017-01-01 17:07:16.019426176 +0100
  12076. @@ -136,6 +136,15 @@
  12077. #endif /* CONFIG_KEXEC_CORE */
  12078. +#if defined(CONFIG_PREEMPT_RT_FULL)
  12079. +static ssize_t realtime_show(struct kobject *kobj,
  12080. + struct kobj_attribute *attr, char *buf)
  12081. +{
  12082. + return sprintf(buf, "%d\n", 1);
  12083. +}
  12084. +KERNEL_ATTR_RO(realtime);
  12085. +#endif
  12086. +
  12087. /* whether file capabilities are enabled */
  12088. static ssize_t fscaps_show(struct kobject *kobj,
  12089. struct kobj_attribute *attr, char *buf)
  12090. @@ -225,6 +234,9 @@
  12091. &rcu_expedited_attr.attr,
  12092. &rcu_normal_attr.attr,
  12093. #endif
  12094. +#ifdef CONFIG_PREEMPT_RT_FULL
  12095. + &realtime_attr.attr,
  12096. +#endif
  12097. NULL
  12098. };
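The ksysfs.c hunk exposes a read-only /sys/kernel/realtime attribute containing "1" on PREEMPT_RT_FULL kernels only. A minimal userspace probe built on that follows; on kernels without this patch the file is simply absent and the probe reports "no".

/* Probe for the /sys/kernel/realtime attribute added by this patch. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/realtime", "r");
	int rt = 0;

	if (f) {
		if (fscanf(f, "%d", &rt) != 1)
			rt = 0;
		fclose(f);
	}
	printf("PREEMPT_RT_FULL kernel: %s\n", rt ? "yes" : "no");
	return 0;
}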
  12099. diff -Nur linux-4.8.15.orig/kernel/locking/lglock.c linux-4.8.15/kernel/locking/lglock.c
  12100. --- linux-4.8.15.orig/kernel/locking/lglock.c 2016-12-15 17:50:48.000000000 +0100
  12101. +++ linux-4.8.15/kernel/locking/lglock.c 2017-01-01 17:07:16.019426176 +0100
  12102. @@ -4,6 +4,15 @@
  12103. #include <linux/cpu.h>
  12104. #include <linux/string.h>
  12105. +#ifndef CONFIG_PREEMPT_RT_FULL
  12106. +# define lg_lock_ptr arch_spinlock_t
  12107. +# define lg_do_lock(l) arch_spin_lock(l)
  12108. +# define lg_do_unlock(l) arch_spin_unlock(l)
  12109. +#else
  12110. +# define lg_lock_ptr struct rt_mutex
  12111. +# define lg_do_lock(l) __rt_spin_lock__no_mg(l)
  12112. +# define lg_do_unlock(l) __rt_spin_unlock(l)
  12113. +#endif
  12114. /*
  12115. * Note there is no uninit, so lglocks cannot be defined in
  12116. * modules (but it's fine to use them from there)
  12117. @@ -12,51 +21,60 @@
  12118. void lg_lock_init(struct lglock *lg, char *name)
  12119. {
  12120. +#ifdef CONFIG_PREEMPT_RT_FULL
  12121. + int i;
  12122. +
  12123. + for_each_possible_cpu(i) {
  12124. + struct rt_mutex *lock = per_cpu_ptr(lg->lock, i);
  12125. +
  12126. + rt_mutex_init(lock);
  12127. + }
  12128. +#endif
  12129. LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
  12130. }
  12131. EXPORT_SYMBOL(lg_lock_init);
  12132. void lg_local_lock(struct lglock *lg)
  12133. {
  12134. - arch_spinlock_t *lock;
  12135. + lg_lock_ptr *lock;
  12136. - preempt_disable();
  12137. + migrate_disable();
  12138. lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  12139. lock = this_cpu_ptr(lg->lock);
  12140. - arch_spin_lock(lock);
  12141. + lg_do_lock(lock);
  12142. }
  12143. EXPORT_SYMBOL(lg_local_lock);
  12144. void lg_local_unlock(struct lglock *lg)
  12145. {
  12146. - arch_spinlock_t *lock;
  12147. + lg_lock_ptr *lock;
  12148. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  12149. lock = this_cpu_ptr(lg->lock);
  12150. - arch_spin_unlock(lock);
  12151. - preempt_enable();
  12152. + lg_do_unlock(lock);
  12153. + migrate_enable();
  12154. }
  12155. EXPORT_SYMBOL(lg_local_unlock);
  12156. void lg_local_lock_cpu(struct lglock *lg, int cpu)
  12157. {
  12158. - arch_spinlock_t *lock;
  12159. + lg_lock_ptr *lock;
  12160. - preempt_disable();
  12161. + preempt_disable_nort();
  12162. lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  12163. lock = per_cpu_ptr(lg->lock, cpu);
  12164. - arch_spin_lock(lock);
  12165. + lg_do_lock(lock);
  12166. }
  12167. EXPORT_SYMBOL(lg_local_lock_cpu);
  12168. void lg_local_unlock_cpu(struct lglock *lg, int cpu)
  12169. {
  12170. - arch_spinlock_t *lock;
  12171. + lg_lock_ptr *lock;
  12172. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  12173. lock = per_cpu_ptr(lg->lock, cpu);
  12174. - arch_spin_unlock(lock);
  12175. - preempt_enable();
  12176. + lg_do_unlock(lock);
  12177. + preempt_enable_nort();
  12178. }
  12179. EXPORT_SYMBOL(lg_local_unlock_cpu);
  12180. @@ -68,30 +86,30 @@
  12181. if (cpu2 < cpu1)
  12182. swap(cpu1, cpu2);
  12183. - preempt_disable();
  12184. + preempt_disable_nort();
  12185. lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  12186. - arch_spin_lock(per_cpu_ptr(lg->lock, cpu1));
  12187. - arch_spin_lock(per_cpu_ptr(lg->lock, cpu2));
  12188. + lg_do_lock(per_cpu_ptr(lg->lock, cpu1));
  12189. + lg_do_lock(per_cpu_ptr(lg->lock, cpu2));
  12190. }
  12191. void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2)
  12192. {
  12193. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  12194. - arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1));
  12195. - arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2));
  12196. - preempt_enable();
  12197. + lg_do_unlock(per_cpu_ptr(lg->lock, cpu1));
  12198. + lg_do_unlock(per_cpu_ptr(lg->lock, cpu2));
  12199. + preempt_enable_nort();
  12200. }
  12201. void lg_global_lock(struct lglock *lg)
  12202. {
  12203. int i;
  12204. - preempt_disable();
  12205. + preempt_disable_nort();
  12206. lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  12207. for_each_possible_cpu(i) {
  12208. - arch_spinlock_t *lock;
  12209. + lg_lock_ptr *lock;
  12210. lock = per_cpu_ptr(lg->lock, i);
  12211. - arch_spin_lock(lock);
  12212. + lg_do_lock(lock);
  12213. }
  12214. }
  12215. EXPORT_SYMBOL(lg_global_lock);
  12216. @@ -102,10 +120,35 @@
  12217. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  12218. for_each_possible_cpu(i) {
  12219. - arch_spinlock_t *lock;
  12220. + lg_lock_ptr *lock;
  12221. lock = per_cpu_ptr(lg->lock, i);
  12222. - arch_spin_unlock(lock);
  12223. + lg_do_unlock(lock);
  12224. }
  12225. - preempt_enable();
  12226. + preempt_enable_nort();
  12227. }
  12228. EXPORT_SYMBOL(lg_global_unlock);
  12229. +
  12230. +#ifdef CONFIG_PREEMPT_RT_FULL
  12231. +/*
  12232. + * HACK: If you use this, you get to keep the pieces.
  12233. + * Used in queue_stop_cpus_work() when stop machinery
  12234. + * is called from inactive CPU, so we can't schedule.
  12235. + */
  12236. +# define lg_do_trylock_relax(l) \
  12237. + do { \
  12238. + while (!__rt_spin_trylock(l)) \
  12239. + cpu_relax(); \
  12240. + } while (0)
  12241. +
  12242. +void lg_global_trylock_relax(struct lglock *lg)
  12243. +{
  12244. + int i;
  12245. +
  12246. + lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  12247. + for_each_possible_cpu(i) {
  12248. + lg_lock_ptr *lock;
  12249. + lock = per_cpu_ptr(lg->lock, i);
  12250. + lg_do_trylock_relax(lock);
  12251. + }
  12252. +}
  12253. +#endif
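lglock keeps one lock per CPU: the local operations take only the current CPU's lock, while the global operations take every per-CPU lock in a fixed order; this hunk merely swaps the underlying lock type to an rtmutex on RT and adjusts the preemption/migration handling. Below is a small userspace analogue of that structure using plain pthread mutexes; NR_CPUS and the lg_* names are illustrative, not the kernel API.

/* Userspace analogue of the lglock structure: per-"CPU" locks with a
 * cheap local path and an expensive global path. Illustrative only. */
#include <pthread.h>
#include <stdio.h>

#define NR_CPUS 4

static pthread_mutex_t cpu_lock[NR_CPUS] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};

static void lg_local_lock(int cpu)   { pthread_mutex_lock(&cpu_lock[cpu]); }
static void lg_local_unlock(int cpu) { pthread_mutex_unlock(&cpu_lock[cpu]); }

static void lg_global_lock(void)
{
	for (int i = 0; i < NR_CPUS; i++)	/* fixed order avoids deadlock */
		pthread_mutex_lock(&cpu_lock[i]);
}

static void lg_global_unlock(void)
{
	for (int i = 0; i < NR_CPUS; i++)
		pthread_mutex_unlock(&cpu_lock[i]);
}

int main(void)
{
	lg_local_lock(1);			/* cheap, per-"CPU" path */
	lg_local_unlock(1);

	lg_global_lock();			/* rare path: takes every lock */
	printf("global section: all per-cpu locks held\n");
	lg_global_unlock();
	return 0;
}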
  12254. diff -Nur linux-4.8.15.orig/kernel/locking/lockdep.c linux-4.8.15/kernel/locking/lockdep.c
  12255. --- linux-4.8.15.orig/kernel/locking/lockdep.c 2016-12-15 17:50:48.000000000 +0100
  12256. +++ linux-4.8.15/kernel/locking/lockdep.c 2017-01-01 17:07:16.019426176 +0100
  12257. @@ -3686,6 +3686,7 @@
  12258. }
  12259. }
  12260. +#ifndef CONFIG_PREEMPT_RT_FULL
  12261. /*
  12262. * We dont accurately track softirq state in e.g.
  12263. * hardirq contexts (such as on 4KSTACKS), so only
  12264. @@ -3700,6 +3701,7 @@
  12265. DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
  12266. }
  12267. }
  12268. +#endif
  12269. if (!debug_locks)
  12270. print_irqtrace_events(current);
  12271. diff -Nur linux-4.8.15.orig/kernel/locking/locktorture.c linux-4.8.15/kernel/locking/locktorture.c
  12272. --- linux-4.8.15.orig/kernel/locking/locktorture.c 2016-12-15 17:50:48.000000000 +0100
  12273. +++ linux-4.8.15/kernel/locking/locktorture.c 2017-01-01 17:07:16.019426176 +0100
  12274. @@ -26,7 +26,6 @@
  12275. #include <linux/kthread.h>
  12276. #include <linux/sched/rt.h>
  12277. #include <linux/spinlock.h>
  12278. -#include <linux/rwlock.h>
  12279. #include <linux/mutex.h>
  12280. #include <linux/rwsem.h>
  12281. #include <linux/smp.h>
  12282. diff -Nur linux-4.8.15.orig/kernel/locking/Makefile linux-4.8.15/kernel/locking/Makefile
  12283. --- linux-4.8.15.orig/kernel/locking/Makefile 2016-12-15 17:50:48.000000000 +0100
  12284. +++ linux-4.8.15/kernel/locking/Makefile 2017-01-01 17:07:16.019426176 +0100
  12285. @@ -2,7 +2,7 @@
  12286. # and is generally not a function of system call inputs.
  12287. KCOV_INSTRUMENT := n
  12288. -obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
  12289. +obj-y += semaphore.o percpu-rwsem.o
  12290. ifdef CONFIG_FUNCTION_TRACER
  12291. CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
  12292. @@ -11,7 +11,11 @@
  12293. CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE)
  12294. endif
  12295. +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
  12296. +obj-y += mutex.o
  12297. obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
  12298. +obj-y += rwsem.o
  12299. +endif
  12300. obj-$(CONFIG_LOCKDEP) += lockdep.o
  12301. ifeq ($(CONFIG_PROC_FS),y)
  12302. obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
  12303. @@ -25,7 +29,10 @@
  12304. obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
  12305. obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
  12306. obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
  12307. +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
  12308. obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
  12309. obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
  12310. +endif
  12311. +obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o
  12312. obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
  12313. obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
  12314. diff -Nur linux-4.8.15.orig/kernel/locking/rt.c linux-4.8.15/kernel/locking/rt.c
  12315. --- linux-4.8.15.orig/kernel/locking/rt.c 1970-01-01 01:00:00.000000000 +0100
  12316. +++ linux-4.8.15/kernel/locking/rt.c 2017-01-01 17:07:16.019426176 +0100
  12317. @@ -0,0 +1,498 @@
  12318. +/*
  12319. + * kernel/rt.c
  12320. + *
  12321. + * Real-Time Preemption Support
  12322. + *
  12323. + * started by Ingo Molnar:
  12324. + *
  12325. + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  12326. + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  12327. + *
  12328. + * historic credit for proving that Linux spinlocks can be implemented via
  12329. + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow
  12330. + * and others) who prototyped it on 2.4 and did lots of comparative
  12331. + * research and analysis; TimeSys, for proving that you can implement a
  12332. + * fully preemptible kernel via the use of IRQ threading and mutexes;
  12333. + * Bill Huey for persuasively arguing on lkml that the mutex model is the
  12334. + * right one; and to MontaVista, who ported pmutexes to 2.6.
  12335. + *
  12336. + * This code is a from-scratch implementation and is not based on pmutexes,
  12337. + * but the idea of converting spinlocks to mutexes is used here too.
  12338. + *
  12339. + * lock debugging, locking tree, deadlock detection:
  12340. + *
  12341. + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
  12342. + * Released under the General Public License (GPL).
  12343. + *
  12344. + * Includes portions of the generic R/W semaphore implementation from:
  12345. + *
  12346. + * Copyright (c) 2001 David Howells (dhowells@redhat.com).
  12347. + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
  12348. + * - Derived also from comments by Linus
  12349. + *
  12350. + * Pending ownership of locks and ownership stealing:
  12351. + *
  12352. + * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt
  12353. + *
  12354. + * (also by Steven Rostedt)
  12355. + * - Converted single pi_lock to individual task locks.
  12356. + *
  12357. + * By Esben Nielsen:
  12358. + * Doing priority inheritance with help of the scheduler.
  12359. + *
  12360. + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  12361. + * - major rework based on Esben Nielsens initial patch
  12362. + * - replaced thread_info references by task_struct refs
  12363. + * - removed task->pending_owner dependency
  12364. + * - BKL drop/reacquire for semaphore style locks to avoid deadlocks
  12365. + * in the scheduler return path as discussed with Steven Rostedt
  12366. + *
  12367. + * Copyright (C) 2006, Kihon Technologies Inc.
  12368. + * Steven Rostedt <rostedt@goodmis.org>
  12369. + * - debugged and patched Thomas Gleixner's rework.
  12370. + * - added back the cmpxchg to the rework.
  12371. + * - turned atomic require back on for SMP.
  12372. + */
  12373. +
  12374. +#include <linux/spinlock.h>
  12375. +#include <linux/rtmutex.h>
  12376. +#include <linux/sched.h>
  12377. +#include <linux/delay.h>
  12378. +#include <linux/module.h>
  12379. +#include <linux/kallsyms.h>
  12380. +#include <linux/syscalls.h>
  12381. +#include <linux/interrupt.h>
  12382. +#include <linux/plist.h>
  12383. +#include <linux/fs.h>
  12384. +#include <linux/futex.h>
  12385. +#include <linux/hrtimer.h>
  12386. +
  12387. +#include "rtmutex_common.h"
  12388. +
  12389. +/*
  12390. + * struct mutex functions
  12391. + */
  12392. +void __mutex_do_init(struct mutex *mutex, const char *name,
  12393. + struct lock_class_key *key)
  12394. +{
  12395. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12396. + /*
  12397. + * Make sure we are not reinitializing a held lock:
  12398. + */
  12399. + debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
  12400. + lockdep_init_map(&mutex->dep_map, name, key, 0);
  12401. +#endif
  12402. + mutex->lock.save_state = 0;
  12403. +}
  12404. +EXPORT_SYMBOL(__mutex_do_init);
  12405. +
  12406. +void __lockfunc _mutex_lock(struct mutex *lock)
  12407. +{
  12408. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  12409. + rt_mutex_lock(&lock->lock);
  12410. +}
  12411. +EXPORT_SYMBOL(_mutex_lock);
  12412. +
  12413. +int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
  12414. +{
  12415. + int ret;
  12416. +
  12417. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  12418. + ret = rt_mutex_lock_interruptible(&lock->lock);
  12419. + if (ret)
  12420. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  12421. + return ret;
  12422. +}
  12423. +EXPORT_SYMBOL(_mutex_lock_interruptible);
  12424. +
  12425. +int __lockfunc _mutex_lock_killable(struct mutex *lock)
  12426. +{
  12427. + int ret;
  12428. +
  12429. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  12430. + ret = rt_mutex_lock_killable(&lock->lock);
  12431. + if (ret)
  12432. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  12433. + return ret;
  12434. +}
  12435. +EXPORT_SYMBOL(_mutex_lock_killable);
  12436. +
  12437. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12438. +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
  12439. +{
  12440. + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
  12441. + rt_mutex_lock(&lock->lock);
  12442. +}
  12443. +EXPORT_SYMBOL(_mutex_lock_nested);
  12444. +
  12445. +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
  12446. +{
  12447. + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_);
  12448. + rt_mutex_lock(&lock->lock);
  12449. +}
  12450. +EXPORT_SYMBOL(_mutex_lock_nest_lock);
  12451. +
  12452. +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass)
  12453. +{
  12454. + int ret;
  12455. +
  12456. + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
  12457. + ret = rt_mutex_lock_interruptible(&lock->lock);
  12458. + if (ret)
  12459. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  12460. + return ret;
  12461. +}
  12462. +EXPORT_SYMBOL(_mutex_lock_interruptible_nested);
  12463. +
  12464. +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass)
  12465. +{
  12466. + int ret;
  12467. +
  12468. + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
  12469. + ret = rt_mutex_lock_killable(&lock->lock);
  12470. + if (ret)
  12471. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  12472. + return ret;
  12473. +}
  12474. +EXPORT_SYMBOL(_mutex_lock_killable_nested);
  12475. +#endif
  12476. +
  12477. +int __lockfunc _mutex_trylock(struct mutex *lock)
  12478. +{
  12479. + int ret = rt_mutex_trylock(&lock->lock);
  12480. +
  12481. + if (ret)
  12482. + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  12483. +
  12484. + return ret;
  12485. +}
  12486. +EXPORT_SYMBOL(_mutex_trylock);
  12487. +
  12488. +void __lockfunc _mutex_unlock(struct mutex *lock)
  12489. +{
  12490. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  12491. + rt_mutex_unlock(&lock->lock);
  12492. +}
  12493. +EXPORT_SYMBOL(_mutex_unlock);
  12494. +
  12495. +/*
  12496. + * rwlock_t functions
  12497. + */
  12498. +int __lockfunc rt_write_trylock(rwlock_t *rwlock)
  12499. +{
  12500. + int ret;
  12501. +
  12502. + migrate_disable();
  12503. + ret = rt_mutex_trylock(&rwlock->lock);
  12504. + if (ret)
  12505. + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
  12506. + else
  12507. + migrate_enable();
  12508. +
  12509. + return ret;
  12510. +}
  12511. +EXPORT_SYMBOL(rt_write_trylock);
  12512. +
  12513. +int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags)
  12514. +{
  12515. + int ret;
  12516. +
  12517. + *flags = 0;
  12518. + ret = rt_write_trylock(rwlock);
  12519. + return ret;
  12520. +}
  12521. +EXPORT_SYMBOL(rt_write_trylock_irqsave);
  12522. +
  12523. +int __lockfunc rt_read_trylock(rwlock_t *rwlock)
  12524. +{
  12525. + struct rt_mutex *lock = &rwlock->lock;
  12526. + int ret = 1;
  12527. +
  12528. + /*
  12529. + * recursive read locks succeed when current owns the lock,
  12530. + * but not when read_depth == 0 which means that the lock is
  12531. + * write locked.
  12532. + */
  12533. + if (rt_mutex_owner(lock) != current) {
  12534. + migrate_disable();
  12535. + ret = rt_mutex_trylock(lock);
  12536. + if (ret)
  12537. + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
  12538. + else
  12539. + migrate_enable();
  12540. +
  12541. + } else if (!rwlock->read_depth) {
  12542. + ret = 0;
  12543. + }
  12544. +
  12545. + if (ret)
  12546. + rwlock->read_depth++;
  12547. +
  12548. + return ret;
  12549. +}
  12550. +EXPORT_SYMBOL(rt_read_trylock);
  12551. +
  12552. +void __lockfunc rt_write_lock(rwlock_t *rwlock)
  12553. +{
  12554. + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
  12555. + __rt_spin_lock(&rwlock->lock);
  12556. +}
  12557. +EXPORT_SYMBOL(rt_write_lock);
  12558. +
  12559. +void __lockfunc rt_read_lock(rwlock_t *rwlock)
  12560. +{
  12561. + struct rt_mutex *lock = &rwlock->lock;
  12562. +
  12563. +
  12564. + /*
  12565. + * recursive read locks succeed when current owns the lock
  12566. + */
  12567. + if (rt_mutex_owner(lock) != current) {
  12568. + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
  12569. + __rt_spin_lock(lock);
  12570. + }
  12571. + rwlock->read_depth++;
  12572. +}
  12573. +
  12574. +EXPORT_SYMBOL(rt_read_lock);
  12575. +
  12576. +void __lockfunc rt_write_unlock(rwlock_t *rwlock)
  12577. +{
  12578. + /* NOTE: we always pass in '1' for nested, for simplicity */
  12579. + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
  12580. + __rt_spin_unlock(&rwlock->lock);
  12581. + migrate_enable();
  12582. +}
  12583. +EXPORT_SYMBOL(rt_write_unlock);
  12584. +
  12585. +void __lockfunc rt_read_unlock(rwlock_t *rwlock)
  12586. +{
  12587. + /* Release the lock only when read_depth is down to 0 */
  12588. + if (--rwlock->read_depth == 0) {
  12589. + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
  12590. + __rt_spin_unlock(&rwlock->lock);
  12591. + migrate_enable();
  12592. + }
  12593. +}
  12594. +EXPORT_SYMBOL(rt_read_unlock);
  12595. +
  12596. +unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock)
  12597. +{
  12598. + rt_write_lock(rwlock);
  12599. +
  12600. + return 0;
  12601. +}
  12602. +EXPORT_SYMBOL(rt_write_lock_irqsave);
  12603. +
  12604. +unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock)
  12605. +{
  12606. + rt_read_lock(rwlock);
  12607. +
  12608. + return 0;
  12609. +}
  12610. +EXPORT_SYMBOL(rt_read_lock_irqsave);
  12611. +
  12612. +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
  12613. +{
  12614. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12615. + /*
  12616. + * Make sure we are not reinitializing a held lock:
  12617. + */
  12618. + debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
  12619. + lockdep_init_map(&rwlock->dep_map, name, key, 0);
  12620. +#endif
  12621. + rwlock->lock.save_state = 1;
  12622. + rwlock->read_depth = 0;
  12623. +}
  12624. +EXPORT_SYMBOL(__rt_rwlock_init);
  12625. +
  12626. +/*
  12627. + * rw_semaphores
  12628. + */
  12629. +
  12630. +void rt_up_write(struct rw_semaphore *rwsem)
  12631. +{
  12632. + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
  12633. + rt_mutex_unlock(&rwsem->lock);
  12634. +}
  12635. +EXPORT_SYMBOL(rt_up_write);
  12636. +
  12637. +void __rt_up_read(struct rw_semaphore *rwsem)
  12638. +{
  12639. + if (--rwsem->read_depth == 0)
  12640. + rt_mutex_unlock(&rwsem->lock);
  12641. +}
  12642. +
  12643. +void rt_up_read(struct rw_semaphore *rwsem)
  12644. +{
  12645. + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
  12646. + __rt_up_read(rwsem);
  12647. +}
  12648. +EXPORT_SYMBOL(rt_up_read);
  12649. +
  12650. +/*
  12651. + * downgrade a write lock into a read lock
  12652. + * - just wake up any readers at the front of the queue
  12653. + */
  12654. +void rt_downgrade_write(struct rw_semaphore *rwsem)
  12655. +{
  12656. + BUG_ON(rt_mutex_owner(&rwsem->lock) != current);
  12657. + rwsem->read_depth = 1;
  12658. +}
  12659. +EXPORT_SYMBOL(rt_downgrade_write);
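
/*
 * A minimal usage sketch, assuming the usual PREEMPT_RT mapping of
 * down_write()/downgrade_write()/up_read() onto the rt_* helpers;
 * publish_then_consume is a hypothetical caller. The downgrade keeps
 * ownership of the underlying rtmutex and merely re-accounts the holder
 * as a reader (read_depth = 1), so the later up_read() is what actually
 * releases the rwsem:
 */
static void publish_then_consume(struct rw_semaphore *sem)
{
        down_write(sem);
        /* ... modify the protected data ... */
        downgrade_write(sem);   /* still the owner, now counted as a reader */
        /* ... read the protected data; other tasks keep waiting ... */
        up_read(sem);           /* read_depth 1 -> 0, rwsem released */
}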
  12660. +
  12661. +int rt_down_write_trylock(struct rw_semaphore *rwsem)
  12662. +{
  12663. + int ret = rt_mutex_trylock(&rwsem->lock);
  12664. +
  12665. + if (ret)
  12666. + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
  12667. + return ret;
  12668. +}
  12669. +EXPORT_SYMBOL(rt_down_write_trylock);
  12670. +
  12671. +void rt_down_write(struct rw_semaphore *rwsem)
  12672. +{
  12673. + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_);
  12674. + rt_mutex_lock(&rwsem->lock);
  12675. +}
  12676. +EXPORT_SYMBOL(rt_down_write);
  12677. +
  12678. +int rt_down_write_killable(struct rw_semaphore *rwsem)
  12679. +{
  12680. + int ret;
  12681. +
  12682. + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_);
  12683. + ret = rt_mutex_lock_killable(&rwsem->lock);
  12684. + if (ret)
  12685. + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
  12686. + return ret;
  12687. +}
  12688. +EXPORT_SYMBOL(rt_down_write_killable);
  12689. +
  12690. +int rt_down_write_killable_nested(struct rw_semaphore *rwsem, int subclass)
  12691. +{
  12692. + int ret;
  12693. +
  12694. + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
  12695. + ret = rt_mutex_lock_killable(&rwsem->lock);
  12696. + if (ret)
  12697. + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
  12698. + return ret;
  12699. +}
  12700. +EXPORT_SYMBOL(rt_down_write_killable_nested);
  12701. +
  12702. +void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass)
  12703. +{
  12704. + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
  12705. + rt_mutex_lock(&rwsem->lock);
  12706. +}
  12707. +EXPORT_SYMBOL(rt_down_write_nested);
  12708. +
  12709. +void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
  12710. + struct lockdep_map *nest)
  12711. +{
  12712. + rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_);
  12713. + rt_mutex_lock(&rwsem->lock);
  12714. +}
  12715. +EXPORT_SYMBOL(rt_down_write_nested_lock);
  12716. +
  12717. +int rt__down_read_trylock(struct rw_semaphore *rwsem)
  12718. +{
  12719. + struct rt_mutex *lock = &rwsem->lock;
  12720. + int ret = 1;
  12721. +
  12722. + /*
  12723. + * recursive read locks succeed when current owns the rwsem,
  12724. + * but not when read_depth == 0 which means that the rwsem is
  12725. + * write locked.
  12726. + */
  12727. + if (rt_mutex_owner(lock) != current)
  12728. + ret = rt_mutex_trylock(&rwsem->lock);
  12729. + else if (!rwsem->read_depth)
  12730. + ret = 0;
  12731. +
  12732. + if (ret)
  12733. + rwsem->read_depth++;
  12734. + return ret;
  12735. +
  12736. +}
  12737. +
  12738. +int rt_down_read_trylock(struct rw_semaphore *rwsem)
  12739. +{
  12740. + int ret;
  12741. +
  12742. + ret = rt__down_read_trylock(rwsem);
  12743. + if (ret)
  12744. + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
  12745. +
  12746. + return ret;
  12747. +}
  12748. +EXPORT_SYMBOL(rt_down_read_trylock);
  12749. +
  12750. +void rt__down_read(struct rw_semaphore *rwsem)
  12751. +{
  12752. + struct rt_mutex *lock = &rwsem->lock;
  12753. +
  12754. + if (rt_mutex_owner(lock) != current)
  12755. + rt_mutex_lock(&rwsem->lock);
  12756. + rwsem->read_depth++;
  12757. +}
  12758. +EXPORT_SYMBOL(rt__down_read);
  12759. +
  12760. +static void __rt_down_read(struct rw_semaphore *rwsem, int subclass)
  12761. +{
  12762. + rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_);
  12763. + rt__down_read(rwsem);
  12764. +}
  12765. +
  12766. +void rt_down_read(struct rw_semaphore *rwsem)
  12767. +{
  12768. + __rt_down_read(rwsem, 0);
  12769. +}
  12770. +EXPORT_SYMBOL(rt_down_read);
  12771. +
  12772. +void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass)
  12773. +{
  12774. + __rt_down_read(rwsem, subclass);
  12775. +}
  12776. +EXPORT_SYMBOL(rt_down_read_nested);
  12777. +
  12778. +void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
  12779. + struct lock_class_key *key)
  12780. +{
  12781. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12782. + /*
  12783. + * Make sure we are not reinitializing a held lock:
  12784. + */
  12785. + debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem));
  12786. + lockdep_init_map(&rwsem->dep_map, name, key, 0);
  12787. +#endif
  12788. + rwsem->read_depth = 0;
  12789. + rwsem->lock.save_state = 0;
  12790. +}
  12791. +EXPORT_SYMBOL(__rt_rwsem_init);
  12792. +
  12793. +/**
  12794. + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
  12795. + * @cnt: the atomic which we are to dec
  12796. + * @lock: the mutex to return holding if we dec to 0
  12797. + *
  12798. + * return true and hold lock if we dec to 0, return false otherwise
  12799. + */
  12800. +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
  12801. +{
  12802. + /* dec if we can't possibly hit 0 */
  12803. + if (atomic_add_unless(cnt, -1, 1))
  12804. + return 0;
  12805. + /* we might hit 0, so take the lock */
  12806. + mutex_lock(lock);
  12807. + if (!atomic_dec_and_test(cnt)) {
  12808. + /* when we actually did the dec, we didn't hit 0 */
  12809. + mutex_unlock(lock);
  12810. + return 0;
  12811. + }
  12812. + /* we hit 0, and we hold the lock */
  12813. + return 1;
  12814. +}
  12815. +EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
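
/*
 * A minimal usage sketch for atomic_dec_and_mutex_lock(); struct foo,
 * foo_release() and table_lock are hypothetical names, not part of the
 * hunks above. The mutex is only taken on a drop that can reach zero:
 */
struct foo {
        atomic_t                refcount;
        struct list_head        node;
};

static void foo_release(struct foo *f, struct mutex *table_lock)
{
        /* Take table_lock only if this drop takes the count to zero. */
        if (atomic_dec_and_mutex_lock(&f->refcount, table_lock)) {
                list_del(&f->node);     /* last user: unlink under the lock */
                mutex_unlock(table_lock);
                kfree(f);
        }
}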
  12816. diff -Nur linux-4.8.15.orig/kernel/locking/rtmutex.c linux-4.8.15/kernel/locking/rtmutex.c
  12817. --- linux-4.8.15.orig/kernel/locking/rtmutex.c 2016-12-15 17:50:48.000000000 +0100
  12818. +++ linux-4.8.15/kernel/locking/rtmutex.c 2017-01-01 17:07:16.023426438 +0100
  12819. @@ -7,6 +7,11 @@
  12820. * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  12821. * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
  12822. * Copyright (C) 2006 Esben Nielsen
  12823. + * Adaptive Spinlocks:
  12824. + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
  12825. + * and Peter Morreale,
  12826. + * Adaptive Spinlocks simplification:
  12827. + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
  12828. *
  12829. * See Documentation/locking/rt-mutex-design.txt for details.
  12830. */
  12831. @@ -16,6 +21,7 @@
  12832. #include <linux/sched/rt.h>
  12833. #include <linux/sched/deadline.h>
  12834. #include <linux/timer.h>
  12835. +#include <linux/ww_mutex.h>
  12836. #include "rtmutex_common.h"
  12837. @@ -133,6 +139,12 @@
  12838. WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
  12839. }
  12840. +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
  12841. +{
  12842. + return waiter && waiter != PI_WAKEUP_INPROGRESS &&
  12843. + waiter != PI_REQUEUE_INPROGRESS;
  12844. +}
  12845. +
  12846. /*
  12847. * We can speed up the acquire/release, if there's no debugging state to be
  12848. * set up.
  12849. @@ -414,6 +426,14 @@
  12850. return debug_rt_mutex_detect_deadlock(waiter, chwalk);
  12851. }
  12852. +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
  12853. +{
  12854. + if (waiter->savestate)
  12855. + wake_up_lock_sleeper(waiter->task);
  12856. + else
  12857. + wake_up_process(waiter->task);
  12858. +}
  12859. +
  12860. /*
  12861. * Max number of times we'll walk the boosting chain:
  12862. */
  12863. @@ -421,7 +441,8 @@
  12864. static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
  12865. {
  12866. - return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
  12867. + return rt_mutex_real_waiter(p->pi_blocked_on) ?
  12868. + p->pi_blocked_on->lock : NULL;
  12869. }
  12870. /*
  12871. @@ -557,7 +578,7 @@
  12872. * reached or the state of the chain has changed while we
  12873. * dropped the locks.
  12874. */
  12875. - if (!waiter)
  12876. + if (!rt_mutex_real_waiter(waiter))
  12877. goto out_unlock_pi;
  12878. /*
  12879. @@ -719,13 +740,16 @@
  12880. * follow here. This is the end of the chain we are walking.
  12881. */
  12882. if (!rt_mutex_owner(lock)) {
  12883. + struct rt_mutex_waiter *lock_top_waiter;
  12884. +
  12885. /*
  12886. * If the requeue [7] above changed the top waiter,
  12887. * then we need to wake the new top waiter up to try
  12888. * to get the lock.
  12889. */
  12890. - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
  12891. - wake_up_process(rt_mutex_top_waiter(lock)->task);
  12892. + lock_top_waiter = rt_mutex_top_waiter(lock);
  12893. + if (prerequeue_top_waiter != lock_top_waiter)
  12894. + rt_mutex_wake_waiter(lock_top_waiter);
  12895. raw_spin_unlock_irq(&lock->wait_lock);
  12896. return 0;
  12897. }
  12898. @@ -818,6 +842,25 @@
  12899. return ret;
  12900. }
  12901. +
  12902. +#define STEAL_NORMAL 0
  12903. +#define STEAL_LATERAL 1
  12904. +
  12905. +/*
  12906. + * Note that RT tasks are excluded from lateral-steals to prevent the
  12907. + * introduction of an unbounded latency
  12908. + */
  12909. +static inline int lock_is_stealable(struct task_struct *task,
  12910. + struct task_struct *pendowner, int mode)
  12911. +{
  12912. + if (mode == STEAL_NORMAL || rt_task(task)) {
  12913. + if (task->prio >= pendowner->prio)
  12914. + return 0;
  12915. + } else if (task->prio > pendowner->prio)
  12916. + return 0;
  12917. + return 1;
  12918. +}
  12919. +
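/*
 * A worked example for lock_is_stealable() above, using the kernel's prio
 * convention (a lower prio value means higher priority); the rows are
 * illustrative only:
 *
 *   task vs. pending owner       STEAL_NORMAL    STEAL_LATERAL
 *   higher priority (prio <)     steal           steal
 *   equal priority  (prio ==)    no steal        steal, unless task is RT
 *   lower priority  (prio >)     no steal        no steal
 */
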
  12920. /*
  12921. * Try to take an rt-mutex
  12922. *
  12923. @@ -828,8 +871,9 @@
  12924. * @waiter: The waiter that is queued to the lock's wait tree if the
  12925. * callsite called task_blocked_on_lock(), otherwise NULL
  12926. */
  12927. -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  12928. - struct rt_mutex_waiter *waiter)
  12929. +static int __try_to_take_rt_mutex(struct rt_mutex *lock,
  12930. + struct task_struct *task,
  12931. + struct rt_mutex_waiter *waiter, int mode)
  12932. {
  12933. /*
  12934. * Before testing whether we can acquire @lock, we set the
  12935. @@ -866,8 +910,10 @@
  12936. * If waiter is not the highest priority waiter of
  12937. * @lock, give up.
  12938. */
  12939. - if (waiter != rt_mutex_top_waiter(lock))
  12940. + if (waiter != rt_mutex_top_waiter(lock)) {
  12941. + /* XXX lock_is_stealable() ? */
  12942. return 0;
  12943. + }
  12944. /*
  12945. * We can acquire the lock. Remove the waiter from the
  12946. @@ -885,14 +931,10 @@
  12947. * not need to be dequeued.
  12948. */
  12949. if (rt_mutex_has_waiters(lock)) {
  12950. - /*
  12951. - * If @task->prio is greater than or equal to
  12952. - * the top waiter priority (kernel view),
  12953. - * @task lost.
  12954. - */
  12955. - if (task->prio >= rt_mutex_top_waiter(lock)->prio)
  12956. - return 0;
  12957. + struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
  12958. + if (task != pown && !lock_is_stealable(task, pown, mode))
  12959. + return 0;
  12960. /*
  12961. * The current top waiter stays enqueued. We
  12962. * don't have to change anything in the lock
  12963. @@ -941,6 +983,438 @@
  12964. return 1;
  12965. }
  12966. +#ifdef CONFIG_PREEMPT_RT_FULL
  12967. +/*
  12968. + * preemptible spin_lock functions:
  12969. + */
  12970. +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
  12971. + void (*slowfn)(struct rt_mutex *lock,
  12972. + bool mg_off),
  12973. + bool do_mig_dis)
  12974. +{
  12975. + might_sleep_no_state_check();
  12976. +
  12977. + if (do_mig_dis)
  12978. + migrate_disable();
  12979. +
  12980. + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
  12981. + rt_mutex_deadlock_account_lock(lock, current);
  12982. + else
  12983. + slowfn(lock, do_mig_dis);
  12984. +}
  12985. +
  12986. +static inline int rt_spin_lock_fastunlock(struct rt_mutex *lock,
  12987. + int (*slowfn)(struct rt_mutex *lock))
  12988. +{
  12989. + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
  12990. + rt_mutex_deadlock_account_unlock(current);
  12991. + return 0;
  12992. + }
  12993. + return slowfn(lock);
  12994. +}
  12995. +#ifdef CONFIG_SMP
  12996. +/*
  12997. + * Note that owner is a speculative pointer and dereferencing relies
  12998. + * on rcu_read_lock() and the check against the lock owner.
  12999. + */
  13000. +static int adaptive_wait(struct rt_mutex *lock,
  13001. + struct task_struct *owner)
  13002. +{
  13003. + int res = 0;
  13004. +
  13005. + rcu_read_lock();
  13006. + for (;;) {
  13007. + if (owner != rt_mutex_owner(lock))
  13008. + break;
  13009. + /*
  13010. + * Ensure that owner->on_cpu is dereferenced _after_
  13011. + * checking the above to be valid.
  13012. + */
  13013. + barrier();
  13014. + if (!owner->on_cpu) {
  13015. + res = 1;
  13016. + break;
  13017. + }
  13018. + cpu_relax();
  13019. + }
  13020. + rcu_read_unlock();
  13021. + return res;
  13022. +}
  13023. +#else
  13024. +static int adaptive_wait(struct rt_mutex *lock,
  13025. + struct task_struct *orig_owner)
  13026. +{
  13027. + return 1;
  13028. +}
  13029. +#endif
  13030. +
  13031. +static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  13032. + struct rt_mutex_waiter *waiter,
  13033. + struct task_struct *task,
  13034. + enum rtmutex_chainwalk chwalk);
  13035. +/*
  13036. + * Slow path lock function spin_lock style: this variant is very
  13037. + * careful not to miss any non-lock wakeups.
  13038. + *
  13039. + * We store the current state under p->pi_lock in p->saved_state and
  13040. + * the try_to_wake_up() code handles this accordingly.
  13041. + */
  13042. +static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock,
  13043. + bool mg_off)
  13044. +{
  13045. + struct task_struct *lock_owner, *self = current;
  13046. + struct rt_mutex_waiter waiter, *top_waiter;
  13047. + unsigned long flags;
  13048. + int ret;
  13049. +
  13050. + rt_mutex_init_waiter(&waiter, true);
  13051. +
  13052. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  13053. +
  13054. + if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
  13055. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  13056. + return;
  13057. + }
  13058. +
  13059. + BUG_ON(rt_mutex_owner(lock) == self);
  13060. +
  13061. + /*
  13062. + * We save whatever state the task is in and we'll restore it
  13063. + * after acquiring the lock taking real wakeups into account
  13064. + * as well. We are serialized via pi_lock against wakeups. See
  13065. + * try_to_wake_up().
  13066. + */
  13067. + raw_spin_lock(&self->pi_lock);
  13068. + self->saved_state = self->state;
  13069. + __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
  13070. + raw_spin_unlock(&self->pi_lock);
  13071. +
  13072. + ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK);
  13073. + BUG_ON(ret);
  13074. +
  13075. + for (;;) {
  13076. + /* Try to acquire the lock again. */
  13077. + if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
  13078. + break;
  13079. +
  13080. + top_waiter = rt_mutex_top_waiter(lock);
  13081. + lock_owner = rt_mutex_owner(lock);
  13082. +
  13083. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  13084. +
  13085. + debug_rt_mutex_print_deadlock(&waiter);
  13086. +
  13087. + if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) {
  13088. + if (mg_off)
  13089. + migrate_enable();
  13090. + schedule();
  13091. + if (mg_off)
  13092. + migrate_disable();
  13093. + }
  13094. +
  13095. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  13096. +
  13097. + raw_spin_lock(&self->pi_lock);
  13098. + __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
  13099. + raw_spin_unlock(&self->pi_lock);
  13100. + }
  13101. +
  13102. + /*
  13103. + * Restore the task state to current->saved_state. We set it
  13104. + * to the original state above and the try_to_wake_up() code
  13105. + * has possibly updated it when a real (non-rtmutex) wakeup
  13106. + * happened while we were blocked. Clear saved_state so
  13107. + * try_to_wake_up() does not get confused.
  13108. + */
  13109. + raw_spin_lock(&self->pi_lock);
  13110. + __set_current_state_no_track(self->saved_state);
  13111. + self->saved_state = TASK_RUNNING;
  13112. + raw_spin_unlock(&self->pi_lock);
  13113. +
  13114. + /*
  13115. + * try_to_take_rt_mutex() sets the waiter bit
  13116. + * unconditionally. We might have to fix that up:
  13117. + */
  13118. + fixup_rt_mutex_waiters(lock);
  13119. +
  13120. + BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
  13121. + BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry));
  13122. +
  13123. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  13124. +
  13125. + debug_rt_mutex_free_waiter(&waiter);
  13126. +}
  13127. +
  13128. +static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
  13129. + struct wake_q_head *wake_sleeper_q,
  13130. + struct rt_mutex *lock);
  13131. +/*
  13132. + * Slow path to release a rt_mutex spin_lock style
  13133. + */
  13134. +static int noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
  13135. +{
  13136. + unsigned long flags;
  13137. + WAKE_Q(wake_q);
  13138. + WAKE_Q(wake_sleeper_q);
  13139. +
  13140. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  13141. +
  13142. + debug_rt_mutex_unlock(lock);
  13143. +
  13144. + rt_mutex_deadlock_account_unlock(current);
  13145. +
  13146. + if (!rt_mutex_has_waiters(lock)) {
  13147. + lock->owner = NULL;
  13148. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  13149. + return 0;
  13150. + }
  13151. +
  13152. + mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
  13153. +
  13154. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  13155. + wake_up_q(&wake_q);
  13156. + wake_up_q_sleeper(&wake_sleeper_q);
  13157. +
  13158. + /* Undo pi boosting when necessary */
  13159. + rt_mutex_adjust_prio(current);
  13160. + return 0;
  13161. +}
  13162. +
  13163. +static int noinline __sched rt_spin_lock_slowunlock_no_deboost(struct rt_mutex *lock)
  13164. +{
  13165. + unsigned long flags;
  13166. + WAKE_Q(wake_q);
  13167. + WAKE_Q(wake_sleeper_q);
  13168. +
  13169. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  13170. +
  13171. + debug_rt_mutex_unlock(lock);
  13172. +
  13173. + rt_mutex_deadlock_account_unlock(current);
  13174. +
  13175. + if (!rt_mutex_has_waiters(lock)) {
  13176. + lock->owner = NULL;
  13177. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  13178. + return 0;
  13179. + }
  13180. +
  13181. + mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
  13182. +
  13183. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  13184. + wake_up_q(&wake_q);
  13185. + wake_up_q_sleeper(&wake_sleeper_q);
  13186. + return 1;
  13187. +}
  13188. +
  13189. +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
  13190. +{
  13191. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false);
  13192. + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  13193. +}
  13194. +EXPORT_SYMBOL(rt_spin_lock__no_mg);
  13195. +
  13196. +void __lockfunc rt_spin_lock(spinlock_t *lock)
  13197. +{
  13198. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
  13199. + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  13200. +}
  13201. +EXPORT_SYMBOL(rt_spin_lock);
  13202. +
  13203. +void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
  13204. +{
  13205. + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true);
  13206. +}
  13207. +EXPORT_SYMBOL(__rt_spin_lock);
  13208. +
  13209. +void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock)
  13210. +{
  13211. + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false);
  13212. +}
  13213. +EXPORT_SYMBOL(__rt_spin_lock__no_mg);
  13214. +
  13215. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13216. +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
  13217. +{
  13218. + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
  13219. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
  13220. +}
  13221. +EXPORT_SYMBOL(rt_spin_lock_nested);
  13222. +#endif
  13223. +
  13224. +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock)
  13225. +{
  13226. + /* NOTE: we always pass in '1' for nested, for simplicity */
  13227. + spin_release(&lock->dep_map, 1, _RET_IP_);
  13228. + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
  13229. +}
  13230. +EXPORT_SYMBOL(rt_spin_unlock__no_mg);
  13231. +
  13232. +void __lockfunc rt_spin_unlock(spinlock_t *lock)
  13233. +{
  13234. + /* NOTE: we always pass in '1' for nested, for simplicity */
  13235. + spin_release(&lock->dep_map, 1, _RET_IP_);
  13236. + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
  13237. + migrate_enable();
  13238. +}
  13239. +EXPORT_SYMBOL(rt_spin_unlock);
  13240. +
  13241. +int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock)
  13242. +{
  13243. + int ret;
  13244. +
  13245. + /* NOTE: we always pass in '1' for nested, for simplicity */
  13246. + spin_release(&lock->dep_map, 1, _RET_IP_);
  13247. + ret = rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock_no_deboost);
  13248. + migrate_enable();
  13249. + return ret;
  13250. +}
  13251. +
  13252. +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
  13253. +{
  13254. + rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
  13255. +}
  13256. +EXPORT_SYMBOL(__rt_spin_unlock);
  13257. +
  13258. +/*
  13259. + * Wait for the lock to get unlocked: instead of polling for an unlock
  13260. + * (like raw spinlocks do), we lock and unlock, to force the kernel to
  13261. + * schedule if there's contention:
  13262. + */
  13263. +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
  13264. +{
  13265. + spin_lock(lock);
  13266. + spin_unlock(lock);
  13267. +}
  13268. +EXPORT_SYMBOL(rt_spin_unlock_wait);
  13269. +
  13270. +int __lockfunc __rt_spin_trylock(struct rt_mutex *lock)
  13271. +{
  13272. + return rt_mutex_trylock(lock);
  13273. +}
  13274. +
  13275. +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock)
  13276. +{
  13277. + int ret;
  13278. +
  13279. + ret = rt_mutex_trylock(&lock->lock);
  13280. + if (ret)
  13281. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  13282. + return ret;
  13283. +}
  13284. +EXPORT_SYMBOL(rt_spin_trylock__no_mg);
  13285. +
  13286. +int __lockfunc rt_spin_trylock(spinlock_t *lock)
  13287. +{
  13288. + int ret;
  13289. +
  13290. + migrate_disable();
  13291. + ret = rt_mutex_trylock(&lock->lock);
  13292. + if (ret)
  13293. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  13294. + else
  13295. + migrate_enable();
  13296. + return ret;
  13297. +}
  13298. +EXPORT_SYMBOL(rt_spin_trylock);
  13299. +
  13300. +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
  13301. +{
  13302. + int ret;
  13303. +
  13304. + local_bh_disable();
  13305. + ret = rt_mutex_trylock(&lock->lock);
  13306. + if (ret) {
  13307. + migrate_disable();
  13308. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  13309. + } else
  13310. + local_bh_enable();
  13311. + return ret;
  13312. +}
  13313. +EXPORT_SYMBOL(rt_spin_trylock_bh);
  13314. +
  13315. +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
  13316. +{
  13317. + int ret;
  13318. +
  13319. + *flags = 0;
  13320. + ret = rt_mutex_trylock(&lock->lock);
  13321. + if (ret) {
  13322. + migrate_disable();
  13323. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  13324. + }
  13325. + return ret;
  13326. +}
  13327. +EXPORT_SYMBOL(rt_spin_trylock_irqsave);
  13328. +
  13329. +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
  13330. +{
  13331. + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
  13332. + if (atomic_add_unless(atomic, -1, 1))
  13333. + return 0;
  13334. + rt_spin_lock(lock);
  13335. + if (atomic_dec_and_test(atomic))
  13336. + return 1;
  13337. + rt_spin_unlock(lock);
  13338. + return 0;
  13339. +}
  13340. +EXPORT_SYMBOL(atomic_dec_and_spin_lock);
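
/*
 * A minimal usage sketch (struct obj, obj_put and obj_list_lock are
 * hypothetical); same contract as atomic_dec_and_mutex_lock() earlier in
 * this patch, but note that on PREEMPT_RT the spinlock_t taken here is
 * itself a sleeping lock:
 */
struct obj {
        atomic_t                refcount;
        struct list_head        node;
};

static void obj_put(struct obj *o, spinlock_t *obj_list_lock)
{
        /* The lock is taken only when the count actually reaches zero. */
        if (atomic_dec_and_spin_lock(&o->refcount, obj_list_lock)) {
                list_del(&o->node);
                spin_unlock(obj_list_lock);
                kfree(o);
        }
}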
  13341. +
  13342. + void
  13343. +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
  13344. +{
  13345. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13346. + /*
  13347. + * Make sure we are not reinitializing a held lock:
  13348. + */
  13349. + debug_check_no_locks_freed((void *)lock, sizeof(*lock));
  13350. + lockdep_init_map(&lock->dep_map, name, key, 0);
  13351. +#endif
  13352. +}
  13353. +EXPORT_SYMBOL(__rt_spin_lock_init);
  13354. +
  13355. +#endif /* PREEMPT_RT_FULL */
  13356. +
  13357. +#ifdef CONFIG_PREEMPT_RT_FULL
  13358. + static inline int __sched
  13359. +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
  13360. +{
  13361. + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
  13362. + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
  13363. +
  13364. + if (!hold_ctx)
  13365. + return 0;
  13366. +
  13367. + if (unlikely(ctx == hold_ctx))
  13368. + return -EALREADY;
  13369. +
  13370. + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
  13371. + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
  13372. +#ifdef CONFIG_DEBUG_MUTEXES
  13373. + DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
  13374. + ctx->contending_lock = ww;
  13375. +#endif
  13376. + return -EDEADLK;
  13377. + }
  13378. +
  13379. + return 0;
  13380. +}
  13381. +#else
  13382. + static inline int __sched
  13383. +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
  13384. +{
  13385. + BUG();
  13386. + return 0;
  13387. +}
  13388. +
  13389. +#endif
  13390. +
  13391. +static inline int
  13392. +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  13393. + struct rt_mutex_waiter *waiter)
  13394. +{
  13395. + return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
  13396. +}
  13397. +
  13398. /*
  13399. * Task blocks on lock.
  13400. *
  13401. @@ -971,6 +1445,23 @@
  13402. return -EDEADLK;
  13403. raw_spin_lock(&task->pi_lock);
  13404. +
  13405. + /*
  13406. + * In the case of futex requeue PI, this will be a proxy
  13407. + * lock. The task will wake unaware that it is enqueued on
  13408. + * this lock. Avoid blocking on two locks and corrupting
  13409. + * pi_blocked_on via the PI_WAKEUP_INPROGRESS
  13410. + * flag. futex_wait_requeue_pi() sets this when it wakes up
  13411. + * before requeue (due to a signal or timeout). Do not enqueue
  13412. + * the task if PI_WAKEUP_INPROGRESS is set.
  13413. + */
  13414. + if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
  13415. + raw_spin_unlock(&task->pi_lock);
  13416. + return -EAGAIN;
  13417. + }
  13418. +
  13419. + BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
  13420. +
  13421. __rt_mutex_adjust_prio(task);
  13422. waiter->task = task;
  13423. waiter->lock = lock;
  13424. @@ -994,7 +1485,7 @@
  13425. rt_mutex_enqueue_pi(owner, waiter);
  13426. __rt_mutex_adjust_prio(owner);
  13427. - if (owner->pi_blocked_on)
  13428. + if (rt_mutex_real_waiter(owner->pi_blocked_on))
  13429. chain_walk = 1;
  13430. } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
  13431. chain_walk = 1;
  13432. @@ -1036,6 +1527,7 @@
  13433. * Called with lock->wait_lock held and interrupts disabled.
  13434. */
  13435. static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
  13436. + struct wake_q_head *wake_sleeper_q,
  13437. struct rt_mutex *lock)
  13438. {
  13439. struct rt_mutex_waiter *waiter;
  13440. @@ -1064,7 +1556,10 @@
  13441. raw_spin_unlock(&current->pi_lock);
  13442. - wake_q_add(wake_q, waiter->task);
  13443. + if (waiter->savestate)
  13444. + wake_q_add(wake_sleeper_q, waiter->task);
  13445. + else
  13446. + wake_q_add(wake_q, waiter->task);
  13447. }
  13448. /*
  13449. @@ -1078,7 +1573,7 @@
  13450. {
  13451. bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
  13452. struct task_struct *owner = rt_mutex_owner(lock);
  13453. - struct rt_mutex *next_lock;
  13454. + struct rt_mutex *next_lock = NULL;
  13455. raw_spin_lock(&current->pi_lock);
  13456. rt_mutex_dequeue(lock, waiter);
  13457. @@ -1102,7 +1597,8 @@
  13458. __rt_mutex_adjust_prio(owner);
  13459. /* Store the lock on which owner is blocked or NULL */
  13460. - next_lock = task_blocked_on_lock(owner);
  13461. + if (rt_mutex_real_waiter(owner->pi_blocked_on))
  13462. + next_lock = task_blocked_on_lock(owner);
  13463. raw_spin_unlock(&owner->pi_lock);
  13464. @@ -1138,17 +1634,17 @@
  13465. raw_spin_lock_irqsave(&task->pi_lock, flags);
  13466. waiter = task->pi_blocked_on;
  13467. - if (!waiter || (waiter->prio == task->prio &&
  13468. + if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio &&
  13469. !dl_prio(task->prio))) {
  13470. raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  13471. return;
  13472. }
  13473. next_lock = waiter->lock;
  13474. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  13475. /* gets dropped in rt_mutex_adjust_prio_chain()! */
  13476. get_task_struct(task);
  13477. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  13478. rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
  13479. next_lock, NULL, task);
  13480. }
  13481. @@ -1166,7 +1662,8 @@
  13482. static int __sched
  13483. __rt_mutex_slowlock(struct rt_mutex *lock, int state,
  13484. struct hrtimer_sleeper *timeout,
  13485. - struct rt_mutex_waiter *waiter)
  13486. + struct rt_mutex_waiter *waiter,
  13487. + struct ww_acquire_ctx *ww_ctx)
  13488. {
  13489. int ret = 0;
  13490. @@ -1189,6 +1686,12 @@
  13491. break;
  13492. }
  13493. + if (ww_ctx && ww_ctx->acquired > 0) {
  13494. + ret = __mutex_lock_check_stamp(lock, ww_ctx);
  13495. + if (ret)
  13496. + break;
  13497. + }
  13498. +
  13499. raw_spin_unlock_irq(&lock->wait_lock);
  13500. debug_rt_mutex_print_deadlock(waiter);
  13501. @@ -1223,21 +1726,96 @@
  13502. }
  13503. }
  13504. +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
  13505. + struct ww_acquire_ctx *ww_ctx)
  13506. +{
  13507. +#ifdef CONFIG_DEBUG_MUTEXES
  13508. + /*
  13509. + * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
  13510. + * but released with a normal mutex_unlock in this call.
  13511. + *
  13512. + * This should never happen, always use ww_mutex_unlock.
  13513. + */
  13514. + DEBUG_LOCKS_WARN_ON(ww->ctx);
  13515. +
  13516. + /*
  13517. + * Not quite done after calling ww_acquire_done() ?
  13518. + */
  13519. + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
  13520. +
  13521. + if (ww_ctx->contending_lock) {
  13522. + /*
  13523. + * After -EDEADLK you tried to
  13524. + * acquire a different ww_mutex? Bad!
  13525. + */
  13526. + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
  13527. +
  13528. + /*
  13529. + * You called ww_mutex_lock after receiving -EDEADLK,
  13530. + * but 'forgot' to unlock everything else first?
  13531. + */
  13532. + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
  13533. + ww_ctx->contending_lock = NULL;
  13534. + }
  13535. +
  13536. + /*
  13537. + * Naughty, using a different class will lead to undefined behavior!
  13538. + */
  13539. + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
  13540. +#endif
  13541. + ww_ctx->acquired++;
  13542. +}
  13543. +
  13544. +#ifdef CONFIG_PREEMPT_RT_FULL
  13545. +static void ww_mutex_account_lock(struct rt_mutex *lock,
  13546. + struct ww_acquire_ctx *ww_ctx)
  13547. +{
  13548. + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
  13549. + struct rt_mutex_waiter *waiter, *n;
  13550. +
  13551. + /*
  13552. + * This branch gets optimized out for the common case,
  13553. + * and is only important for ww_mutex_lock.
  13554. + */
  13555. + ww_mutex_lock_acquired(ww, ww_ctx);
  13556. + ww->ctx = ww_ctx;
  13557. +
  13558. + /*
  13559. + * Give any possible sleeping processes the chance to wake up,
  13560. + * so they can recheck if they have to back off.
  13561. + */
  13562. + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters,
  13563. + tree_entry) {
  13564. + /* XXX debug rt mutex waiter wakeup */
  13565. +
  13566. + BUG_ON(waiter->lock != lock);
  13567. + rt_mutex_wake_waiter(waiter);
  13568. + }
  13569. +}
  13570. +
  13571. +#else
  13572. +
  13573. +static void ww_mutex_account_lock(struct rt_mutex *lock,
  13574. + struct ww_acquire_ctx *ww_ctx)
  13575. +{
  13576. + BUG();
  13577. +}
  13578. +#endif
  13579. +
  13580. /*
  13581. * Slow path lock function:
  13582. */
  13583. static int __sched
  13584. rt_mutex_slowlock(struct rt_mutex *lock, int state,
  13585. struct hrtimer_sleeper *timeout,
  13586. - enum rtmutex_chainwalk chwalk)
  13587. + enum rtmutex_chainwalk chwalk,
  13588. + struct ww_acquire_ctx *ww_ctx)
  13589. {
  13590. struct rt_mutex_waiter waiter;
  13591. unsigned long flags;
  13592. int ret = 0;
  13593. - debug_rt_mutex_init_waiter(&waiter);
  13594. - RB_CLEAR_NODE(&waiter.pi_tree_entry);
  13595. - RB_CLEAR_NODE(&waiter.tree_entry);
  13596. + rt_mutex_init_waiter(&waiter, false);
  13597. /*
  13598. * Technically we could use raw_spin_[un]lock_irq() here, but this can
  13599. @@ -1251,6 +1829,8 @@
  13600. /* Try to acquire the lock again: */
  13601. if (try_to_take_rt_mutex(lock, current, NULL)) {
  13602. + if (ww_ctx)
  13603. + ww_mutex_account_lock(lock, ww_ctx);
  13604. raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  13605. return 0;
  13606. }
  13607. @@ -1265,13 +1845,23 @@
  13608. if (likely(!ret))
  13609. /* sleep on the mutex */
  13610. - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
  13611. + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
  13612. + ww_ctx);
  13613. + else if (ww_ctx) {
  13614. + /* ww_mutex received EDEADLK, let it become EALREADY */
  13615. + ret = __mutex_lock_check_stamp(lock, ww_ctx);
  13616. + BUG_ON(!ret);
  13617. + }
  13618. if (unlikely(ret)) {
  13619. __set_current_state(TASK_RUNNING);
  13620. if (rt_mutex_has_waiters(lock))
  13621. remove_waiter(lock, &waiter);
  13622. - rt_mutex_handle_deadlock(ret, chwalk, &waiter);
  13623. + /* ww_mutex wants to report EDEADLK/EALREADY, let it */
  13624. + if (!ww_ctx)
  13625. + rt_mutex_handle_deadlock(ret, chwalk, &waiter);
  13626. + } else if (ww_ctx) {
  13627. + ww_mutex_account_lock(lock, ww_ctx);
  13628. }
  13629. /*
  13630. @@ -1331,7 +1921,8 @@
  13631. * Return whether the current task needs to undo a potential priority boosting.
  13632. */
  13633. static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
  13634. - struct wake_q_head *wake_q)
  13635. + struct wake_q_head *wake_q,
  13636. + struct wake_q_head *wake_sleeper_q)
  13637. {
  13638. unsigned long flags;
  13639. @@ -1387,7 +1978,7 @@
  13640. *
  13641. * Queue the next waiter for wakeup once we release the wait_lock.
  13642. */
  13643. - mark_wakeup_next_waiter(wake_q, lock);
  13644. + mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock);
  13645. raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  13646. @@ -1403,31 +1994,36 @@
  13647. */
  13648. static inline int
  13649. rt_mutex_fastlock(struct rt_mutex *lock, int state,
  13650. + struct ww_acquire_ctx *ww_ctx,
  13651. int (*slowfn)(struct rt_mutex *lock, int state,
  13652. struct hrtimer_sleeper *timeout,
  13653. - enum rtmutex_chainwalk chwalk))
  13654. + enum rtmutex_chainwalk chwalk,
  13655. + struct ww_acquire_ctx *ww_ctx))
  13656. {
  13657. if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
  13658. rt_mutex_deadlock_account_lock(lock, current);
  13659. return 0;
  13660. } else
  13661. - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
  13662. + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK,
  13663. + ww_ctx);
  13664. }
  13665. static inline int
  13666. rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
  13667. struct hrtimer_sleeper *timeout,
  13668. enum rtmutex_chainwalk chwalk,
  13669. + struct ww_acquire_ctx *ww_ctx,
  13670. int (*slowfn)(struct rt_mutex *lock, int state,
  13671. struct hrtimer_sleeper *timeout,
  13672. - enum rtmutex_chainwalk chwalk))
  13673. + enum rtmutex_chainwalk chwalk,
  13674. + struct ww_acquire_ctx *ww_ctx))
  13675. {
  13676. if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
  13677. likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
  13678. rt_mutex_deadlock_account_lock(lock, current);
  13679. return 0;
  13680. } else
  13681. - return slowfn(lock, state, timeout, chwalk);
  13682. + return slowfn(lock, state, timeout, chwalk, ww_ctx);
  13683. }
  13684. static inline int
  13685. @@ -1444,17 +2040,20 @@
  13686. static inline void
  13687. rt_mutex_fastunlock(struct rt_mutex *lock,
  13688. bool (*slowfn)(struct rt_mutex *lock,
  13689. - struct wake_q_head *wqh))
  13690. + struct wake_q_head *wqh,
  13691. + struct wake_q_head *wq_sleeper))
  13692. {
  13693. WAKE_Q(wake_q);
  13694. + WAKE_Q(wake_sleeper_q);
  13695. if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
  13696. rt_mutex_deadlock_account_unlock(current);
  13697. } else {
  13698. - bool deboost = slowfn(lock, &wake_q);
  13699. + bool deboost = slowfn(lock, &wake_q, &wake_sleeper_q);
  13700. wake_up_q(&wake_q);
  13701. + wake_up_q_sleeper(&wake_sleeper_q);
  13702. /* Undo pi boosting if necessary: */
  13703. if (deboost)
  13704. @@ -1471,7 +2070,7 @@
  13705. {
  13706. might_sleep();
  13707. - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
  13708. + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock);
  13709. }
  13710. EXPORT_SYMBOL_GPL(rt_mutex_lock);
  13711. @@ -1488,7 +2087,7 @@
  13712. {
  13713. might_sleep();
  13714. - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
  13715. + return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock);
  13716. }
  13717. EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
  13718. @@ -1501,11 +2100,30 @@
  13719. might_sleep();
  13720. return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
  13721. - RT_MUTEX_FULL_CHAINWALK,
  13722. + RT_MUTEX_FULL_CHAINWALK, NULL,
  13723. rt_mutex_slowlock);
  13724. }
  13725. /**
  13726. + * rt_mutex_lock_killable - lock a rt_mutex killable
  13727. + *
  13728. + * @lock: the rt_mutex to be locked
  13730. + *
  13731. + * Returns:
  13732. + * 0 on success
  13733. + * -EINTR when interrupted by a signal
  13734. + * -EDEADLK when the lock would deadlock (when deadlock detection is on)
  13735. + */
  13736. +int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
  13737. +{
  13738. + might_sleep();
  13739. +
  13740. + return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock);
  13741. +}
  13742. +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
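
/*
 * A minimal usage sketch; do_work_killable is a hypothetical caller. A
 * fatal signal aborts the wait instead of leaving the task blocked forever:
 */
static int do_work_killable(struct rt_mutex *m)
{
        int ret;

        ret = rt_mutex_lock_killable(m);
        if (ret)
                return ret;     /* -EINTR: killed while waiting, lock not held */
        /* ... critical section ... */
        rt_mutex_unlock(m);
        return 0;
}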
  13743. +
  13744. +/**
  13745. * rt_mutex_timed_lock - lock a rt_mutex interruptible
  13746. * the timeout structure is provided
  13747. * by the caller
  13748. @@ -1525,6 +2143,7 @@
  13749. return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
  13750. RT_MUTEX_MIN_CHAINWALK,
  13751. + NULL,
  13752. rt_mutex_slowlock);
  13753. }
  13754. EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
  13755. @@ -1542,7 +2161,11 @@
  13756. */
  13757. int __sched rt_mutex_trylock(struct rt_mutex *lock)
  13758. {
  13759. +#ifdef CONFIG_PREEMPT_RT_FULL
  13760. + if (WARN_ON_ONCE(in_irq() || in_nmi()))
  13761. +#else
  13762. if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
  13763. +#endif
  13764. return 0;
  13765. return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
  13766. @@ -1568,13 +2191,14 @@
  13767. * required or not.
  13768. */
  13769. bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
  13770. - struct wake_q_head *wqh)
  13771. + struct wake_q_head *wqh,
  13772. + struct wake_q_head *wq_sleeper)
  13773. {
  13774. if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
  13775. rt_mutex_deadlock_account_unlock(current);
  13776. return false;
  13777. }
  13778. - return rt_mutex_slowunlock(lock, wqh);
  13779. + return rt_mutex_slowunlock(lock, wqh, wq_sleeper);
  13780. }
  13781. /**
  13782. @@ -1607,13 +2231,12 @@
  13783. void __rt_mutex_init(struct rt_mutex *lock, const char *name)
  13784. {
  13785. lock->owner = NULL;
  13786. - raw_spin_lock_init(&lock->wait_lock);
  13787. lock->waiters = RB_ROOT;
  13788. lock->waiters_leftmost = NULL;
  13789. debug_rt_mutex_init(lock, name);
  13790. }
  13791. -EXPORT_SYMBOL_GPL(__rt_mutex_init);
  13792. +EXPORT_SYMBOL(__rt_mutex_init);
  13793. /**
  13794. * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
  13795. @@ -1628,7 +2251,7 @@
  13796. void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  13797. struct task_struct *proxy_owner)
  13798. {
  13799. - __rt_mutex_init(lock, NULL);
  13800. + rt_mutex_init(lock);
  13801. debug_rt_mutex_proxy_lock(lock, proxy_owner);
  13802. rt_mutex_set_owner(lock, proxy_owner);
  13803. rt_mutex_deadlock_account_lock(lock, proxy_owner);
  13804. @@ -1676,6 +2299,35 @@
  13805. return 1;
  13806. }
  13807. +#ifdef CONFIG_PREEMPT_RT_FULL
  13808. + /*
  13809. + * In PREEMPT_RT there's an added race.
  13810. + * If the task that we are about to requeue times out,
  13811. + * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
  13812. + * to skip this task. But right after the task sets
  13813. + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
  13814. + * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
  13815. + * This will replace the PI_WAKEUP_INPROGRESS with the actual
  13816. + * lock that it blocks on. We *must not* place this task
  13817. + * on this proxy lock in that case.
  13818. + *
  13819. + * To prevent this race, we first take the task's pi_lock
  13820. + * and check if it has updated its pi_blocked_on. If it has,
  13821. + * we assume that it woke up and we return -EAGAIN.
  13822. + * Otherwise, we set the task's pi_blocked_on to
  13823. + * PI_REQUEUE_INPROGRESS, so that if the task is waking up
  13824. + * it will know that we are in the process of requeuing it.
  13825. + */
  13826. + raw_spin_lock(&task->pi_lock);
  13827. + if (task->pi_blocked_on) {
  13828. + raw_spin_unlock(&task->pi_lock);
  13829. + raw_spin_unlock_irq(&lock->wait_lock);
  13830. + return -EAGAIN;
  13831. + }
  13832. + task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
  13833. + raw_spin_unlock(&task->pi_lock);
  13834. +#endif
  13835. +
  13836. /* We enforce deadlock detection for futexes */
  13837. ret = task_blocks_on_rt_mutex(lock, waiter, task,
  13838. RT_MUTEX_FULL_CHAINWALK);
  13839. @@ -1690,7 +2342,7 @@
  13840. ret = 0;
  13841. }
  13842. - if (unlikely(ret))
  13843. + if (ret && rt_mutex_has_waiters(lock))
  13844. remove_waiter(lock, waiter);
  13845. raw_spin_unlock_irq(&lock->wait_lock);
  13846. @@ -1746,7 +2398,7 @@
  13847. set_current_state(TASK_INTERRUPTIBLE);
  13848. /* sleep on the mutex */
  13849. - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
  13850. + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
  13851. if (unlikely(ret))
  13852. remove_waiter(lock, waiter);
  13853. @@ -1761,3 +2413,89 @@
  13854. return ret;
  13855. }
  13856. +
  13857. +static inline int
  13858. +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
  13859. +{
  13860. +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
  13861. + unsigned tmp;
  13862. +
  13863. + if (ctx->deadlock_inject_countdown-- == 0) {
  13864. + tmp = ctx->deadlock_inject_interval;
  13865. + if (tmp > UINT_MAX/4)
  13866. + tmp = UINT_MAX;
  13867. + else
  13868. + tmp = tmp*2 + tmp + tmp/2;
  13869. +
  13870. + ctx->deadlock_inject_interval = tmp;
  13871. + ctx->deadlock_inject_countdown = tmp;
  13872. + ctx->contending_lock = lock;
  13873. +
  13874. + ww_mutex_unlock(lock);
  13875. +
  13876. + return -EDEADLK;
  13877. + }
  13878. +#endif
  13879. +
  13880. + return 0;
  13881. +}
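
/*
 * Worked example for the interval update above (relevant only with
 * CONFIG_DEBUG_WW_MUTEX_SLOWPATH): each injected -EDEADLK grows the interval
 * by roughly 3.5x (tmp*2 + tmp + tmp/2), e.g. 4 -> 14 -> 49 -> 171 -> ...,
 * and the interval is pinned to UINT_MAX once it has grown past UINT_MAX/4.
 */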
  13882. +
  13883. +#ifdef CONFIG_PREEMPT_RT_FULL
  13884. +int __sched
  13885. +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
  13886. +{
  13887. + int ret;
  13888. +
  13889. + might_sleep();
  13890. +
  13891. + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
  13892. + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx);
  13893. + if (ret)
  13894. + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
  13895. + else if (!ret && ww_ctx->acquired > 1)
  13896. + return ww_mutex_deadlock_injection(lock, ww_ctx);
  13897. +
  13898. + return ret;
  13899. +}
  13900. +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
  13901. +
  13902. +int __sched
  13903. +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
  13904. +{
  13905. + int ret;
  13906. +
  13907. + might_sleep();
  13908. +
  13909. + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
  13910. + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx);
  13911. + if (ret)
  13912. + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
  13913. + else if (!ret && ww_ctx->acquired > 1)
  13914. + return ww_mutex_deadlock_injection(lock, ww_ctx);
  13915. +
  13916. + return ret;
  13917. +}
  13918. +EXPORT_SYMBOL_GPL(__ww_mutex_lock);
  13919. +
  13920. +void __sched ww_mutex_unlock(struct ww_mutex *lock)
  13921. +{
  13922. + int nest = !!lock->ctx;
  13923. +
  13924. + /*
  13925. + * The unlocking fastpath is the 0->1 transition from 'locked'
  13926. + * into 'unlocked' state:
  13927. + */
  13928. + if (nest) {
  13929. +#ifdef CONFIG_DEBUG_MUTEXES
  13930. + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
  13931. +#endif
  13932. + if (lock->ctx->acquired > 0)
  13933. + lock->ctx->acquired--;
  13934. + lock->ctx = NULL;
  13935. + }
  13936. +
  13937. + mutex_release(&lock->base.dep_map, nest, _RET_IP_);
  13938. + rt_mutex_unlock(&lock->base.lock);
  13939. +}
  13940. +EXPORT_SYMBOL(ww_mutex_unlock);
  13941. +#endif
  13942. diff -Nur linux-4.8.15.orig/kernel/locking/rtmutex_common.h linux-4.8.15/kernel/locking/rtmutex_common.h
  13943. --- linux-4.8.15.orig/kernel/locking/rtmutex_common.h 2016-12-15 17:50:48.000000000 +0100
  13944. +++ linux-4.8.15/kernel/locking/rtmutex_common.h 2017-01-01 17:07:16.023426438 +0100
  13945. @@ -27,6 +27,7 @@
  13946. struct rb_node pi_tree_entry;
  13947. struct task_struct *task;
  13948. struct rt_mutex *lock;
  13949. + bool savestate;
  13950. #ifdef CONFIG_DEBUG_RT_MUTEXES
  13951. unsigned long ip;
  13952. struct pid *deadlock_task_pid;
  13953. @@ -98,6 +99,9 @@
  13954. /*
  13955. * PI-futex support (proxy locking functions, etc.):
  13956. */
  13957. +#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
  13958. +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)
  13959. +
  13960. extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
  13961. extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  13962. struct task_struct *proxy_owner);
  13963. @@ -111,7 +115,8 @@
  13964. struct rt_mutex_waiter *waiter);
  13965. extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
  13966. extern bool rt_mutex_futex_unlock(struct rt_mutex *lock,
  13967. - struct wake_q_head *wqh);
  13968. + struct wake_q_head *wqh,
  13969. + struct wake_q_head *wq_sleeper);
  13970. extern void rt_mutex_adjust_prio(struct task_struct *task);
  13971. #ifdef CONFIG_DEBUG_RT_MUTEXES
  13972. @@ -120,4 +125,14 @@
  13973. # include "rtmutex.h"
  13974. #endif
  13975. +static inline void
  13976. +rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
  13977. +{
  13978. + debug_rt_mutex_init_waiter(waiter);
  13979. + waiter->task = NULL;
  13980. + waiter->savestate = savestate;
  13981. + RB_CLEAR_NODE(&waiter->pi_tree_entry);
  13982. + RB_CLEAR_NODE(&waiter->tree_entry);
  13983. +}
  13984. +
  13985. #endif
  13986. diff -Nur linux-4.8.15.orig/kernel/locking/spinlock.c linux-4.8.15/kernel/locking/spinlock.c
  13987. --- linux-4.8.15.orig/kernel/locking/spinlock.c 2016-12-15 17:50:48.000000000 +0100
  13988. +++ linux-4.8.15/kernel/locking/spinlock.c 2017-01-01 17:07:16.023426438 +0100
  13989. @@ -124,8 +124,11 @@
  13990. * __[spin|read|write]_lock_bh()
  13991. */
  13992. BUILD_LOCK_OPS(spin, raw_spinlock);
  13993. +
  13994. +#ifndef CONFIG_PREEMPT_RT_FULL
  13995. BUILD_LOCK_OPS(read, rwlock);
  13996. BUILD_LOCK_OPS(write, rwlock);
  13997. +#endif
  13998. #endif
  13999. @@ -209,6 +212,8 @@
  14000. EXPORT_SYMBOL(_raw_spin_unlock_bh);
  14001. #endif
  14002. +#ifndef CONFIG_PREEMPT_RT_FULL
  14003. +
  14004. #ifndef CONFIG_INLINE_READ_TRYLOCK
  14005. int __lockfunc _raw_read_trylock(rwlock_t *lock)
  14006. {
  14007. @@ -353,6 +358,8 @@
  14008. EXPORT_SYMBOL(_raw_write_unlock_bh);
  14009. #endif
  14010. +#endif /* !PREEMPT_RT_FULL */
  14011. +
  14012. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  14013. void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
  14014. diff -Nur linux-4.8.15.orig/kernel/locking/spinlock_debug.c linux-4.8.15/kernel/locking/spinlock_debug.c
  14015. --- linux-4.8.15.orig/kernel/locking/spinlock_debug.c 2016-12-15 17:50:48.000000000 +0100
  14016. +++ linux-4.8.15/kernel/locking/spinlock_debug.c 2017-01-01 17:07:16.023426438 +0100
  14017. @@ -31,6 +31,7 @@
  14018. EXPORT_SYMBOL(__raw_spin_lock_init);
  14019. +#ifndef CONFIG_PREEMPT_RT_FULL
  14020. void __rwlock_init(rwlock_t *lock, const char *name,
  14021. struct lock_class_key *key)
  14022. {
  14023. @@ -48,6 +49,7 @@
  14024. }
  14025. EXPORT_SYMBOL(__rwlock_init);
  14026. +#endif
  14027. static void spin_dump(raw_spinlock_t *lock, const char *msg)
  14028. {
  14029. @@ -159,6 +161,7 @@
  14030. arch_spin_unlock(&lock->raw_lock);
  14031. }
  14032. +#ifndef CONFIG_PREEMPT_RT_FULL
  14033. static void rwlock_bug(rwlock_t *lock, const char *msg)
  14034. {
  14035. if (!debug_locks_off())
  14036. @@ -300,3 +303,5 @@
  14037. debug_write_unlock(lock);
  14038. arch_write_unlock(&lock->raw_lock);
  14039. }
  14040. +
  14041. +#endif
  14042. diff -Nur linux-4.8.15.orig/kernel/Makefile linux-4.8.15/kernel/Makefile
  14043. --- linux-4.8.15.orig/kernel/Makefile 2016-12-15 17:50:48.000000000 +0100
  14044. +++ linux-4.8.15/kernel/Makefile 2017-01-01 17:07:15.995424645 +0100
  14045. @@ -11,6 +11,13 @@
  14046. notifier.o ksysfs.o cred.o reboot.o \
  14047. async.o range.o smpboot.o
  14048. +# Tracing may do some dangerous __builtin_return_address() operations
  14049. +# We know they are dangerous, we don't need gcc telling us that.
  14050. +ifdef CONFIG_USING_GET_LOCK_PARENT_IP
  14051. +FRAME_CFLAGS := $(call cc-disable-warning,frame-address)
  14052. +KBUILD_CFLAGS += $(FRAME_CFLAGS)
  14053. +endif
  14054. +
  14055. obj-$(CONFIG_MULTIUSER) += groups.o
  14056. ifdef CONFIG_FUNCTION_TRACER
  14057. diff -Nur linux-4.8.15.orig/kernel/panic.c linux-4.8.15/kernel/panic.c
  14058. --- linux-4.8.15.orig/kernel/panic.c 2016-12-15 17:50:48.000000000 +0100
  14059. +++ linux-4.8.15/kernel/panic.c 2017-01-01 17:07:16.023426438 +0100
  14060. @@ -449,9 +449,11 @@
  14061. static int init_oops_id(void)
  14062. {
  14063. +#ifndef CONFIG_PREEMPT_RT_FULL
  14064. if (!oops_id)
  14065. get_random_bytes(&oops_id, sizeof(oops_id));
  14066. else
  14067. +#endif
  14068. oops_id++;
  14069. return 0;
  14070. diff -Nur linux-4.8.15.orig/kernel/power/hibernate.c linux-4.8.15/kernel/power/hibernate.c
  14071. --- linux-4.8.15.orig/kernel/power/hibernate.c 2016-12-15 17:50:48.000000000 +0100
  14072. +++ linux-4.8.15/kernel/power/hibernate.c 2017-01-01 17:07:16.023426438 +0100
  14073. @@ -286,6 +286,8 @@
  14074. local_irq_disable();
  14075. + system_state = SYSTEM_SUSPEND;
  14076. +
  14077. error = syscore_suspend();
  14078. if (error) {
  14079. printk(KERN_ERR "PM: Some system devices failed to power down, "
  14080. @@ -315,6 +317,7 @@
  14081. syscore_resume();
  14082. Enable_irqs:
  14083. + system_state = SYSTEM_RUNNING;
  14084. local_irq_enable();
  14085. Enable_cpus:
  14086. @@ -444,6 +447,7 @@
  14087. goto Enable_cpus;
  14088. local_irq_disable();
  14089. + system_state = SYSTEM_SUSPEND;
  14090. error = syscore_suspend();
  14091. if (error)
  14092. @@ -477,6 +481,7 @@
  14093. syscore_resume();
  14094. Enable_irqs:
  14095. + system_state = SYSTEM_RUNNING;
  14096. local_irq_enable();
  14097. Enable_cpus:
  14098. @@ -562,6 +567,7 @@
  14099. goto Enable_cpus;
  14100. local_irq_disable();
  14101. + system_state = SYSTEM_SUSPEND;
  14102. syscore_suspend();
  14103. if (pm_wakeup_pending()) {
  14104. error = -EAGAIN;
  14105. @@ -574,6 +580,7 @@
  14106. Power_up:
  14107. syscore_resume();
  14108. + system_state = SYSTEM_RUNNING;
  14109. local_irq_enable();
  14110. Enable_cpus:
  14111. @@ -674,6 +681,10 @@
  14112. return error;
  14113. }
  14114. +#ifndef CONFIG_SUSPEND
  14115. +bool pm_in_action;
  14116. +#endif
  14117. +
  14118. /**
  14119. * hibernate - Carry out system hibernation, including saving the image.
  14120. */
  14121. @@ -687,6 +698,8 @@
  14122. return -EPERM;
  14123. }
  14124. + pm_in_action = true;
  14125. +
  14126. lock_system_sleep();
  14127. /* The snapshot device should not be opened while we're running */
  14128. if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
  14129. @@ -764,6 +777,7 @@
  14130. atomic_inc(&snapshot_device_available);
  14131. Unlock:
  14132. unlock_system_sleep();
  14133. + pm_in_action = false;
  14134. return error;
  14135. }
  14136. diff -Nur linux-4.8.15.orig/kernel/power/suspend.c linux-4.8.15/kernel/power/suspend.c
  14137. --- linux-4.8.15.orig/kernel/power/suspend.c 2016-12-15 17:50:48.000000000 +0100
  14138. +++ linux-4.8.15/kernel/power/suspend.c 2017-01-01 17:07:16.023426438 +0100
  14139. @@ -361,6 +361,8 @@
  14140. arch_suspend_disable_irqs();
  14141. BUG_ON(!irqs_disabled());
  14142. + system_state = SYSTEM_SUSPEND;
  14143. +
  14144. error = syscore_suspend();
  14145. if (!error) {
  14146. *wakeup = pm_wakeup_pending();
  14147. @@ -377,6 +379,8 @@
  14148. syscore_resume();
  14149. }
  14150. + system_state = SYSTEM_RUNNING;
  14151. +
  14152. arch_suspend_enable_irqs();
  14153. BUG_ON(irqs_disabled());
  14154. @@ -519,6 +523,8 @@
  14155. return error;
  14156. }
  14157. +bool pm_in_action;
  14158. +
  14159. /**
  14160. * pm_suspend - Externally visible function for suspending the system.
  14161. * @state: System sleep state to enter.
  14162. @@ -533,6 +539,8 @@
  14163. if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
  14164. return -EINVAL;
  14165. + pm_in_action = true;
  14166. +
  14167. error = enter_state(state);
  14168. if (error) {
  14169. suspend_stats.fail++;
  14170. @@ -540,6 +548,7 @@
  14171. } else {
  14172. suspend_stats.success++;
  14173. }
  14174. + pm_in_action = false;
  14175. return error;
  14176. }
  14177. EXPORT_SYMBOL(pm_suspend);
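Both hibernate() and pm_suspend() now also raise a plain pm_in_action flag for the whole sleep transition and clear it again on the way out. The sketch below only illustrates that guard-flag discipline (set before the work, cleared on every exit path after it); pm_suspend_stub(), enter_state_stub() and the error values are invented for the example.

#include <stdbool.h>
#include <stdio.h>

/* analogue of pm_in_action: other modules may read it */
static bool pm_in_action;

static int enter_state_stub(int state)
{
    /* pretend state 0 is invalid, everything else succeeds */
    return state == 0 ? -1 : 0;
}

/* mirrors pm_suspend(): the flag covers exactly the transition itself */
static int pm_suspend_stub(int state)
{
    int error;

    if (state < 0)
        return -1;              /* early exit: flag never set */

    pm_in_action = true;
    error = enter_state_stub(state);
    pm_in_action = false;       /* cleared on success and failure alike */
    return error;
}

int main(void)
{
    printf("suspend(1) -> %d, in_action now %d\n",
           pm_suspend_stub(1), pm_in_action);
    printf("suspend(0) -> %d, in_action now %d\n",
           pm_suspend_stub(0), pm_in_action);
    return 0;
}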
  14178. diff -Nur linux-4.8.15.orig/kernel/printk/printk.c linux-4.8.15/kernel/printk/printk.c
  14179. --- linux-4.8.15.orig/kernel/printk/printk.c 2016-12-15 17:50:48.000000000 +0100
  14180. +++ linux-4.8.15/kernel/printk/printk.c 2017-01-01 17:07:16.027426711 +0100
  14181. @@ -351,6 +351,65 @@
  14182. */
  14183. DEFINE_RAW_SPINLOCK(logbuf_lock);
  14184. +#ifdef CONFIG_EARLY_PRINTK
  14185. +struct console *early_console;
  14186. +
  14187. +static void early_vprintk(const char *fmt, va_list ap)
  14188. +{
  14189. + if (early_console) {
  14190. + char buf[512];
  14191. + int n = vscnprintf(buf, sizeof(buf), fmt, ap);
  14192. +
  14193. + early_console->write(early_console, buf, n);
  14194. + }
  14195. +}
  14196. +
  14197. +asmlinkage void early_printk(const char *fmt, ...)
  14198. +{
  14199. + va_list ap;
  14200. +
  14201. + va_start(ap, fmt);
  14202. + early_vprintk(fmt, ap);
  14203. + va_end(ap);
  14204. +}
  14205. +
  14206. +/*
  14207. + * This is independent of any log levels - a global
  14208. + * kill switch that turns off all of printk.
  14209. + *
  14210. + * Used by the NMI watchdog if early-printk is enabled.
  14211. + */
  14212. +static bool __read_mostly printk_killswitch;
  14213. +
  14214. +static int __init force_early_printk_setup(char *str)
  14215. +{
  14216. + printk_killswitch = true;
  14217. + return 0;
  14218. +}
  14219. +early_param("force_early_printk", force_early_printk_setup);
  14220. +
  14221. +void printk_kill(void)
  14222. +{
  14223. + printk_killswitch = true;
  14224. +}
  14225. +
  14226. +#ifdef CONFIG_PRINTK
  14227. +static int forced_early_printk(const char *fmt, va_list ap)
  14228. +{
  14229. + if (!printk_killswitch)
  14230. + return 0;
  14231. + early_vprintk(fmt, ap);
  14232. + return 1;
  14233. +}
  14234. +#endif
  14235. +
  14236. +#else
  14237. +static inline int forced_early_printk(const char *fmt, va_list ap)
  14238. +{
  14239. + return 0;
  14240. +}
  14241. +#endif
  14242. +
  14243. #ifdef CONFIG_PRINTK
  14244. DECLARE_WAIT_QUEUE_HEAD(log_wait);
  14245. /* the next printk record to read by syslog(READ) or /proc/kmsg */
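The early_printk() implementation is moved next to the log-buffer code and gains a global printk_killswitch, settable via printk_kill() or the force_early_printk boot parameter; once it is set, vprintk output is formatted into a small stack buffer and pushed straight through the early console instead of the normal path. A self-contained sketch of that fallback follows; raw_write() and log_printf() are hypothetical stand-ins for the early console write hook and vprintk().

#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static bool printk_killswitch;          /* analogue of the global kill switch */

/* analogue of early_console->write(): a raw, unbuffered sink */
static void raw_write(const char *buf, int n)
{
    write(STDERR_FILENO, buf, (size_t)n);
}

static void early_vprintk(const char *fmt, va_list ap)
{
    char buf[512];
    int n = vsnprintf(buf, sizeof(buf), fmt, ap);

    if (n < 0)
        return;
    if (n > (int)sizeof(buf) - 1)
        n = (int)sizeof(buf) - 1;       /* output was truncated to the buffer */
    raw_write(buf, n);
}

/* analogue of vprintk(): bypass the buffered path once the switch is set */
static void log_printf(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    if (printk_killswitch)
        early_vprintk(fmt, ap);
    else
        vprintf(fmt, ap);               /* stands in for the normal logbuf path */
    va_end(ap);
}

int main(void)
{
    log_printf("normal path: %d\n", 1);
    printk_killswitch = true;           /* e.g. a watchdog detected a lockup */
    log_printf("raw fallback path: %d\n", 2);
    return 0;
}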
  14246. @@ -1340,6 +1399,7 @@
  14247. {
  14248. char *text;
  14249. int len = 0;
  14250. + int attempts = 0;
  14251. text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
  14252. if (!text)
  14253. @@ -1351,6 +1411,14 @@
  14254. u64 seq;
  14255. u32 idx;
  14256. enum log_flags prev;
  14257. + int num_msg;
  14258. +try_again:
  14259. + attempts++;
  14260. + if (attempts > 10) {
  14261. + len = -EBUSY;
  14262. + goto out;
  14263. + }
  14264. + num_msg = 0;
  14265. /*
  14266. * Find first record that fits, including all following records,
  14267. @@ -1366,6 +1434,14 @@
  14268. prev = msg->flags;
  14269. idx = log_next(idx);
  14270. seq++;
  14271. + num_msg++;
  14272. + if (num_msg > 5) {
  14273. + num_msg = 0;
  14274. + raw_spin_unlock_irq(&logbuf_lock);
  14275. + raw_spin_lock_irq(&logbuf_lock);
  14276. + if (clear_seq < log_first_seq)
  14277. + goto try_again;
  14278. + }
  14279. }
  14280. /* move first record forward until length fits into the buffer */
  14281. @@ -1379,6 +1455,14 @@
  14282. prev = msg->flags;
  14283. idx = log_next(idx);
  14284. seq++;
  14285. + num_msg++;
  14286. + if (num_msg > 5) {
  14287. + num_msg = 0;
  14288. + raw_spin_unlock_irq(&logbuf_lock);
  14289. + raw_spin_lock_irq(&logbuf_lock);
  14290. + if (clear_seq < log_first_seq)
  14291. + goto try_again;
  14292. + }
  14293. }
  14294. /* last message fitting into this dump */
  14295. @@ -1419,6 +1503,7 @@
  14296. clear_seq = log_next_seq;
  14297. clear_idx = log_next_idx;
  14298. }
  14299. +out:
  14300. raw_spin_unlock_irq(&logbuf_lock);
  14301. kfree(text);
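The syslog copy loop above now drops and immediately re-takes logbuf_lock every five records, restarts from try_again if the buffer tail moved underneath it (clear_seq < log_first_seq), and gives up with -EBUSY after ten attempts, which bounds how long the raw lock is held with interrupts off. Below is a user-space sketch of that chunked-scan-with-bounded-restart pattern using a pthread mutex; the record counters and scan_records() are invented for the example.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t buf_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long first_seq = 0;     /* oldest record still in the buffer */
static unsigned long next_seq  = 100;   /* one past the newest record */

/* count the records from *start* to the end, dropping the lock every
 * few records; restart (up to a limit) if the tail overtook us */
static long scan_records(unsigned long start)
{
    int attempts = 0;

    pthread_mutex_lock(&buf_lock);
again:
    if (++attempts > 10) {
        pthread_mutex_unlock(&buf_lock);
        return -1;                      /* analogue of -EBUSY */
    }

    {
        unsigned long seq = start < first_seq ? first_seq : start;
        long count = 0;
        int in_chunk = 0;

        while (seq < next_seq) {
            seq++;
            count++;
            if (++in_chunk > 5) {
                in_chunk = 0;
                pthread_mutex_unlock(&buf_lock);
                /* writers may run here and advance first_seq */
                pthread_mutex_lock(&buf_lock);
                if (seq < first_seq)
                    goto again;         /* our position was overwritten */
            }
        }
        pthread_mutex_unlock(&buf_lock);
        return count;
    }
}

int main(void)
{
    printf("scanned %ld records\n", scan_records(0));
    return 0;
}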
  14302. @@ -1572,6 +1657,12 @@
  14303. if (!console_drivers)
  14304. return;
  14305. + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
  14306. + if (in_irq() || in_nmi())
  14307. + return;
  14308. + }
  14309. +
  14310. + migrate_disable();
  14311. for_each_console(con) {
  14312. if (exclusive_console && con != exclusive_console)
  14313. continue;
  14314. @@ -1587,6 +1678,7 @@
  14315. else
  14316. con->write(con, text, len);
  14317. }
  14318. + migrate_enable();
  14319. }
  14320. /*
  14321. @@ -1750,6 +1842,13 @@
  14322. /* cpu currently holding logbuf_lock in this function */
  14323. static unsigned int logbuf_cpu = UINT_MAX;
  14324. + /*
  14325. + * Fall back to early_printk if a debugging subsystem has
  14326. + * killed printk output
  14327. + */
  14328. + if (unlikely(forced_early_printk(fmt, args)))
  14329. + return 1;
  14330. +
  14331. if (level == LOGLEVEL_SCHED) {
  14332. level = LOGLEVEL_DEFAULT;
  14333. in_sched = true;
  14334. @@ -1894,13 +1993,23 @@
  14335. /* If called from the scheduler, we can not call up(). */
  14336. if (!in_sched) {
  14337. + int may_trylock = 1;
  14338. +
  14339. lockdep_off();
  14340. +#ifdef CONFIG_PREEMPT_RT_FULL
  14341. + /*
  14342. + * we can't take a sleeping lock with IRQs or preemption disabled
  14343. + * so we can't print in these contexts
  14344. + */
  14345. + if (!(preempt_count() == 0 && !irqs_disabled()))
  14346. + may_trylock = 0;
  14347. +#endif
  14348. /*
  14349. * Try to acquire and then immediately release the console
  14350. * semaphore. The release will print out buffers and wake up
  14351. * /dev/kmsg and syslog() users.
  14352. */
  14353. - if (console_trylock())
  14354. + if (may_trylock && console_trylock())
  14355. console_unlock();
  14356. lockdep_on();
  14357. }
  14358. @@ -2023,26 +2132,6 @@
  14359. #endif /* CONFIG_PRINTK */
  14360. -#ifdef CONFIG_EARLY_PRINTK
  14361. -struct console *early_console;
  14362. -
  14363. -asmlinkage __visible void early_printk(const char *fmt, ...)
  14364. -{
  14365. - va_list ap;
  14366. - char buf[512];
  14367. - int n;
  14368. -
  14369. - if (!early_console)
  14370. - return;
  14371. -
  14372. - va_start(ap, fmt);
  14373. - n = vscnprintf(buf, sizeof(buf), fmt, ap);
  14374. - va_end(ap);
  14375. -
  14376. - early_console->write(early_console, buf, n);
  14377. -}
  14378. -#endif
  14379. -
  14380. static int __add_preferred_console(char *name, int idx, char *options,
  14381. char *brl_options)
  14382. {
  14383. @@ -2312,11 +2401,16 @@
  14384. goto out;
  14385. len = cont_print_text(text, size);
  14386. +#ifdef CONFIG_PREEMPT_RT_FULL
  14387. + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  14388. + call_console_drivers(cont.level, NULL, 0, text, len);
  14389. +#else
  14390. raw_spin_unlock(&logbuf_lock);
  14391. stop_critical_timings();
  14392. call_console_drivers(cont.level, NULL, 0, text, len);
  14393. start_critical_timings();
  14394. local_irq_restore(flags);
  14395. +#endif
  14396. return;
  14397. out:
  14398. raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  14399. @@ -2440,13 +2534,17 @@
  14400. console_idx = log_next(console_idx);
  14401. console_seq++;
  14402. console_prev = msg->flags;
  14403. +#ifdef CONFIG_PREEMPT_RT_FULL
  14404. + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  14405. + call_console_drivers(level, ext_text, ext_len, text, len);
  14406. +#else
  14407. raw_spin_unlock(&logbuf_lock);
  14408. stop_critical_timings(); /* don't trace print latency */
  14409. call_console_drivers(level, ext_text, ext_len, text, len);
  14410. start_critical_timings();
  14411. local_irq_restore(flags);
  14412. -
  14413. +#endif
  14414. if (do_cond_resched)
  14415. cond_resched();
  14416. }
  14417. @@ -2498,6 +2596,11 @@
  14418. {
  14419. struct console *c;
  14420. + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
  14421. + if (in_irq() || in_nmi())
  14422. + return;
  14423. + }
  14424. +
  14425. /*
  14426. * console_unblank can no longer be called in interrupt context unless
  14427. * oops_in_progress is set to 1..
  14428. diff -Nur linux-4.8.15.orig/kernel/ptrace.c linux-4.8.15/kernel/ptrace.c
  14429. --- linux-4.8.15.orig/kernel/ptrace.c 2016-12-15 17:50:48.000000000 +0100
  14430. +++ linux-4.8.15/kernel/ptrace.c 2017-01-01 17:07:16.027426711 +0100
  14431. @@ -128,7 +128,14 @@
  14432. spin_lock_irq(&task->sighand->siglock);
  14433. if (task_is_traced(task) && !__fatal_signal_pending(task)) {
  14434. - task->state = __TASK_TRACED;
  14435. + unsigned long flags;
  14436. +
  14437. + raw_spin_lock_irqsave(&task->pi_lock, flags);
  14438. + if (task->state & __TASK_TRACED)
  14439. + task->state = __TASK_TRACED;
  14440. + else
  14441. + task->saved_state = __TASK_TRACED;
  14442. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  14443. ret = true;
  14444. }
  14445. spin_unlock_irq(&task->sighand->siglock);
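On RT a task that is blocked on a converted spinlock keeps its original sleep state in p->saved_state, so the ptrace hunk above writes __TASK_TRACED into whichever of the two fields currently carries the user-visible state, under pi_lock. The sketch below models that two-field scheme in plain C; struct task, freeze_traced() and lock_wait_done() are simplified stand-ins, not kernel code.

#include <pthread.h>
#include <stdio.h>

#define TASK_RUNNING  0
#define TASK_TRACED   1
#define TASK_LOCKWAIT 2   /* stand-in for sleeping on a converted spinlock */

struct task {
    pthread_mutex_t pi_lock;
    int state;          /* what the scheduler acts on right now          */
    int saved_state;    /* parked state while waiting on a sleeping lock */
};

/* analogue of the ptrace hunk: put TRACED wherever the real state lives */
static void freeze_traced(struct task *t)
{
    pthread_mutex_lock(&t->pi_lock);
    if (t->state == TASK_TRACED)
        t->state = TASK_TRACED;         /* already visible: keep it          */
    else
        t->saved_state = TASK_TRACED;   /* parked: restored after the wait   */
    pthread_mutex_unlock(&t->pi_lock);
}

/* analogue of finishing the sleeping-lock wait: restore the parked state */
static void lock_wait_done(struct task *t)
{
    pthread_mutex_lock(&t->pi_lock);
    t->state = t->saved_state;
    t->saved_state = TASK_RUNNING;
    pthread_mutex_unlock(&t->pi_lock);
}

int main(void)
{
    /* task currently traced and not blocked on a lock */
    struct task a = { PTHREAD_MUTEX_INITIALIZER, TASK_TRACED, TASK_RUNNING };
    /* task that was traced but is blocked on a sleeping lock right now */
    struct task b = { PTHREAD_MUTEX_INITIALIZER, TASK_LOCKWAIT, TASK_TRACED };

    freeze_traced(&a);      /* hits the ->state branch       */
    freeze_traced(&b);      /* hits the ->saved_state branch */
    lock_wait_done(&b);     /* TRACED becomes visible again  */
    printf("a: state=%d  b: state=%d saved=%d\n", a.state, b.state, b.saved_state);
    return 0;
}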
  14446. diff -Nur linux-4.8.15.orig/kernel/rcu/rcutorture.c linux-4.8.15/kernel/rcu/rcutorture.c
  14447. --- linux-4.8.15.orig/kernel/rcu/rcutorture.c 2016-12-15 17:50:48.000000000 +0100
  14448. +++ linux-4.8.15/kernel/rcu/rcutorture.c 2017-01-01 17:07:16.027426711 +0100
  14449. @@ -404,6 +404,7 @@
  14450. .name = "rcu"
  14451. };
  14452. +#ifndef CONFIG_PREEMPT_RT_FULL
  14453. /*
  14454. * Definitions for rcu_bh torture testing.
  14455. */
  14456. @@ -443,6 +444,12 @@
  14457. .name = "rcu_bh"
  14458. };
  14459. +#else
  14460. +static struct rcu_torture_ops rcu_bh_ops = {
  14461. + .ttype = INVALID_RCU_FLAVOR,
  14462. +};
  14463. +#endif
  14464. +
  14465. /*
  14466. * Don't even think about trying any of these in real life!!!
  14467. * The names include "busted", and they really mean it!
  14468. diff -Nur linux-4.8.15.orig/kernel/rcu/tree.c linux-4.8.15/kernel/rcu/tree.c
  14469. --- linux-4.8.15.orig/kernel/rcu/tree.c 2016-12-15 17:50:48.000000000 +0100
  14470. +++ linux-4.8.15/kernel/rcu/tree.c 2017-01-01 17:07:16.027426711 +0100
  14471. @@ -56,6 +56,11 @@
  14472. #include <linux/random.h>
  14473. #include <linux/trace_events.h>
  14474. #include <linux/suspend.h>
  14475. +#include <linux/delay.h>
  14476. +#include <linux/gfp.h>
  14477. +#include <linux/oom.h>
  14478. +#include <linux/smpboot.h>
  14479. +#include "../time/tick-internal.h"
  14480. #include "tree.h"
  14481. #include "rcu.h"
  14482. @@ -259,6 +264,19 @@
  14483. this_cpu_ptr(&rcu_sched_data), true);
  14484. }
  14485. +#ifdef CONFIG_PREEMPT_RT_FULL
  14486. +static void rcu_preempt_qs(void);
  14487. +
  14488. +void rcu_bh_qs(void)
  14489. +{
  14490. + unsigned long flags;
  14491. +
  14492. + /* Callers to this function, rcu_preempt_qs(), must disable irqs. */
  14493. + local_irq_save(flags);
  14494. + rcu_preempt_qs();
  14495. + local_irq_restore(flags);
  14496. +}
  14497. +#else
  14498. void rcu_bh_qs(void)
  14499. {
  14500. if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {
  14501. @@ -268,6 +286,7 @@
  14502. __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false);
  14503. }
  14504. }
  14505. +#endif
  14506. static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
  14507. @@ -448,11 +467,13 @@
  14508. /*
  14509. * Return the number of RCU BH batches started thus far for debug & stats.
  14510. */
  14511. +#ifndef CONFIG_PREEMPT_RT_FULL
  14512. unsigned long rcu_batches_started_bh(void)
  14513. {
  14514. return rcu_bh_state.gpnum;
  14515. }
  14516. EXPORT_SYMBOL_GPL(rcu_batches_started_bh);
  14517. +#endif
  14518. /*
  14519. * Return the number of RCU batches completed thus far for debug & stats.
  14520. @@ -472,6 +493,7 @@
  14521. }
  14522. EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
  14523. +#ifndef CONFIG_PREEMPT_RT_FULL
  14524. /*
  14525. * Return the number of RCU BH batches completed thus far for debug & stats.
  14526. */
  14527. @@ -480,6 +502,7 @@
  14528. return rcu_bh_state.completed;
  14529. }
  14530. EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
  14531. +#endif
  14532. /*
  14533. * Return the number of RCU expedited batches completed thus far for
  14534. @@ -503,6 +526,7 @@
  14535. }
  14536. EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched);
  14537. +#ifndef CONFIG_PREEMPT_RT_FULL
  14538. /*
  14539. * Force a quiescent state.
  14540. */
  14541. @@ -521,6 +545,13 @@
  14542. }
  14543. EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
  14544. +#else
  14545. +void rcu_force_quiescent_state(void)
  14546. +{
  14547. +}
  14548. +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  14549. +#endif
  14550. +
  14551. /*
  14552. * Force a quiescent state for RCU-sched.
  14553. */
  14554. @@ -571,9 +602,11 @@
  14555. case RCU_FLAVOR:
  14556. rsp = rcu_state_p;
  14557. break;
  14558. +#ifndef CONFIG_PREEMPT_RT_FULL
  14559. case RCU_BH_FLAVOR:
  14560. rsp = &rcu_bh_state;
  14561. break;
  14562. +#endif
  14563. case RCU_SCHED_FLAVOR:
  14564. rsp = &rcu_sched_state;
  14565. break;
  14566. @@ -3013,18 +3046,17 @@
  14567. /*
  14568. * Do RCU core processing for the current CPU.
  14569. */
  14570. -static void rcu_process_callbacks(struct softirq_action *unused)
  14571. +static void rcu_process_callbacks(void)
  14572. {
  14573. struct rcu_state *rsp;
  14574. if (cpu_is_offline(smp_processor_id()))
  14575. return;
  14576. - trace_rcu_utilization(TPS("Start RCU core"));
  14577. for_each_rcu_flavor(rsp)
  14578. __rcu_process_callbacks(rsp);
  14579. - trace_rcu_utilization(TPS("End RCU core"));
  14580. }
  14581. +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
  14582. /*
  14583. * Schedule RCU callback invocation. If the specified type of RCU
  14584. * does not support RCU priority boosting, just do a direct call,
  14585. @@ -3036,18 +3068,105 @@
  14586. {
  14587. if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
  14588. return;
  14589. - if (likely(!rsp->boost)) {
  14590. - rcu_do_batch(rsp, rdp);
  14591. + rcu_do_batch(rsp, rdp);
  14592. +}
  14593. +
  14594. +static void rcu_wake_cond(struct task_struct *t, int status)
  14595. +{
  14596. + /*
  14597. + * If the thread is yielding, only wake it when this
  14598. + * is invoked from idle
  14599. + */
  14600. + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
  14601. + wake_up_process(t);
  14602. +}
  14603. +
  14604. +/*
  14605. + * Wake up this CPU's rcuc kthread to do RCU core processing.
  14606. + */
  14607. +static void invoke_rcu_core(void)
  14608. +{
  14609. + unsigned long flags;
  14610. + struct task_struct *t;
  14611. +
  14612. + if (!cpu_online(smp_processor_id()))
  14613. return;
  14614. + local_irq_save(flags);
  14615. + __this_cpu_write(rcu_cpu_has_work, 1);
  14616. + t = __this_cpu_read(rcu_cpu_kthread_task);
  14617. + if (t != NULL && current != t)
  14618. + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
  14619. + local_irq_restore(flags);
  14620. +}
  14621. +
  14622. +static void rcu_cpu_kthread_park(unsigned int cpu)
  14623. +{
  14624. + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
  14625. +}
  14626. +
  14627. +static int rcu_cpu_kthread_should_run(unsigned int cpu)
  14628. +{
  14629. + return __this_cpu_read(rcu_cpu_has_work);
  14630. +}
  14631. +
  14632. +/*
  14633. + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
  14634. + * RCU softirq used in flavors and configurations of RCU that do not
  14635. + * support RCU priority boosting.
  14636. + */
  14637. +static void rcu_cpu_kthread(unsigned int cpu)
  14638. +{
  14639. + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
  14640. + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
  14641. + int spincnt;
  14642. +
  14643. + for (spincnt = 0; spincnt < 10; spincnt++) {
  14644. + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
  14645. + local_bh_disable();
  14646. + *statusp = RCU_KTHREAD_RUNNING;
  14647. + this_cpu_inc(rcu_cpu_kthread_loops);
  14648. + local_irq_disable();
  14649. + work = *workp;
  14650. + *workp = 0;
  14651. + local_irq_enable();
  14652. + if (work)
  14653. + rcu_process_callbacks();
  14654. + local_bh_enable();
  14655. + if (*workp == 0) {
  14656. + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
  14657. + *statusp = RCU_KTHREAD_WAITING;
  14658. + return;
  14659. + }
  14660. }
  14661. - invoke_rcu_callbacks_kthread();
  14662. + *statusp = RCU_KTHREAD_YIELDING;
  14663. + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
  14664. + schedule_timeout_interruptible(2);
  14665. + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
  14666. + *statusp = RCU_KTHREAD_WAITING;
  14667. }
  14668. -static void invoke_rcu_core(void)
  14669. +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
  14670. + .store = &rcu_cpu_kthread_task,
  14671. + .thread_should_run = rcu_cpu_kthread_should_run,
  14672. + .thread_fn = rcu_cpu_kthread,
  14673. + .thread_comm = "rcuc/%u",
  14674. + .setup = rcu_cpu_kthread_setup,
  14675. + .park = rcu_cpu_kthread_park,
  14676. +};
  14677. +
  14678. +/*
  14679. + * Spawn per-CPU RCU core processing kthreads.
  14680. + */
  14681. +static int __init rcu_spawn_core_kthreads(void)
  14682. {
  14683. - if (cpu_online(smp_processor_id()))
  14684. - raise_softirq(RCU_SOFTIRQ);
  14685. + int cpu;
  14686. +
  14687. + for_each_possible_cpu(cpu)
  14688. + per_cpu(rcu_cpu_has_work, cpu) = 0;
  14689. + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
  14690. + return 0;
  14691. }
  14692. +early_initcall(rcu_spawn_core_kthreads);
  14693. /*
  14694. * Handle any core-RCU processing required by a call_rcu() invocation.
  14695. @@ -3192,6 +3311,7 @@
  14696. }
  14697. EXPORT_SYMBOL_GPL(call_rcu_sched);
  14698. +#ifndef CONFIG_PREEMPT_RT_FULL
  14699. /*
  14700. * Queue an RCU callback for invocation after a quicker grace period.
  14701. */
  14702. @@ -3200,6 +3320,7 @@
  14703. __call_rcu(head, func, &rcu_bh_state, -1, 0);
  14704. }
  14705. EXPORT_SYMBOL_GPL(call_rcu_bh);
  14706. +#endif
  14707. /*
  14708. * Queue an RCU callback for lazy invocation after a grace period.
  14709. @@ -3291,6 +3412,7 @@
  14710. }
  14711. EXPORT_SYMBOL_GPL(synchronize_sched);
  14712. +#ifndef CONFIG_PREEMPT_RT_FULL
  14713. /**
  14714. * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
  14715. *
  14716. @@ -3317,6 +3439,7 @@
  14717. wait_rcu_gp(call_rcu_bh);
  14718. }
  14719. EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
  14720. +#endif
  14721. /**
  14722. * get_state_synchronize_rcu - Snapshot current RCU state
  14723. @@ -3695,6 +3818,7 @@
  14724. mutex_unlock(&rsp->barrier_mutex);
  14725. }
  14726. +#ifndef CONFIG_PREEMPT_RT_FULL
  14727. /**
  14728. * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
  14729. */
  14730. @@ -3703,6 +3827,7 @@
  14731. _rcu_barrier(&rcu_bh_state);
  14732. }
  14733. EXPORT_SYMBOL_GPL(rcu_barrier_bh);
  14734. +#endif
  14735. /**
  14736. * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
  14737. @@ -4196,12 +4321,13 @@
  14738. rcu_bootup_announce();
  14739. rcu_init_geometry();
  14740. +#ifndef CONFIG_PREEMPT_RT_FULL
  14741. rcu_init_one(&rcu_bh_state);
  14742. +#endif
  14743. rcu_init_one(&rcu_sched_state);
  14744. if (dump_tree)
  14745. rcu_dump_rcu_node_tree(&rcu_sched_state);
  14746. __rcu_init_preempt();
  14747. - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
  14748. /*
  14749. * We don't need protection against CPU-hotplug here because
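The central change in tree.c is that RCU core processing no longer runs from RCU_SOFTIRQ: rcu_process_callbacks() is driven by per-CPU "rcuc/%u" kthreads registered through smpboot_register_percpu_thread(), with invoke_rcu_core() setting rcu_cpu_has_work and waking the thread, which loops over the flag up to ten times before yielding. The pthread sketch below mimics only that loop shape; it polls instead of doing an explicit wakeup, and all names are illustrative.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int has_work;     /* analogue of the per-CPU rcu_cpu_has_work */
static atomic_int stop;

static void process_callbacks(void)
{
    printf("processing callbacks\n");
}

/* analogue of rcu_cpu_kthread(): consume the work flag, spin a bounded
 * number of times, then back off */
static void *rcuc_thread(void *arg)
{
    (void)arg;
    while (!atomic_load(&stop)) {
        int spincnt;

        for (spincnt = 0; spincnt < 10; spincnt++) {
            int work = atomic_exchange(&has_work, 0);

            if (work)
                process_callbacks();
            if (!atomic_load(&has_work))
                break;          /* nothing queued: go back to waiting */
        }
        usleep(1000);           /* stand-in for the 2-jiffy yield/wait */
    }
    return NULL;
}

/* analogue of invoke_rcu_core(): mark work pending; here the polling
 * loop picks it up instead of an explicit wake-up */
static void invoke_core(void)
{
    atomic_store(&has_work, 1);
}

int main(void)
{
    pthread_t tid;

    pthread_create(&tid, NULL, rcuc_thread, NULL);
    invoke_core();
    usleep(10000);
    atomic_store(&stop, 1);
    pthread_join(tid, NULL);
    return 0;
}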
  14750. diff -Nur linux-4.8.15.orig/kernel/rcu/tree.h linux-4.8.15/kernel/rcu/tree.h
  14751. --- linux-4.8.15.orig/kernel/rcu/tree.h 2016-12-15 17:50:48.000000000 +0100
  14752. +++ linux-4.8.15/kernel/rcu/tree.h 2017-01-01 17:07:16.027426711 +0100
  14753. @@ -587,18 +587,18 @@
  14754. */
  14755. extern struct rcu_state rcu_sched_state;
  14756. +#ifndef CONFIG_PREEMPT_RT_FULL
  14757. extern struct rcu_state rcu_bh_state;
  14758. +#endif
  14759. #ifdef CONFIG_PREEMPT_RCU
  14760. extern struct rcu_state rcu_preempt_state;
  14761. #endif /* #ifdef CONFIG_PREEMPT_RCU */
  14762. -#ifdef CONFIG_RCU_BOOST
  14763. DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  14764. DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
  14765. DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  14766. DECLARE_PER_CPU(char, rcu_cpu_has_work);
  14767. -#endif /* #ifdef CONFIG_RCU_BOOST */
  14768. #ifndef RCU_TREE_NONCORE
  14769. @@ -618,10 +618,9 @@
  14770. static void __init __rcu_init_preempt(void);
  14771. static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
  14772. static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
  14773. -static void invoke_rcu_callbacks_kthread(void);
  14774. static bool rcu_is_callbacks_kthread(void);
  14775. +static void rcu_cpu_kthread_setup(unsigned int cpu);
  14776. #ifdef CONFIG_RCU_BOOST
  14777. -static void rcu_preempt_do_callbacks(void);
  14778. static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
  14779. struct rcu_node *rnp);
  14780. #endif /* #ifdef CONFIG_RCU_BOOST */
  14781. diff -Nur linux-4.8.15.orig/kernel/rcu/tree_plugin.h linux-4.8.15/kernel/rcu/tree_plugin.h
  14782. --- linux-4.8.15.orig/kernel/rcu/tree_plugin.h 2016-12-15 17:50:48.000000000 +0100
  14783. +++ linux-4.8.15/kernel/rcu/tree_plugin.h 2017-01-01 17:07:16.031426957 +0100
  14784. @@ -24,25 +24,10 @@
  14785. * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  14786. */
  14787. -#include <linux/delay.h>
  14788. -#include <linux/gfp.h>
  14789. -#include <linux/oom.h>
  14790. -#include <linux/smpboot.h>
  14791. -#include "../time/tick-internal.h"
  14792. -
  14793. #ifdef CONFIG_RCU_BOOST
  14794. #include "../locking/rtmutex_common.h"
  14795. -/*
  14796. - * Control variables for per-CPU and per-rcu_node kthreads. These
  14797. - * handle all flavors of RCU.
  14798. - */
  14799. -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
  14800. -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  14801. -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  14802. -DEFINE_PER_CPU(char, rcu_cpu_has_work);
  14803. -
  14804. #else /* #ifdef CONFIG_RCU_BOOST */
  14805. /*
  14806. @@ -55,6 +40,14 @@
  14807. #endif /* #else #ifdef CONFIG_RCU_BOOST */
  14808. +/*
  14809. + * Control variables for per-CPU and per-rcu_node kthreads. These
  14810. + * handle all flavors of RCU.
  14811. + */
  14812. +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  14813. +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  14814. +DEFINE_PER_CPU(char, rcu_cpu_has_work);
  14815. +
  14816. #ifdef CONFIG_RCU_NOCB_CPU
  14817. static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
  14818. static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */
  14819. @@ -426,7 +419,7 @@
  14820. }
  14821. /* Hardware IRQ handlers cannot block, complain if they get here. */
  14822. - if (in_irq() || in_serving_softirq()) {
  14823. + if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
  14824. lockdep_rcu_suspicious(__FILE__, __LINE__,
  14825. "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
  14826. pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n",
  14827. @@ -632,15 +625,6 @@
  14828. t->rcu_read_unlock_special.b.need_qs = true;
  14829. }
  14830. -#ifdef CONFIG_RCU_BOOST
  14831. -
  14832. -static void rcu_preempt_do_callbacks(void)
  14833. -{
  14834. - rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
  14835. -}
  14836. -
  14837. -#endif /* #ifdef CONFIG_RCU_BOOST */
  14838. -
  14839. /*
  14840. * Queue a preemptible-RCU callback for invocation after a grace period.
  14841. */
  14842. @@ -829,6 +813,19 @@
  14843. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  14844. +/*
  14845. + * If boosting, set rcuc kthreads to realtime priority.
  14846. + */
  14847. +static void rcu_cpu_kthread_setup(unsigned int cpu)
  14848. +{
  14849. +#ifdef CONFIG_RCU_BOOST
  14850. + struct sched_param sp;
  14851. +
  14852. + sp.sched_priority = kthread_prio;
  14853. + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
  14854. +#endif /* #ifdef CONFIG_RCU_BOOST */
  14855. +}
  14856. +
  14857. #ifdef CONFIG_RCU_BOOST
  14858. #include "../locking/rtmutex_common.h"
  14859. @@ -860,16 +857,6 @@
  14860. #endif /* #else #ifdef CONFIG_RCU_TRACE */
  14861. -static void rcu_wake_cond(struct task_struct *t, int status)
  14862. -{
  14863. - /*
  14864. - * If the thread is yielding, only wake it when this
  14865. - * is invoked from idle
  14866. - */
  14867. - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
  14868. - wake_up_process(t);
  14869. -}
  14870. -
  14871. /*
  14872. * Carry out RCU priority boosting on the task indicated by ->exp_tasks
  14873. * or ->boost_tasks, advancing the pointer to the next task in the
  14874. @@ -1013,23 +1000,6 @@
  14875. }
  14876. /*
  14877. - * Wake up the per-CPU kthread to invoke RCU callbacks.
  14878. - */
  14879. -static void invoke_rcu_callbacks_kthread(void)
  14880. -{
  14881. - unsigned long flags;
  14882. -
  14883. - local_irq_save(flags);
  14884. - __this_cpu_write(rcu_cpu_has_work, 1);
  14885. - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
  14886. - current != __this_cpu_read(rcu_cpu_kthread_task)) {
  14887. - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
  14888. - __this_cpu_read(rcu_cpu_kthread_status));
  14889. - }
  14890. - local_irq_restore(flags);
  14891. -}
  14892. -
  14893. -/*
  14894. * Is the current CPU running the RCU-callbacks kthread?
  14895. * Caller must have preemption disabled.
  14896. */
  14897. @@ -1083,67 +1053,6 @@
  14898. return 0;
  14899. }
  14900. -static void rcu_kthread_do_work(void)
  14901. -{
  14902. - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
  14903. - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
  14904. - rcu_preempt_do_callbacks();
  14905. -}
  14906. -
  14907. -static void rcu_cpu_kthread_setup(unsigned int cpu)
  14908. -{
  14909. - struct sched_param sp;
  14910. -
  14911. - sp.sched_priority = kthread_prio;
  14912. - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
  14913. -}
  14914. -
  14915. -static void rcu_cpu_kthread_park(unsigned int cpu)
  14916. -{
  14917. - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
  14918. -}
  14919. -
  14920. -static int rcu_cpu_kthread_should_run(unsigned int cpu)
  14921. -{
  14922. - return __this_cpu_read(rcu_cpu_has_work);
  14923. -}
  14924. -
  14925. -/*
  14926. - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
  14927. - * RCU softirq used in flavors and configurations of RCU that do not
  14928. - * support RCU priority boosting.
  14929. - */
  14930. -static void rcu_cpu_kthread(unsigned int cpu)
  14931. -{
  14932. - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
  14933. - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
  14934. - int spincnt;
  14935. -
  14936. - for (spincnt = 0; spincnt < 10; spincnt++) {
  14937. - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
  14938. - local_bh_disable();
  14939. - *statusp = RCU_KTHREAD_RUNNING;
  14940. - this_cpu_inc(rcu_cpu_kthread_loops);
  14941. - local_irq_disable();
  14942. - work = *workp;
  14943. - *workp = 0;
  14944. - local_irq_enable();
  14945. - if (work)
  14946. - rcu_kthread_do_work();
  14947. - local_bh_enable();
  14948. - if (*workp == 0) {
  14949. - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
  14950. - *statusp = RCU_KTHREAD_WAITING;
  14951. - return;
  14952. - }
  14953. - }
  14954. - *statusp = RCU_KTHREAD_YIELDING;
  14955. - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
  14956. - schedule_timeout_interruptible(2);
  14957. - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
  14958. - *statusp = RCU_KTHREAD_WAITING;
  14959. -}
  14960. -
  14961. /*
  14962. * Set the per-rcu_node kthread's affinity to cover all CPUs that are
  14963. * served by the rcu_node in question. The CPU hotplug lock is still
  14964. @@ -1174,26 +1083,12 @@
  14965. free_cpumask_var(cm);
  14966. }
  14967. -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
  14968. - .store = &rcu_cpu_kthread_task,
  14969. - .thread_should_run = rcu_cpu_kthread_should_run,
  14970. - .thread_fn = rcu_cpu_kthread,
  14971. - .thread_comm = "rcuc/%u",
  14972. - .setup = rcu_cpu_kthread_setup,
  14973. - .park = rcu_cpu_kthread_park,
  14974. -};
  14975. -
  14976. /*
  14977. * Spawn boost kthreads -- called as soon as the scheduler is running.
  14978. */
  14979. static void __init rcu_spawn_boost_kthreads(void)
  14980. {
  14981. struct rcu_node *rnp;
  14982. - int cpu;
  14983. -
  14984. - for_each_possible_cpu(cpu)
  14985. - per_cpu(rcu_cpu_has_work, cpu) = 0;
  14986. - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
  14987. rcu_for_each_leaf_node(rcu_state_p, rnp)
  14988. (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
  14989. }
  14990. @@ -1216,11 +1111,6 @@
  14991. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  14992. }
  14993. -static void invoke_rcu_callbacks_kthread(void)
  14994. -{
  14995. - WARN_ON_ONCE(1);
  14996. -}
  14997. -
  14998. static bool rcu_is_callbacks_kthread(void)
  14999. {
  15000. return false;
  15001. @@ -1244,7 +1134,7 @@
  15002. #endif /* #else #ifdef CONFIG_RCU_BOOST */
  15003. -#if !defined(CONFIG_RCU_FAST_NO_HZ)
  15004. +#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL)
  15005. /*
  15006. * Check to see if any future RCU-related work will need to be done
  15007. @@ -1261,7 +1151,9 @@
  15008. return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)
  15009. ? 0 : rcu_cpu_has_callbacks(NULL);
  15010. }
  15011. +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */
  15012. +#if !defined(CONFIG_RCU_FAST_NO_HZ)
  15013. /*
  15014. * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
  15015. * after it.
  15016. @@ -1357,6 +1249,8 @@
  15017. return cbs_ready;
  15018. }
  15019. +#ifndef CONFIG_PREEMPT_RT_FULL
  15020. +
  15021. /*
  15022. * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
  15023. * to invoke. If the CPU has callbacks, try to advance them. Tell the
  15024. @@ -1402,6 +1296,7 @@
  15025. *nextevt = basemono + dj * TICK_NSEC;
  15026. return 0;
  15027. }
  15028. +#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */
  15029. /*
  15030. * Prepare a CPU for idle from an RCU perspective. The first major task
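With the kthread machinery moved into tree.c, tree_plugin.h keeps only rcu_cpu_kthread_setup(), which, when CONFIG_RCU_BOOST is set, raises the rcuc thread to SCHED_FIFO at kthread_prio. The user-space equivalent of that single step is pthread_setschedparam(); the priority value 1 below is an assumption, and the call only succeeds with real-time scheduling privileges.

#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

/* analogue of rcu_cpu_kthread_setup(): give the calling thread a
 * real-time FIFO priority; kthread_prio is assumed to be 1 here */
static void kthread_setup(void)
{
    struct sched_param sp;
    int err;

    memset(&sp, 0, sizeof(sp));
    sp.sched_priority = 1;
    err = pthread_setschedparam(pthread_self(), SCHED_FIFO, &sp);
    if (err)
        fprintf(stderr, "SCHED_FIFO not granted: %s\n", strerror(err));
}

int main(void)
{
    kthread_setup();
    return 0;
}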
  15031. diff -Nur linux-4.8.15.orig/kernel/rcu/update.c linux-4.8.15/kernel/rcu/update.c
  15032. --- linux-4.8.15.orig/kernel/rcu/update.c 2016-12-15 17:50:48.000000000 +0100
  15033. +++ linux-4.8.15/kernel/rcu/update.c 2017-01-01 17:07:16.031426957 +0100
  15034. @@ -63,7 +63,7 @@
  15035. #ifndef CONFIG_TINY_RCU
  15036. module_param(rcu_expedited, int, 0);
  15037. module_param(rcu_normal, int, 0);
  15038. -static int rcu_normal_after_boot;
  15039. +static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
  15040. module_param(rcu_normal_after_boot, int, 0);
  15041. #endif /* #ifndef CONFIG_TINY_RCU */
  15042. @@ -130,8 +130,7 @@
  15043. }
  15044. EXPORT_SYMBOL_GPL(rcu_gp_is_normal);
  15045. -static atomic_t rcu_expedited_nesting =
  15046. - ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0);
  15047. +static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1);
  15048. /*
  15049. * Should normal grace-period primitives be expedited? Intended for
  15050. @@ -179,8 +178,7 @@
  15051. */
  15052. void rcu_end_inkernel_boot(void)
  15053. {
  15054. - if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT))
  15055. - rcu_unexpedite_gp();
  15056. + rcu_unexpedite_gp();
  15057. if (rcu_normal_after_boot)
  15058. WRITE_ONCE(rcu_normal, 1);
  15059. }
  15060. @@ -295,6 +293,7 @@
  15061. }
  15062. EXPORT_SYMBOL_GPL(rcu_read_lock_held);
  15063. +#ifndef CONFIG_PREEMPT_RT_FULL
  15064. /**
  15065. * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
  15066. *
  15067. @@ -321,6 +320,7 @@
  15068. return in_softirq() || irqs_disabled();
  15069. }
  15070. EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
  15071. +#endif
  15072. #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
  15073. diff -Nur linux-4.8.15.orig/kernel/relay.c linux-4.8.15/kernel/relay.c
  15074. --- linux-4.8.15.orig/kernel/relay.c 2016-12-15 17:50:48.000000000 +0100
  15075. +++ linux-4.8.15/kernel/relay.c 2017-01-01 17:07:16.031426957 +0100
  15076. @@ -336,6 +336,10 @@
  15077. {
  15078. struct rchan_buf *buf = (struct rchan_buf *)data;
  15079. wake_up_interruptible(&buf->read_wait);
  15080. + /*
  15081. + * Stupid polling for now:
  15082. + */
  15083. + mod_timer(&buf->timer, jiffies + 1);
  15084. }
  15085. /**
  15086. @@ -353,6 +357,7 @@
  15087. init_waitqueue_head(&buf->read_wait);
  15088. kref_init(&buf->kref);
  15089. setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf);
  15090. + mod_timer(&buf->timer, jiffies + 1);
  15091. } else
  15092. del_timer_sync(&buf->timer);
  15093. @@ -767,15 +772,6 @@
  15094. else
  15095. buf->early_bytes += buf->chan->subbuf_size -
  15096. buf->padding[old_subbuf];
  15097. - smp_mb();
  15098. - if (waitqueue_active(&buf->read_wait))
  15099. - /*
  15100. - * Calling wake_up_interruptible() from here
  15101. - * will deadlock if we happen to be logging
  15102. - * from the scheduler (trying to re-grab
  15103. - * rq->lock), so defer it.
  15104. - */
  15105. - mod_timer(&buf->timer, jiffies + 1);
  15106. }
  15107. old = buf->data;
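relay can be asked to log from scheduler context, where waking the reader directly could deadlock on rq->lock, as the removed comment notes; the patch therefore drops the wakeup from the write path and lets buf->timer re-arm itself every jiffy and do the waking by polling. The sketch below models that split with invented names: the producer only flips a flag, and a separate ticker thread performs the actual signalling.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static atomic_int data_ready;
static atomic_int done;

/* the "unsafe" context: may not take 'lock', so it only flips a flag */
static void produce(void)
{
    atomic_store(&data_ready, 1);
}

/* analogue of the self-rearming buf->timer: periodically performs the
 * wakeup on the producer's behalf */
static void *ticker(void *arg)
{
    (void)arg;
    while (!atomic_load(&done)) {
        if (atomic_load(&data_ready)) {
            pthread_mutex_lock(&lock);
            pthread_cond_broadcast(&cond);
            pthread_mutex_unlock(&lock);
        }
        usleep(1000);                   /* roughly one tick */
    }
    return NULL;
}

int main(void)
{
    pthread_t tid;

    pthread_create(&tid, NULL, ticker, NULL);
    produce();

    pthread_mutex_lock(&lock);
    while (!atomic_load(&data_ready))
        pthread_cond_wait(&cond, &lock);
    pthread_mutex_unlock(&lock);
    printf("reader woken by ticker\n");

    atomic_store(&done, 1);
    pthread_join(tid, NULL);
    return 0;
}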
  15108. diff -Nur linux-4.8.15.orig/kernel/sched/completion.c linux-4.8.15/kernel/sched/completion.c
  15109. --- linux-4.8.15.orig/kernel/sched/completion.c 2016-12-15 17:50:48.000000000 +0100
  15110. +++ linux-4.8.15/kernel/sched/completion.c 2017-01-01 17:07:16.031426957 +0100
  15111. @@ -30,10 +30,10 @@
  15112. {
  15113. unsigned long flags;
  15114. - spin_lock_irqsave(&x->wait.lock, flags);
  15115. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  15116. x->done++;
  15117. - __wake_up_locked(&x->wait, TASK_NORMAL, 1);
  15118. - spin_unlock_irqrestore(&x->wait.lock, flags);
  15119. + swake_up_locked(&x->wait);
  15120. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  15121. }
  15122. EXPORT_SYMBOL(complete);
  15123. @@ -50,10 +50,10 @@
  15124. {
  15125. unsigned long flags;
  15126. - spin_lock_irqsave(&x->wait.lock, flags);
  15127. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  15128. x->done += UINT_MAX/2;
  15129. - __wake_up_locked(&x->wait, TASK_NORMAL, 0);
  15130. - spin_unlock_irqrestore(&x->wait.lock, flags);
  15131. + swake_up_all_locked(&x->wait);
  15132. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  15133. }
  15134. EXPORT_SYMBOL(complete_all);
  15135. @@ -62,20 +62,20 @@
  15136. long (*action)(long), long timeout, int state)
  15137. {
  15138. if (!x->done) {
  15139. - DECLARE_WAITQUEUE(wait, current);
  15140. + DECLARE_SWAITQUEUE(wait);
  15141. - __add_wait_queue_tail_exclusive(&x->wait, &wait);
  15142. + __prepare_to_swait(&x->wait, &wait);
  15143. do {
  15144. if (signal_pending_state(state, current)) {
  15145. timeout = -ERESTARTSYS;
  15146. break;
  15147. }
  15148. __set_current_state(state);
  15149. - spin_unlock_irq(&x->wait.lock);
  15150. + raw_spin_unlock_irq(&x->wait.lock);
  15151. timeout = action(timeout);
  15152. - spin_lock_irq(&x->wait.lock);
  15153. + raw_spin_lock_irq(&x->wait.lock);
  15154. } while (!x->done && timeout);
  15155. - __remove_wait_queue(&x->wait, &wait);
  15156. + __finish_swait(&x->wait, &wait);
  15157. if (!x->done)
  15158. return timeout;
  15159. }
  15160. @@ -89,9 +89,9 @@
  15161. {
  15162. might_sleep();
  15163. - spin_lock_irq(&x->wait.lock);
  15164. + raw_spin_lock_irq(&x->wait.lock);
  15165. timeout = do_wait_for_common(x, action, timeout, state);
  15166. - spin_unlock_irq(&x->wait.lock);
  15167. + raw_spin_unlock_irq(&x->wait.lock);
  15168. return timeout;
  15169. }
  15170. @@ -277,12 +277,12 @@
  15171. if (!READ_ONCE(x->done))
  15172. return 0;
  15173. - spin_lock_irqsave(&x->wait.lock, flags);
  15174. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  15175. if (!x->done)
  15176. ret = 0;
  15177. else
  15178. x->done--;
  15179. - spin_unlock_irqrestore(&x->wait.lock, flags);
  15180. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  15181. return ret;
  15182. }
  15183. EXPORT_SYMBOL(try_wait_for_completion);
  15184. @@ -311,7 +311,7 @@
  15185. * after it's acquired the lock.
  15186. */
  15187. smp_rmb();
  15188. - spin_unlock_wait(&x->wait.lock);
  15189. + raw_spin_unlock_wait(&x->wait.lock);
  15190. return true;
  15191. }
  15192. EXPORT_SYMBOL(completion_done);
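Completions are switched from a regular waitqueue under a spinlock to a simple wait queue (swait) under the raw wait.lock, which keeps complete() usable from contexts that must not take sleeping locks on RT. Below is a portable sketch of the same counter-plus-waitqueue structure, with a pthread mutex and condvar standing in for the raw lock and swait head; it is an analogue, not the kernel implementation.

#include <pthread.h>
#include <stdio.h>

struct completion {
    pthread_mutex_t lock;   /* stands in for the raw wait.lock */
    pthread_cond_t  wait;   /* stands in for the swait queue   */
    unsigned int    done;
};

#define COMPLETION_INITIALIZER \
    { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0 }

static void complete(struct completion *x)
{
    pthread_mutex_lock(&x->lock);
    x->done++;
    pthread_cond_signal(&x->wait);      /* swake_up_locked() analogue */
    pthread_mutex_unlock(&x->lock);
}

static void wait_for_completion(struct completion *x)
{
    pthread_mutex_lock(&x->lock);
    while (!x->done)
        pthread_cond_wait(&x->wait, &x->lock);
    x->done--;                          /* consume one completion */
    pthread_mutex_unlock(&x->lock);
}

static struct completion work_done = COMPLETION_INITIALIZER;

static void *worker(void *arg)
{
    (void)arg;
    complete(&work_done);
    return NULL;
}

int main(void)
{
    pthread_t tid;

    pthread_create(&tid, NULL, worker, NULL);
    wait_for_completion(&work_done);
    printf("completed\n");
    pthread_join(tid, NULL);
    return 0;
}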
  15193. diff -Nur linux-4.8.15.orig/kernel/sched/core.c linux-4.8.15/kernel/sched/core.c
  15194. --- linux-4.8.15.orig/kernel/sched/core.c 2016-12-15 17:50:48.000000000 +0100
  15195. +++ linux-4.8.15/kernel/sched/core.c 2017-01-01 17:07:16.035427212 +0100
  15196. @@ -129,7 +129,11 @@
  15197. * Number of tasks to iterate in a single balance run.
  15198. * Limited because this is done with IRQs disabled.
  15199. */
  15200. +#ifndef CONFIG_PREEMPT_RT_FULL
  15201. const_debug unsigned int sysctl_sched_nr_migrate = 32;
  15202. +#else
  15203. +const_debug unsigned int sysctl_sched_nr_migrate = 8;
  15204. +#endif
  15205. /*
  15206. * period over which we average the RT time consumption, measured
  15207. @@ -345,6 +349,7 @@
  15208. hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  15209. rq->hrtick_timer.function = hrtick;
  15210. + rq->hrtick_timer.irqsafe = 1;
  15211. }
  15212. #else /* CONFIG_SCHED_HRTICK */
  15213. static inline void hrtick_clear(struct rq *rq)
  15214. @@ -449,7 +454,7 @@
  15215. head->lastp = &node->next;
  15216. }
  15217. -void wake_up_q(struct wake_q_head *head)
  15218. +void __wake_up_q(struct wake_q_head *head, bool sleeper)
  15219. {
  15220. struct wake_q_node *node = head->first;
  15221. @@ -466,7 +471,10 @@
  15222. * wake_up_process() implies a wmb() to pair with the queueing
  15223. * in wake_q_add() so as not to miss wakeups.
  15224. */
  15225. - wake_up_process(task);
  15226. + if (sleeper)
  15227. + wake_up_lock_sleeper(task);
  15228. + else
  15229. + wake_up_process(task);
  15230. put_task_struct(task);
  15231. }
  15232. }
  15233. @@ -502,6 +510,38 @@
  15234. trace_sched_wake_idle_without_ipi(cpu);
  15235. }
  15236. +#ifdef CONFIG_PREEMPT_LAZY
  15237. +void resched_curr_lazy(struct rq *rq)
  15238. +{
  15239. + struct task_struct *curr = rq->curr;
  15240. + int cpu;
  15241. +
  15242. + if (!sched_feat(PREEMPT_LAZY)) {
  15243. + resched_curr(rq);
  15244. + return;
  15245. + }
  15246. +
  15247. + lockdep_assert_held(&rq->lock);
  15248. +
  15249. + if (test_tsk_need_resched(curr))
  15250. + return;
  15251. +
  15252. + if (test_tsk_need_resched_lazy(curr))
  15253. + return;
  15254. +
  15255. + set_tsk_need_resched_lazy(curr);
  15256. +
  15257. + cpu = cpu_of(rq);
  15258. + if (cpu == smp_processor_id())
  15259. + return;
  15260. +
  15261. + /* NEED_RESCHED_LAZY must be visible before we test polling */
  15262. + smp_mb();
  15263. + if (!tsk_is_polling(curr))
  15264. + smp_send_reschedule(cpu);
  15265. +}
  15266. +#endif
  15267. +
  15268. void resched_cpu(int cpu)
  15269. {
  15270. struct rq *rq = cpu_rq(cpu);
  15271. @@ -525,11 +565,14 @@
  15272. */
  15273. int get_nohz_timer_target(void)
  15274. {
  15275. - int i, cpu = smp_processor_id();
  15276. + int i, cpu;
  15277. struct sched_domain *sd;
  15278. + preempt_disable_rt();
  15279. + cpu = smp_processor_id();
  15280. +
  15281. if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu))
  15282. - return cpu;
  15283. + goto preempt_en_rt;
  15284. rcu_read_lock();
  15285. for_each_domain(cpu, sd) {
  15286. @@ -548,6 +591,8 @@
  15287. cpu = housekeeping_any_cpu();
  15288. unlock:
  15289. rcu_read_unlock();
  15290. +preempt_en_rt:
  15291. + preempt_enable_rt();
  15292. return cpu;
  15293. }
  15294. /*
  15295. @@ -1089,6 +1134,11 @@
  15296. lockdep_assert_held(&p->pi_lock);
  15297. + if (__migrate_disabled(p)) {
  15298. + cpumask_copy(&p->cpus_allowed, new_mask);
  15299. + return;
  15300. + }
  15301. +
  15302. queued = task_on_rq_queued(p);
  15303. running = task_current(rq, p);
  15304. @@ -1111,6 +1161,84 @@
  15305. enqueue_task(rq, p, ENQUEUE_RESTORE);
  15306. }
  15307. +static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
  15308. +static DEFINE_MUTEX(sched_down_mutex);
  15309. +static cpumask_t sched_down_cpumask;
  15310. +
  15311. +void tell_sched_cpu_down_begin(int cpu)
  15312. +{
  15313. + mutex_lock(&sched_down_mutex);
  15314. + cpumask_set_cpu(cpu, &sched_down_cpumask);
  15315. + mutex_unlock(&sched_down_mutex);
  15316. +}
  15317. +
  15318. +void tell_sched_cpu_down_done(int cpu)
  15319. +{
  15320. + mutex_lock(&sched_down_mutex);
  15321. + cpumask_clear_cpu(cpu, &sched_down_cpumask);
  15322. + mutex_unlock(&sched_down_mutex);
  15323. +}
  15324. +
  15325. +/**
  15326. + * migrate_me - try to move the current task off this cpu
  15327. + *
  15328. + * Used by the pin_current_cpu() code to try to get tasks
  15329. + * to move off the current CPU as it is going down.
  15330. + * It will only move the task if the task isn't pinned to
  15331. + * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
  15332. + * and the task has to be in a RUNNING state. Otherwise the
  15333. + * movement of the task will wake it up (change its state
  15334. + * to running) when the task did not expect it.
  15335. + *
  15336. + * Returns 1 if it succeeded in moving the current task
  15337. + * 0 otherwise.
  15338. + */
  15339. +int migrate_me(void)
  15340. +{
  15341. + struct task_struct *p = current;
  15342. + struct migration_arg arg;
  15343. + struct cpumask *cpumask;
  15344. + struct cpumask *mask;
  15345. + unsigned int dest_cpu;
  15346. + struct rq_flags rf;
  15347. + struct rq *rq;
  15348. +
  15349. + /*
  14350. + * We cannot migrate tasks bound to a CPU or tasks that are not
  15351. + * running. The movement of the task will wake it up.
  15352. + */
  15353. + if (p->flags & PF_NO_SETAFFINITY || p->state)
  15354. + return 0;
  15355. +
  15356. + mutex_lock(&sched_down_mutex);
  15357. + rq = task_rq_lock(p, &rf);
  15358. +
  15359. + cpumask = this_cpu_ptr(&sched_cpumasks);
  15360. + mask = &p->cpus_allowed;
  15361. +
  15362. + cpumask_andnot(cpumask, mask, &sched_down_cpumask);
  15363. +
  15364. + if (!cpumask_weight(cpumask)) {
  15365. + /* It's only on this CPU? */
  15366. + task_rq_unlock(rq, p, &rf);
  15367. + mutex_unlock(&sched_down_mutex);
  15368. + return 0;
  15369. + }
  15370. +
  15371. + dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
  15372. +
  15373. + arg.task = p;
  15374. + arg.dest_cpu = dest_cpu;
  15375. +
  15376. + task_rq_unlock(rq, p, &rf);
  15377. +
  15378. + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
  15379. + tlb_migrate_finish(p->mm);
  15380. + mutex_unlock(&sched_down_mutex);
  15381. +
  15382. + return 1;
  15383. +}
  15384. +
  15385. /*
  15386. * Change a given task's CPU affinity. Migrate the thread to a
  15387. * proper CPU and schedule it away if the CPU it's executing on
  15388. @@ -1168,7 +1296,7 @@
  15389. }
  15390. /* Can the task run on the task's current CPU? If so, we're done */
  15391. - if (cpumask_test_cpu(task_cpu(p), new_mask))
  15392. + if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
  15393. goto out;
  15394. dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
  15395. @@ -1355,6 +1483,18 @@
  15396. return ret;
  15397. }
  15398. +static bool check_task_state(struct task_struct *p, long match_state)
  15399. +{
  15400. + bool match = false;
  15401. +
  15402. + raw_spin_lock_irq(&p->pi_lock);
  15403. + if (p->state == match_state || p->saved_state == match_state)
  15404. + match = true;
  15405. + raw_spin_unlock_irq(&p->pi_lock);
  15406. +
  15407. + return match;
  15408. +}
  15409. +
  15410. /*
  15411. * wait_task_inactive - wait for a thread to unschedule.
  15412. *
  15413. @@ -1399,7 +1539,7 @@
  15414. * is actually now running somewhere else!
  15415. */
  15416. while (task_running(rq, p)) {
  15417. - if (match_state && unlikely(p->state != match_state))
  15418. + if (match_state && !check_task_state(p, match_state))
  15419. return 0;
  15420. cpu_relax();
  15421. }
  15422. @@ -1414,7 +1554,8 @@
  15423. running = task_running(rq, p);
  15424. queued = task_on_rq_queued(p);
  15425. ncsw = 0;
  15426. - if (!match_state || p->state == match_state)
  15427. + if (!match_state || p->state == match_state ||
  15428. + p->saved_state == match_state)
  15429. ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
  15430. task_rq_unlock(rq, p, &rf);
  15431. @@ -1670,10 +1811,6 @@
  15432. {
  15433. activate_task(rq, p, en_flags);
  15434. p->on_rq = TASK_ON_RQ_QUEUED;
  15435. -
  15436. - /* if a worker is waking up, notify workqueue */
  15437. - if (p->flags & PF_WQ_WORKER)
  15438. - wq_worker_waking_up(p, cpu_of(rq));
  15439. }
  15440. /*
  15441. @@ -2008,8 +2145,27 @@
  15442. */
  15443. smp_mb__before_spinlock();
  15444. raw_spin_lock_irqsave(&p->pi_lock, flags);
  15445. - if (!(p->state & state))
  15446. + if (!(p->state & state)) {
  15447. + /*
  15448. + * The task might be running due to a spinlock sleeper
  15449. + * wakeup. Check the saved state and set it to running
  15450. + * if the wakeup condition is true.
  15451. + */
  15452. + if (!(wake_flags & WF_LOCK_SLEEPER)) {
  15453. + if (p->saved_state & state) {
  15454. + p->saved_state = TASK_RUNNING;
  15455. + success = 1;
  15456. + }
  15457. + }
  15458. goto out;
  15459. + }
  15460. +
  15461. + /*
  15462. + * If this is a regular wakeup, then we can unconditionally
  15463. + * clear the saved state of a "lock sleeper".
  15464. + */
  15465. + if (!(wake_flags & WF_LOCK_SLEEPER))
  15466. + p->saved_state = TASK_RUNNING;
  15467. trace_sched_waking(p);
  15468. @@ -2093,53 +2249,6 @@
  15469. }
  15470. /**
  15471. - * try_to_wake_up_local - try to wake up a local task with rq lock held
  15472. - * @p: the thread to be awakened
  15473. - *
  15474. - * Put @p on the run-queue if it's not already there. The caller must
  15475. - * ensure that this_rq() is locked, @p is bound to this_rq() and not
  15476. - * the current task.
  15477. - */
  15478. -static void try_to_wake_up_local(struct task_struct *p, struct pin_cookie cookie)
  15479. -{
  15480. - struct rq *rq = task_rq(p);
  15481. -
  15482. - if (WARN_ON_ONCE(rq != this_rq()) ||
  15483. - WARN_ON_ONCE(p == current))
  15484. - return;
  15485. -
  15486. - lockdep_assert_held(&rq->lock);
  15487. -
  15488. - if (!raw_spin_trylock(&p->pi_lock)) {
  15489. - /*
  15490. - * This is OK, because current is on_cpu, which avoids it being
  15491. - * picked for load-balance and preemption/IRQs are still
  15492. - * disabled avoiding further scheduler activity on it and we've
  15493. - * not yet picked a replacement task.
  15494. - */
  15495. - lockdep_unpin_lock(&rq->lock, cookie);
  15496. - raw_spin_unlock(&rq->lock);
  15497. - raw_spin_lock(&p->pi_lock);
  15498. - raw_spin_lock(&rq->lock);
  15499. - lockdep_repin_lock(&rq->lock, cookie);
  15500. - }
  15501. -
  15502. - if (!(p->state & TASK_NORMAL))
  15503. - goto out;
  15504. -
  15505. - trace_sched_waking(p);
  15506. -
  15507. - if (!task_on_rq_queued(p))
  15508. - ttwu_activate(rq, p, ENQUEUE_WAKEUP);
  15509. -
  15510. - ttwu_do_wakeup(rq, p, 0, cookie);
  15511. - if (schedstat_enabled())
  15512. - ttwu_stat(p, smp_processor_id(), 0);
  15513. -out:
  15514. - raw_spin_unlock(&p->pi_lock);
  15515. -}
  15516. -
  15517. -/**
  15518. * wake_up_process - Wake up a specific process
  15519. * @p: The process to be woken up.
  15520. *
  15521. @@ -2157,6 +2266,18 @@
  15522. }
  15523. EXPORT_SYMBOL(wake_up_process);
  15524. +/**
  15525. + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
  15526. + * @p: The process to be woken up.
  15527. + *
  15528. + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
  15529. + * the nature of the wakeup.
  15530. + */
  15531. +int wake_up_lock_sleeper(struct task_struct *p)
  15532. +{
  15533. + return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
  15534. +}
  15535. +
  15536. int wake_up_state(struct task_struct *p, unsigned int state)
  15537. {
  15538. return try_to_wake_up(p, state, 0);
  15539. @@ -2433,6 +2554,9 @@
  15540. p->on_cpu = 0;
  15541. #endif
  15542. init_task_preempt_count(p);
  15543. +#ifdef CONFIG_HAVE_PREEMPT_LAZY
  15544. + task_thread_info(p)->preempt_lazy_count = 0;
  15545. +#endif
  15546. #ifdef CONFIG_SMP
  15547. plist_node_init(&p->pushable_tasks, MAX_PRIO);
  15548. RB_CLEAR_NODE(&p->pushable_dl_tasks);
  15549. @@ -2761,8 +2885,12 @@
  15550. finish_arch_post_lock_switch();
  15551. fire_sched_in_preempt_notifiers(current);
  15552. + /*
  15553. + * We use mmdrop_delayed() here so we don't have to do the
  15554. + * full __mmdrop() when we are the last user.
  15555. + */
  15556. if (mm)
  15557. - mmdrop(mm);
  15558. + mmdrop_delayed(mm);
  15559. if (unlikely(prev_state == TASK_DEAD)) {
  15560. if (prev->sched_class->task_dead)
  15561. prev->sched_class->task_dead(prev);
  15562. @@ -3237,6 +3365,77 @@
  15563. schedstat_inc(this_rq(), sched_count);
  15564. }
  15565. +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
  15566. +
  15567. +void migrate_disable(void)
  15568. +{
  15569. + struct task_struct *p = current;
  15570. +
  15571. + if (in_atomic() || irqs_disabled()) {
  15572. +#ifdef CONFIG_SCHED_DEBUG
  15573. + p->migrate_disable_atomic++;
  15574. +#endif
  15575. + return;
  15576. + }
  15577. +
  15578. +#ifdef CONFIG_SCHED_DEBUG
  15579. + if (unlikely(p->migrate_disable_atomic)) {
  15580. + tracing_off();
  15581. + WARN_ON_ONCE(1);
  15582. + }
  15583. +#endif
  15584. +
  15585. + if (p->migrate_disable) {
  15586. + p->migrate_disable++;
  15587. + return;
  15588. + }
  15589. +
  15590. + preempt_disable();
  15591. + preempt_lazy_disable();
  15592. + pin_current_cpu();
  15593. + p->migrate_disable = 1;
  15594. + preempt_enable();
  15595. +}
  15596. +EXPORT_SYMBOL(migrate_disable);
  15597. +
  15598. +void migrate_enable(void)
  15599. +{
  15600. + struct task_struct *p = current;
  15601. +
  15602. + if (in_atomic() || irqs_disabled()) {
  15603. +#ifdef CONFIG_SCHED_DEBUG
  15604. + p->migrate_disable_atomic--;
  15605. +#endif
  15606. + return;
  15607. + }
  15608. +
  15609. +#ifdef CONFIG_SCHED_DEBUG
  15610. + if (unlikely(p->migrate_disable_atomic)) {
  15611. + tracing_off();
  15612. + WARN_ON_ONCE(1);
  15613. + }
  15614. +#endif
  15615. + WARN_ON_ONCE(p->migrate_disable <= 0);
  15616. +
  15617. + if (p->migrate_disable > 1) {
  15618. + p->migrate_disable--;
  15619. + return;
  15620. + }
  15621. +
  15622. + preempt_disable();
  15623. + /*
  15624. + * Clearing migrate_disable causes tsk_cpus_allowed to
  15625. + * show the tasks original cpu affinity.
  15626. + */
  15627. + p->migrate_disable = 0;
  15628. +
  15629. + unpin_current_cpu();
  15630. + preempt_enable();
  15631. + preempt_lazy_enable();
  15632. +}
  15633. +EXPORT_SYMBOL(migrate_enable);
  15634. +#endif
  15635. +
  15636. /*
  15637. * Pick up the highest-prio task:
  15638. */
  15639. @@ -3364,19 +3563,6 @@
  15640. } else {
  15641. deactivate_task(rq, prev, DEQUEUE_SLEEP);
  15642. prev->on_rq = 0;
  15643. -
  15644. - /*
  15645. - * If a worker went to sleep, notify and ask workqueue
  15646. - * whether it wants to wake up a task to maintain
  15647. - * concurrency.
  15648. - */
  15649. - if (prev->flags & PF_WQ_WORKER) {
  15650. - struct task_struct *to_wakeup;
  15651. -
  15652. - to_wakeup = wq_worker_sleeping(prev);
  15653. - if (to_wakeup)
  15654. - try_to_wake_up_local(to_wakeup, cookie);
  15655. - }
  15656. }
  15657. switch_count = &prev->nvcsw;
  15658. }
  15659. @@ -3386,6 +3572,7 @@
  15660. next = pick_next_task(rq, prev, cookie);
  15661. clear_tsk_need_resched(prev);
  15662. + clear_tsk_need_resched_lazy(prev);
  15663. clear_preempt_need_resched();
  15664. rq->clock_skip_update = 0;
  15665. @@ -3407,9 +3594,20 @@
  15666. static inline void sched_submit_work(struct task_struct *tsk)
  15667. {
  15668. - if (!tsk->state || tsk_is_pi_blocked(tsk))
  15669. + if (!tsk->state)
  15670. return;
  15671. /*
  15672. + * If a worker went to sleep, notify and ask workqueue whether
  15673. + * it wants to wake up a task to maintain concurrency.
  15674. + */
  15675. + if (tsk->flags & PF_WQ_WORKER)
  15676. + wq_worker_sleeping(tsk);
  15677. +
  15678. +
  15679. + if (tsk_is_pi_blocked(tsk))
  15680. + return;
  15681. +
  15682. + /*
  15683. * If we are going to sleep and we have plugged IO queued,
  15684. * make sure to submit it to avoid deadlocks.
  15685. */
  15686. @@ -3417,6 +3615,12 @@
  15687. blk_schedule_flush_plug(tsk);
  15688. }
  15689. +static void sched_update_worker(struct task_struct *tsk)
  15690. +{
  15691. + if (tsk->flags & PF_WQ_WORKER)
  15692. + wq_worker_running(tsk);
  15693. +}
  15694. +
  15695. asmlinkage __visible void __sched schedule(void)
  15696. {
  15697. struct task_struct *tsk = current;
  15698. @@ -3427,6 +3631,7 @@
  15699. __schedule(false);
  15700. sched_preempt_enable_no_resched();
  15701. } while (need_resched());
  15702. + sched_update_worker(tsk);
  15703. }
  15704. EXPORT_SYMBOL(schedule);
  15705. @@ -3490,6 +3695,30 @@
  15706. } while (need_resched());
  15707. }
  15708. +#ifdef CONFIG_PREEMPT_LAZY
  15709. +/*
  14710. + * If TIF_NEED_RESCHED is set then we allow being scheduled away, since it
  14711. + * was set by an RT task. Otherwise we try to avoid being scheduled out as
  14712. + * long as the preempt_lazy_count counter is > 0.
  15713. + */
  15714. +static __always_inline int preemptible_lazy(void)
  15715. +{
  15716. + if (test_thread_flag(TIF_NEED_RESCHED))
  15717. + return 1;
  15718. + if (current_thread_info()->preempt_lazy_count)
  15719. + return 0;
  15720. + return 1;
  15721. +}
  15722. +
  15723. +#else
  15724. +
  15725. +static inline int preemptible_lazy(void)
  15726. +{
  15727. + return 1;
  15728. +}
  15729. +
  15730. +#endif
  15731. +
  15732. #ifdef CONFIG_PREEMPT
  15733. /*
  15734. * this is the entry point to schedule() from in-kernel preemption
  15735. @@ -3504,7 +3733,8 @@
  15736. */
  15737. if (likely(!preemptible()))
  15738. return;
  15739. -
  15740. + if (!preemptible_lazy())
  15741. + return;
  15742. preempt_schedule_common();
  15743. }
  15744. NOKPROBE_SYMBOL(preempt_schedule);
  15745. @@ -3531,6 +3761,9 @@
  15746. if (likely(!preemptible()))
  15747. return;
  15748. + if (!preemptible_lazy())
  15749. + return;
  15750. +
  15751. do {
  15752. /*
  15753. * Because the function tracer can trace preempt_count_sub()
  15754. @@ -3553,7 +3786,16 @@
  15755. * an infinite recursion.
  15756. */
  15757. prev_ctx = exception_enter();
  15758. + /*
  15759. + * The add/subtract must not be traced by the function
  15760. + * tracer. But we still want to account for the
  15761. + * preempt off latency tracer. Since the _notrace versions
  15762. + * of add/subtract skip the accounting for latency tracer
  15763. + * we must force it manually.
  15764. + */
  15765. + start_critical_timings();
  15766. __schedule(true);
  15767. + stop_critical_timings();
  15768. exception_exit(prev_ctx);
  15769. preempt_latency_stop(1);
  15770. @@ -4901,6 +5143,7 @@
  15771. }
  15772. EXPORT_SYMBOL(__cond_resched_lock);
  15773. +#ifndef CONFIG_PREEMPT_RT_FULL
  15774. int __sched __cond_resched_softirq(void)
  15775. {
  15776. BUG_ON(!in_softirq());
  15777. @@ -4914,6 +5157,7 @@
  15778. return 0;
  15779. }
  15780. EXPORT_SYMBOL(__cond_resched_softirq);
  15781. +#endif
  15782. /**
  15783. * yield - yield the current processor to other threads.
  15784. @@ -5283,7 +5527,9 @@
  15785. /* Set the preempt count _outside_ the spinlocks! */
  15786. init_idle_preempt_count(idle, cpu);
  15787. -
  15788. +#ifdef CONFIG_HAVE_PREEMPT_LAZY
  15789. + task_thread_info(idle)->preempt_lazy_count = 0;
  15790. +#endif
  15791. /*
  15792. * The idle tasks have their own, simple scheduling class:
  15793. */
  15794. @@ -5426,6 +5672,8 @@
  15795. #endif /* CONFIG_NUMA_BALANCING */
  15796. #ifdef CONFIG_HOTPLUG_CPU
  15797. +static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
  15798. +
  15799. /*
  15800. * Ensures that the idle task is using init_mm right before its cpu goes
  15801. * offline.
  15802. @@ -5440,7 +5688,12 @@
  15803. switch_mm_irqs_off(mm, &init_mm, current);
  15804. finish_arch_post_lock_switch();
  15805. }
  15806. - mmdrop(mm);
  15807. + /*
  15808. + * Defer the cleanup to a live CPU. On RT we can neither
  15809. + * call mmdrop() nor mmdrop_delayed() from here.
  15810. + */
  15811. + per_cpu(idle_last_mm, smp_processor_id()) = mm;
  15812. +
  15813. }
  15814. /*
  15815. @@ -7315,6 +7568,10 @@
  15816. update_max_interval();
  15817. nohz_balance_exit_idle(cpu);
  15818. hrtick_clear(rq);
  15819. + if (per_cpu(idle_last_mm, cpu)) {
  15820. + mmdrop_delayed(per_cpu(idle_last_mm, cpu));
  15821. + per_cpu(idle_last_mm, cpu) = NULL;
  15822. + }
  15823. return 0;
  15824. }
  15825. #endif
  15826. @@ -7566,7 +7823,7 @@
  15827. #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  15828. static inline int preempt_count_equals(int preempt_offset)
  15829. {
  15830. - int nested = preempt_count() + rcu_preempt_depth();
  15831. + int nested = preempt_count() + sched_rcu_preempt_depth();
  15832. return (nested == preempt_offset);
  15833. }
  15834. diff -Nur linux-4.8.15.orig/kernel/sched/deadline.c linux-4.8.15/kernel/sched/deadline.c
  15835. --- linux-4.8.15.orig/kernel/sched/deadline.c 2016-12-15 17:50:48.000000000 +0100
  15836. +++ linux-4.8.15/kernel/sched/deadline.c 2017-01-01 17:07:16.035427212 +0100
  15837. @@ -697,6 +697,7 @@
  15838. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  15839. timer->function = dl_task_timer;
  15840. + timer->irqsafe = 1;
  15841. }
  15842. static
  15843. diff -Nur linux-4.8.15.orig/kernel/sched/debug.c linux-4.8.15/kernel/sched/debug.c
  15844. --- linux-4.8.15.orig/kernel/sched/debug.c 2016-12-15 17:50:48.000000000 +0100
  15845. +++ linux-4.8.15/kernel/sched/debug.c 2017-01-01 17:07:16.035427212 +0100
  15846. @@ -552,6 +552,9 @@
  15847. P(rt_throttled);
  15848. PN(rt_time);
  15849. PN(rt_runtime);
  15850. +#ifdef CONFIG_SMP
  15851. + P(rt_nr_migratory);
  15852. +#endif
  15853. #undef PN
  15854. #undef P
  15855. @@ -947,6 +950,10 @@
  15856. #endif
  15857. P(policy);
  15858. P(prio);
  15859. +#ifdef CONFIG_PREEMPT_RT_FULL
  15860. + P(migrate_disable);
  15861. +#endif
  15862. + P(nr_cpus_allowed);
  15863. #undef PN
  15864. #undef __PN
  15865. #undef P
  15866. diff -Nur linux-4.8.15.orig/kernel/sched/fair.c linux-4.8.15/kernel/sched/fair.c
  15867. --- linux-4.8.15.orig/kernel/sched/fair.c 2016-12-15 17:50:48.000000000 +0100
  15868. +++ linux-4.8.15/kernel/sched/fair.c 2017-01-01 17:07:16.039427472 +0100
  15869. @@ -3508,7 +3508,7 @@
  15870. ideal_runtime = sched_slice(cfs_rq, curr);
  15871. delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
  15872. if (delta_exec > ideal_runtime) {
  15873. - resched_curr(rq_of(cfs_rq));
  15874. + resched_curr_lazy(rq_of(cfs_rq));
  15875. /*
  15876. * The current task ran long enough, ensure it doesn't get
  15877. * re-elected due to buddy favours.
  15878. @@ -3532,7 +3532,7 @@
  15879. return;
  15880. if (delta > ideal_runtime)
  15881. - resched_curr(rq_of(cfs_rq));
  15882. + resched_curr_lazy(rq_of(cfs_rq));
  15883. }
  15884. static void
  15885. @@ -3677,7 +3677,7 @@
  15886. * validating it and just reschedule.
  15887. */
  15888. if (queued) {
  15889. - resched_curr(rq_of(cfs_rq));
  15890. + resched_curr_lazy(rq_of(cfs_rq));
  15891. return;
  15892. }
  15893. /*
  15894. @@ -3859,7 +3859,7 @@
  15895. * hierarchy can be throttled
  15896. */
  15897. if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
  15898. - resched_curr(rq_of(cfs_rq));
  15899. + resched_curr_lazy(rq_of(cfs_rq));
  15900. }
  15901. static __always_inline
  15902. @@ -4487,7 +4487,7 @@
  15903. if (delta < 0) {
  15904. if (rq->curr == p)
  15905. - resched_curr(rq);
  15906. + resched_curr_lazy(rq);
  15907. return;
  15908. }
  15909. hrtick_start(rq, delta);
  15910. @@ -5676,7 +5676,7 @@
  15911. return;
  15912. preempt:
  15913. - resched_curr(rq);
  15914. + resched_curr_lazy(rq);
  15915. /*
  15916. * Only set the backward buddy when the current task is still
  15917. * on the rq. This can happen when a wakeup gets interleaved
  15918. @@ -8402,7 +8402,7 @@
  15919. * 'current' within the tree based on its new key value.
  15920. */
  15921. swap(curr->vruntime, se->vruntime);
  15922. - resched_curr(rq);
  15923. + resched_curr_lazy(rq);
  15924. }
  15925. se->vruntime -= cfs_rq->min_vruntime;
  15926. @@ -8426,7 +8426,7 @@
  15927. */
  15928. if (rq->curr == p) {
  15929. if (p->prio > oldprio)
  15930. - resched_curr(rq);
  15931. + resched_curr_lazy(rq);
  15932. } else
  15933. check_preempt_curr(rq, p, 0);
  15934. }
  15935. diff -Nur linux-4.8.15.orig/kernel/sched/features.h linux-4.8.15/kernel/sched/features.h
  15936. --- linux-4.8.15.orig/kernel/sched/features.h 2016-12-15 17:50:48.000000000 +0100
  15937. +++ linux-4.8.15/kernel/sched/features.h 2017-01-01 17:07:16.039427472 +0100
  15938. @@ -45,11 +45,19 @@
  15939. */
  15940. SCHED_FEAT(NONTASK_CAPACITY, true)
  15941. +#ifdef CONFIG_PREEMPT_RT_FULL
  15942. +SCHED_FEAT(TTWU_QUEUE, false)
  15943. +# ifdef CONFIG_PREEMPT_LAZY
  15944. +SCHED_FEAT(PREEMPT_LAZY, true)
  15945. +# endif
  15946. +#else
  15947. +
  15948. /*
  15949. * Queue remote wakeups on the target CPU and process them
  15950. * using the scheduler IPI. Reduces rq->lock contention/bounces.
  15951. */
  15952. SCHED_FEAT(TTWU_QUEUE, true)
  15953. +#endif
  15954. #ifdef HAVE_RT_PUSH_IPI
  15955. /*
  15956. diff -Nur linux-4.8.15.orig/kernel/sched/Makefile linux-4.8.15/kernel/sched/Makefile
  15957. --- linux-4.8.15.orig/kernel/sched/Makefile 2016-12-15 17:50:48.000000000 +0100
  15958. +++ linux-4.8.15/kernel/sched/Makefile 2017-01-01 17:07:16.031426957 +0100
  15959. @@ -17,7 +17,7 @@
  15960. obj-y += core.o loadavg.o clock.o cputime.o
  15961. obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
  15962. -obj-y += wait.o swait.o completion.o idle.o
  15963. +obj-y += wait.o swait.o swork.o completion.o idle.o
  15964. obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
  15965. obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
  15966. obj-$(CONFIG_SCHEDSTATS) += stats.o
  15967. diff -Nur linux-4.8.15.orig/kernel/sched/rt.c linux-4.8.15/kernel/sched/rt.c
  15968. --- linux-4.8.15.orig/kernel/sched/rt.c 2016-12-15 17:50:48.000000000 +0100
  15969. +++ linux-4.8.15/kernel/sched/rt.c 2017-01-01 17:07:16.039427472 +0100
  15970. @@ -47,6 +47,7 @@
  15971. hrtimer_init(&rt_b->rt_period_timer,
  15972. CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  15973. + rt_b->rt_period_timer.irqsafe = 1;
  15974. rt_b->rt_period_timer.function = sched_rt_period_timer;
  15975. }
  15976. @@ -101,6 +102,7 @@
  15977. rt_rq->push_cpu = nr_cpu_ids;
  15978. raw_spin_lock_init(&rt_rq->push_lock);
  15979. init_irq_work(&rt_rq->push_work, push_irq_work_func);
  15980. + rt_rq->push_work.flags |= IRQ_WORK_HARD_IRQ;
  15981. #endif
  15982. #endif /* CONFIG_SMP */
  15983. /* We start is dequeued state, because no RT tasks are queued */
  15984. diff -Nur linux-4.8.15.orig/kernel/sched/sched.h linux-4.8.15/kernel/sched/sched.h
  15985. --- linux-4.8.15.orig/kernel/sched/sched.h 2016-12-15 17:50:48.000000000 +0100
  15986. +++ linux-4.8.15/kernel/sched/sched.h 2017-01-01 17:07:16.039427472 +0100
  15987. @@ -1138,6 +1138,7 @@
  15988. #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
  15989. #define WF_FORK 0x02 /* child wakeup after fork */
  15990. #define WF_MIGRATED 0x4 /* internal use, task got migrated */
  15991. +#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */
  15992. /*
  15993. * To aid in avoiding the subversion of "niceness" due to uneven distribution
  15994. @@ -1316,6 +1317,15 @@
  15995. extern void resched_curr(struct rq *rq);
  15996. extern void resched_cpu(int cpu);
  15997. +#ifdef CONFIG_PREEMPT_LAZY
  15998. +extern void resched_curr_lazy(struct rq *rq);
  15999. +#else
  16000. +static inline void resched_curr_lazy(struct rq *rq)
  16001. +{
  16002. + resched_curr(rq);
  16003. +}
  16004. +#endif
  16005. +
  16006. extern struct rt_bandwidth def_rt_bandwidth;
  16007. extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
  16008. diff -Nur linux-4.8.15.orig/kernel/sched/swait.c linux-4.8.15/kernel/sched/swait.c
  16009. --- linux-4.8.15.orig/kernel/sched/swait.c 2016-12-15 17:50:48.000000000 +0100
  16010. +++ linux-4.8.15/kernel/sched/swait.c 2017-01-01 17:07:16.039427472 +0100
  16011. @@ -1,5 +1,6 @@
  16012. #include <linux/sched.h>
  16013. #include <linux/swait.h>
  16014. +#include <linux/suspend.h>
  16015. void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
  16016. struct lock_class_key *key)
  16017. @@ -29,6 +30,25 @@
  16018. }
  16019. EXPORT_SYMBOL(swake_up_locked);
  16020. +void swake_up_all_locked(struct swait_queue_head *q)
  16021. +{
  16022. + struct swait_queue *curr;
  16023. + int wakes = 0;
  16024. +
  16025. + while (!list_empty(&q->task_list)) {
  16026. +
  16027. + curr = list_first_entry(&q->task_list, typeof(*curr),
  16028. + task_list);
  16029. + wake_up_process(curr->task);
  16030. + list_del_init(&curr->task_list);
  16031. + wakes++;
  16032. + }
  16033. + if (pm_in_action)
  16034. + return;
  16035. + WARN(wakes > 2, "complete_all() with %d waiters\n", wakes);
  16036. +}
  16037. +EXPORT_SYMBOL(swake_up_all_locked);
  16038. +
  16039. void swake_up(struct swait_queue_head *q)
  16040. {
  16041. unsigned long flags;
  16042. @@ -54,6 +74,7 @@
  16043. if (!swait_active(q))
  16044. return;
  16045. + WARN_ON(irqs_disabled());
  16046. raw_spin_lock_irq(&q->lock);
  16047. list_splice_init(&q->task_list, &tmp);
  16048. while (!list_empty(&tmp)) {
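For illustration only, a minimal usage sketch of the simple-wait interface touched above; it is not part of the patch. It assumes the stock v4.8 <linux/swait.h> helpers (DECLARE_SWAIT_QUEUE_HEAD, swait_event) plus the swake_up_all_locked() declaration this patch adds to that header; my_wq, my_done and both functions are hypothetical names. The point it demonstrates: the *_locked variant must be called with q->lock already held, typically because the wakeup condition is published under that same raw lock.

#include <linux/swait.h>

static DECLARE_SWAIT_QUEUE_HEAD(my_wq);
static bool my_done;

static void my_complete_all(void)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&my_wq.lock, flags);
        my_done = true;                 /* publish the condition under q->lock */
        swake_up_all_locked(&my_wq);    /* wake every waiter with the lock held;
                                         * the patch warns if more than two
                                         * waiters are woken outside suspend */
        raw_spin_unlock_irqrestore(&my_wq.lock, flags);
}

static void my_wait(void)
{
        swait_event(my_wq, my_done);    /* block until my_done is set */
}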
  16049. diff -Nur linux-4.8.15.orig/kernel/sched/swork.c linux-4.8.15/kernel/sched/swork.c
  16050. --- linux-4.8.15.orig/kernel/sched/swork.c 1970-01-01 01:00:00.000000000 +0100
  16051. +++ linux-4.8.15/kernel/sched/swork.c 2017-01-01 17:07:16.039427472 +0100
  16052. @@ -0,0 +1,173 @@
  16053. +/*
  16054. + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de
  16055. + *
  16056. + * Provides a framework for enqueuing callbacks from irq context
  16057. + * PREEMPT_RT_FULL safe. The callbacks are executed in kthread context.
  16058. + */
  16059. +
  16060. +#include <linux/swait.h>
  16061. +#include <linux/swork.h>
  16062. +#include <linux/kthread.h>
  16063. +#include <linux/slab.h>
  16064. +#include <linux/spinlock.h>
  16065. +#include <linux/export.h>
  16066. +
  16067. +#define SWORK_EVENT_PENDING (1 << 0)
  16068. +
  16069. +static DEFINE_MUTEX(worker_mutex);
  16070. +static struct sworker *glob_worker;
  16071. +
  16072. +struct sworker {
  16073. + struct list_head events;
  16074. + struct swait_queue_head wq;
  16075. +
  16076. + raw_spinlock_t lock;
  16077. +
  16078. + struct task_struct *task;
  16079. + int refs;
  16080. +};
  16081. +
  16082. +static bool swork_readable(struct sworker *worker)
  16083. +{
  16084. + bool r;
  16085. +
  16086. + if (kthread_should_stop())
  16087. + return true;
  16088. +
  16089. + raw_spin_lock_irq(&worker->lock);
  16090. + r = !list_empty(&worker->events);
  16091. + raw_spin_unlock_irq(&worker->lock);
  16092. +
  16093. + return r;
  16094. +}
  16095. +
  16096. +static int swork_kthread(void *arg)
  16097. +{
  16098. + struct sworker *worker = arg;
  16099. +
  16100. + for (;;) {
  16101. + swait_event_interruptible(worker->wq,
  16102. + swork_readable(worker));
  16103. + if (kthread_should_stop())
  16104. + break;
  16105. +
  16106. + raw_spin_lock_irq(&worker->lock);
  16107. + while (!list_empty(&worker->events)) {
  16108. + struct swork_event *sev;
  16109. +
  16110. + sev = list_first_entry(&worker->events,
  16111. + struct swork_event, item);
  16112. + list_del(&sev->item);
  16113. + raw_spin_unlock_irq(&worker->lock);
  16114. +
  16115. + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING,
  16116. + &sev->flags));
  16117. + sev->func(sev);
  16118. + raw_spin_lock_irq(&worker->lock);
  16119. + }
  16120. + raw_spin_unlock_irq(&worker->lock);
  16121. + }
  16122. + return 0;
  16123. +}
  16124. +
  16125. +static struct sworker *swork_create(void)
  16126. +{
  16127. + struct sworker *worker;
  16128. +
  16129. + worker = kzalloc(sizeof(*worker), GFP_KERNEL);
  16130. + if (!worker)
  16131. + return ERR_PTR(-ENOMEM);
  16132. +
  16133. + INIT_LIST_HEAD(&worker->events);
  16134. + raw_spin_lock_init(&worker->lock);
  16135. + init_swait_queue_head(&worker->wq);
  16136. +
  16137. + worker->task = kthread_run(swork_kthread, worker, "kswork");
  16138. + if (IS_ERR(worker->task)) {
  16139. + kfree(worker);
  16140. + return ERR_PTR(-ENOMEM);
  16141. + }
  16142. +
  16143. + return worker;
  16144. +}
  16145. +
  16146. +static void swork_destroy(struct sworker *worker)
  16147. +{
  16148. + kthread_stop(worker->task);
  16149. +
  16150. + WARN_ON(!list_empty(&worker->events));
  16151. + kfree(worker);
  16152. +}
  16153. +
  16154. +/**
  16155. + * swork_queue - queue swork
  16156. + *
  16157. + * Returns %false if @sev was already on a queue, %true otherwise.
  16158. + *
  16159. + * The work is queued and processed on a random CPU.
  16160. + */
  16161. +bool swork_queue(struct swork_event *sev)
  16162. +{
  16163. + unsigned long flags;
  16164. +
  16165. + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags))
  16166. + return false;
  16167. +
  16168. + raw_spin_lock_irqsave(&glob_worker->lock, flags);
  16169. + list_add_tail(&sev->item, &glob_worker->events);
  16170. + raw_spin_unlock_irqrestore(&glob_worker->lock, flags);
  16171. +
  16172. + swake_up(&glob_worker->wq);
  16173. + return true;
  16174. +}
  16175. +EXPORT_SYMBOL_GPL(swork_queue);
  16176. +
  16177. +/**
  16178. + * swork_get - get an instance of the sworker
  16179. + *
  16180. + * Returns a negative error code if the initialization of the worker
  16181. + * failed, %0 otherwise.
  16182. + *
  16183. + */
  16184. +int swork_get(void)
  16185. +{
  16186. + struct sworker *worker;
  16187. +
  16188. + mutex_lock(&worker_mutex);
  16189. + if (!glob_worker) {
  16190. + worker = swork_create();
  16191. + if (IS_ERR(worker)) {
  16192. + mutex_unlock(&worker_mutex);
  16193. + return -ENOMEM;
  16194. + }
  16195. +
  16196. + glob_worker = worker;
  16197. + }
  16198. +
  16199. + glob_worker->refs++;
  16200. + mutex_unlock(&worker_mutex);
  16201. +
  16202. + return 0;
  16203. +}
  16204. +EXPORT_SYMBOL_GPL(swork_get);
  16205. +
  16206. +/**
  16207. + * swork_put - puts an instance of the sworker
  16208. + *
  16209. + * Will destroy the sworker thread. This function must not be called until all
  16210. + * queued events have been completed.
  16211. + */
  16212. +void swork_put(void)
  16213. +{
  16214. + mutex_lock(&worker_mutex);
  16215. +
  16216. + glob_worker->refs--;
  16217. + if (glob_worker->refs > 0)
  16218. + goto out;
  16219. +
  16220. + swork_destroy(glob_worker);
  16221. + glob_worker = NULL;
  16222. +out:
  16223. + mutex_unlock(&worker_mutex);
  16224. +}
  16225. +EXPORT_SYMBOL_GPL(swork_put);
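A hypothetical usage sketch of the swork API added above, not part of the patch: it defers work from hard-irq context to the kswork thread, where the callback may sleep. It assumes the companion include/linux/swork.h introduced elsewhere in this patch supplies struct swork_event and an INIT_SWORK(event, fn) initializer alongside the swork_get()/swork_queue()/swork_put() calls shown here; all my_* names are made up.

#include <linux/interrupt.h>
#include <linux/swork.h>

static struct swork_event my_ev;

/* Runs in the "kswork" kthread: it may sleep and take sleeping locks. */
static void my_ev_fn(struct swork_event *sev)
{
        pr_info("deferred work executed\n");
}

static int my_setup(void)
{
        int ret;

        ret = swork_get();              /* must precede swork_queue(): the
                                         * queue path uses the global worker */
        if (ret)
                return ret;
        INIT_SWORK(&my_ev, my_ev_fn);   /* assumed initializer from swork.h */
        return 0;
}

static irqreturn_t my_irq(int irq, void *dev_id)
{
        swork_queue(&my_ev);            /* safe from hard interrupt context */
        return IRQ_HANDLED;
}

static void my_teardown(void)
{
        swork_put();                    /* last user destroys the kthread */
}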
  16226. diff -Nur linux-4.8.15.orig/kernel/signal.c linux-4.8.15/kernel/signal.c
  16227. --- linux-4.8.15.orig/kernel/signal.c 2016-12-15 17:50:48.000000000 +0100
  16228. +++ linux-4.8.15/kernel/signal.c 2017-01-01 17:07:16.043427737 +0100
  16229. @@ -14,6 +14,7 @@
  16230. #include <linux/export.h>
  16231. #include <linux/init.h>
  16232. #include <linux/sched.h>
  16233. +#include <linux/sched/rt.h>
  16234. #include <linux/fs.h>
  16235. #include <linux/tty.h>
  16236. #include <linux/binfmts.h>
  16237. @@ -352,13 +353,30 @@
  16238. return false;
  16239. }
  16240. +static inline struct sigqueue *get_task_cache(struct task_struct *t)
  16241. +{
  16242. + struct sigqueue *q = t->sigqueue_cache;
  16243. +
  16244. + if (cmpxchg(&t->sigqueue_cache, q, NULL) != q)
  16245. + return NULL;
  16246. + return q;
  16247. +}
  16248. +
  16249. +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
  16250. +{
  16251. + if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
  16252. + return 0;
  16253. + return 1;
  16254. +}
  16255. +
  16256. /*
  16257. * allocate a new signal queue record
  16258. * - this may be called without locks if and only if t == current, otherwise an
  16259. * appropriate lock must be held to stop the target task from exiting
  16260. */
  16261. static struct sigqueue *
  16262. -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
  16263. +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
  16264. + int override_rlimit, int fromslab)
  16265. {
  16266. struct sigqueue *q = NULL;
  16267. struct user_struct *user;
  16268. @@ -375,7 +393,10 @@
  16269. if (override_rlimit ||
  16270. atomic_read(&user->sigpending) <=
  16271. task_rlimit(t, RLIMIT_SIGPENDING)) {
  16272. - q = kmem_cache_alloc(sigqueue_cachep, flags);
  16273. + if (!fromslab)
  16274. + q = get_task_cache(t);
  16275. + if (!q)
  16276. + q = kmem_cache_alloc(sigqueue_cachep, flags);
  16277. } else {
  16278. print_dropped_signal(sig);
  16279. }
  16280. @@ -392,6 +413,13 @@
  16281. return q;
  16282. }
  16283. +static struct sigqueue *
  16284. +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags,
  16285. + int override_rlimit)
  16286. +{
  16287. + return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0);
  16288. +}
  16289. +
  16290. static void __sigqueue_free(struct sigqueue *q)
  16291. {
  16292. if (q->flags & SIGQUEUE_PREALLOC)
  16293. @@ -401,6 +429,21 @@
  16294. kmem_cache_free(sigqueue_cachep, q);
  16295. }
  16296. +static void sigqueue_free_current(struct sigqueue *q)
  16297. +{
  16298. + struct user_struct *up;
  16299. +
  16300. + if (q->flags & SIGQUEUE_PREALLOC)
  16301. + return;
  16302. +
  16303. + up = q->user;
  16304. + if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
  16305. + atomic_dec(&up->sigpending);
  16306. + free_uid(up);
  16307. + } else
  16308. + __sigqueue_free(q);
  16309. +}
  16310. +
  16311. void flush_sigqueue(struct sigpending *queue)
  16312. {
  16313. struct sigqueue *q;
  16314. @@ -414,6 +457,21 @@
  16315. }
  16316. /*
  16317. + * Called from __exit_signal. Flush tsk->pending and
  16318. + * tsk->sigqueue_cache
  16319. + */
  16320. +void flush_task_sigqueue(struct task_struct *tsk)
  16321. +{
  16322. + struct sigqueue *q;
  16323. +
  16324. + flush_sigqueue(&tsk->pending);
  16325. +
  16326. + q = get_task_cache(tsk);
  16327. + if (q)
  16328. + kmem_cache_free(sigqueue_cachep, q);
  16329. +}
  16330. +
  16331. +/*
  16332. * Flush all pending signals for this kthread.
  16333. */
  16334. void flush_signals(struct task_struct *t)
  16335. @@ -525,7 +583,7 @@
  16336. still_pending:
  16337. list_del_init(&first->list);
  16338. copy_siginfo(info, &first->info);
  16339. - __sigqueue_free(first);
  16340. + sigqueue_free_current(first);
  16341. } else {
  16342. /*
  16343. * Ok, it wasn't in the queue. This must be
  16344. @@ -560,6 +618,8 @@
  16345. {
  16346. int signr;
  16347. + WARN_ON_ONCE(tsk != current);
  16348. +
  16349. /* We only dequeue private signals from ourselves, we don't let
  16350. * signalfd steal them
  16351. */
  16352. @@ -1156,8 +1216,8 @@
  16353. * We don't want to have recursive SIGSEGV's etc, for example,
  16354. * that is why we also clear SIGNAL_UNKILLABLE.
  16355. */
  16356. -int
  16357. -force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  16358. +static int
  16359. +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  16360. {
  16361. unsigned long int flags;
  16362. int ret, blocked, ignored;
  16363. @@ -1182,6 +1242,39 @@
  16364. return ret;
  16365. }
  16366. +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  16367. +{
  16368. +/*
  16369. + * On some archs, PREEMPT_RT has to delay sending a signal from a trap
  16370. + * since it can not enable preemption, and the signal code's spin_locks
  16371. + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
  16372. + * send the signal on exit of the trap.
  16373. + */
  16374. +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
  16375. + if (in_atomic()) {
  16376. + if (WARN_ON_ONCE(t != current))
  16377. + return 0;
  16378. + if (WARN_ON_ONCE(t->forced_info.si_signo))
  16379. + return 0;
  16380. +
  16381. + if (is_si_special(info)) {
  16382. + WARN_ON_ONCE(info != SEND_SIG_PRIV);
  16383. + t->forced_info.si_signo = sig;
  16384. + t->forced_info.si_errno = 0;
  16385. + t->forced_info.si_code = SI_KERNEL;
  16386. + t->forced_info.si_pid = 0;
  16387. + t->forced_info.si_uid = 0;
  16388. + } else {
  16389. + t->forced_info = *info;
  16390. + }
  16391. +
  16392. + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
  16393. + return 0;
  16394. + }
  16395. +#endif
  16396. + return do_force_sig_info(sig, info, t);
  16397. +}
  16398. +
  16399. /*
  16400. * Nuke all other threads in the group.
  16401. */
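The comment above depends on the architecture's exit-to-user path consuming task->forced_info once TIF_NOTIFY_RESUME fires; that consumer lives in the arch-specific parts of this patch, not in this hunk. A rough, hypothetical reconstruction of what that side has to do, shown only to make the round trip concrete:

#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
        /* In the arch's TIF_NOTIFY_RESUME handling, with preemption
         * available again: deliver the signal the trap handler deferred. */
        if (unlikely(current->forced_info.si_signo)) {
                force_sig_info(current->forced_info.si_signo,
                               &current->forced_info, current);
                current->forced_info.si_signo = 0;
        }
#endif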
  16402. @@ -1216,12 +1309,12 @@
  16403. * Disable interrupts early to avoid deadlocks.
  16404. * See rcu_read_unlock() comment header for details.
  16405. */
  16406. - local_irq_save(*flags);
  16407. + local_irq_save_nort(*flags);
  16408. rcu_read_lock();
  16409. sighand = rcu_dereference(tsk->sighand);
  16410. if (unlikely(sighand == NULL)) {
  16411. rcu_read_unlock();
  16412. - local_irq_restore(*flags);
  16413. + local_irq_restore_nort(*flags);
  16414. break;
  16415. }
  16416. /*
  16417. @@ -1242,7 +1335,7 @@
  16418. }
  16419. spin_unlock(&sighand->siglock);
  16420. rcu_read_unlock();
  16421. - local_irq_restore(*flags);
  16422. + local_irq_restore_nort(*flags);
  16423. }
  16424. return sighand;
  16425. @@ -1485,7 +1578,8 @@
  16426. */
  16427. struct sigqueue *sigqueue_alloc(void)
  16428. {
  16429. - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0);
  16430. + /* Preallocated sigqueue objects always come from the slab cache! */
  16431. + struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1);
  16432. if (q)
  16433. q->flags |= SIGQUEUE_PREALLOC;
  16434. @@ -1846,15 +1940,7 @@
  16435. if (gstop_done && ptrace_reparented(current))
  16436. do_notify_parent_cldstop(current, false, why);
  16437. - /*
  16438. - * Don't want to allow preemption here, because
  16439. - * sys_ptrace() needs this task to be inactive.
  16440. - *
  16441. - * XXX: implement read_unlock_no_resched().
  16442. - */
  16443. - preempt_disable();
  16444. read_unlock(&tasklist_lock);
  16445. - preempt_enable_no_resched();
  16446. freezable_schedule();
  16447. } else {
  16448. /*
  16449. diff -Nur linux-4.8.15.orig/kernel/softirq.c linux-4.8.15/kernel/softirq.c
  16450. --- linux-4.8.15.orig/kernel/softirq.c 2016-12-15 17:50:48.000000000 +0100
  16451. +++ linux-4.8.15/kernel/softirq.c 2017-01-01 17:07:16.043427737 +0100
  16452. @@ -21,10 +21,12 @@
  16453. #include <linux/freezer.h>
  16454. #include <linux/kthread.h>
  16455. #include <linux/rcupdate.h>
  16456. +#include <linux/delay.h>
  16457. #include <linux/ftrace.h>
  16458. #include <linux/smp.h>
  16459. #include <linux/smpboot.h>
  16460. #include <linux/tick.h>
  16461. +#include <linux/locallock.h>
  16462. #include <linux/irq.h>
  16463. #define CREATE_TRACE_POINTS
  16464. @@ -56,12 +58,108 @@
  16465. static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
  16466. DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
  16467. +#ifdef CONFIG_PREEMPT_RT_FULL
  16468. +#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ))
  16469. +DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd);
  16470. +#endif
  16471. const char * const softirq_to_name[NR_SOFTIRQS] = {
  16472. "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
  16473. "TASKLET", "SCHED", "HRTIMER", "RCU"
  16474. };
  16475. +#ifdef CONFIG_NO_HZ_COMMON
  16476. +# ifdef CONFIG_PREEMPT_RT_FULL
  16477. +
  16478. +struct softirq_runner {
  16479. + struct task_struct *runner[NR_SOFTIRQS];
  16480. +};
  16481. +
  16482. +static DEFINE_PER_CPU(struct softirq_runner, softirq_runners);
  16483. +
  16484. +static inline void softirq_set_runner(unsigned int sirq)
  16485. +{
  16486. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  16487. +
  16488. + sr->runner[sirq] = current;
  16489. +}
  16490. +
  16491. +static inline void softirq_clr_runner(unsigned int sirq)
  16492. +{
  16493. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  16494. +
  16495. + sr->runner[sirq] = NULL;
  16496. +}
  16497. +
  16498. +/*
  16499. + * On preempt-rt a softirq running context might be blocked on a
  16500. + * lock. There might be no other runnable task on this CPU because the
  16501. + * lock owner runs on some other CPU. So we have to go into idle with
  16502. + * the pending bit set. Therefore we need to check this, otherwise we
  16503. + * warn about false positives which confuses users and defeats the
  16504. + * whole purpose of this test.
  16505. + *
  16506. + * This code is called with interrupts disabled.
  16507. + */
  16508. +void softirq_check_pending_idle(void)
  16509. +{
  16510. + static int rate_limit;
  16511. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  16512. + u32 warnpending;
  16513. + int i;
  16514. +
  16515. + if (rate_limit >= 10)
  16516. + return;
  16517. +
  16518. + warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
  16519. + for (i = 0; i < NR_SOFTIRQS; i++) {
  16520. + struct task_struct *tsk = sr->runner[i];
  16521. +
  16522. + /*
  16523. + * The wakeup code in rtmutex.c wakes up the task
  16524. + * _before_ it sets pi_blocked_on to NULL under
  16525. + * tsk->pi_lock. So we need to check for both: state
  16526. + * and pi_blocked_on.
  16527. + */
  16528. + if (tsk) {
  16529. + raw_spin_lock(&tsk->pi_lock);
  16530. + if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
  16531. + /* Clear all bits pending in that task */
  16532. + warnpending &= ~(tsk->softirqs_raised);
  16533. + warnpending &= ~(1 << i);
  16534. + }
  16535. + raw_spin_unlock(&tsk->pi_lock);
  16536. + }
  16537. + }
  16538. +
  16539. + if (warnpending) {
  16540. + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
  16541. + warnpending);
  16542. + rate_limit++;
  16543. + }
  16544. +}
  16545. +# else
  16546. +/*
  16547. + * On !PREEMPT_RT we just printk rate limited:
  16548. + */
  16549. +void softirq_check_pending_idle(void)
  16550. +{
  16551. + static int rate_limit;
  16552. +
  16553. + if (rate_limit < 10 &&
  16554. + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
  16555. + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
  16556. + local_softirq_pending());
  16557. + rate_limit++;
  16558. + }
  16559. +}
  16560. +# endif
  16561. +
  16562. +#else /* !CONFIG_NO_HZ_COMMON */
  16563. +static inline void softirq_set_runner(unsigned int sirq) { }
  16564. +static inline void softirq_clr_runner(unsigned int sirq) { }
  16565. +#endif
  16566. +
  16567. /*
  16568. * we cannot loop indefinitely here to avoid userspace starvation,
  16569. * but we also don't want to introduce a worst case 1/HZ latency
  16570. @@ -77,6 +175,79 @@
  16571. wake_up_process(tsk);
  16572. }
  16573. +#ifdef CONFIG_PREEMPT_RT_FULL
  16574. +static void wakeup_timer_softirqd(void)
  16575. +{
  16576. + /* Interrupts are disabled: no need to stop preemption */
  16577. + struct task_struct *tsk = __this_cpu_read(ktimer_softirqd);
  16578. +
  16579. + if (tsk && tsk->state != TASK_RUNNING)
  16580. + wake_up_process(tsk);
  16581. +}
  16582. +#endif
  16583. +
  16584. +static void handle_softirq(unsigned int vec_nr)
  16585. +{
  16586. + struct softirq_action *h = softirq_vec + vec_nr;
  16587. + int prev_count;
  16588. +
  16589. + prev_count = preempt_count();
  16590. +
  16591. + kstat_incr_softirqs_this_cpu(vec_nr);
  16592. +
  16593. + trace_softirq_entry(vec_nr);
  16594. + h->action(h);
  16595. + trace_softirq_exit(vec_nr);
  16596. + if (unlikely(prev_count != preempt_count())) {
  16597. + pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
  16598. + vec_nr, softirq_to_name[vec_nr], h->action,
  16599. + prev_count, preempt_count());
  16600. + preempt_count_set(prev_count);
  16601. + }
  16602. +}
  16603. +
  16604. +#ifndef CONFIG_PREEMPT_RT_FULL
  16605. +static inline int ksoftirqd_softirq_pending(void)
  16606. +{
  16607. + return local_softirq_pending();
  16608. +}
  16609. +
  16610. +static void handle_pending_softirqs(u32 pending)
  16611. +{
  16612. + struct softirq_action *h = softirq_vec;
  16613. + int softirq_bit;
  16614. +
  16615. + local_irq_enable();
  16616. +
  16617. + h = softirq_vec;
  16618. +
  16619. + while ((softirq_bit = ffs(pending))) {
  16620. + unsigned int vec_nr;
  16621. +
  16622. + h += softirq_bit - 1;
  16623. + vec_nr = h - softirq_vec;
  16624. + handle_softirq(vec_nr);
  16625. +
  16626. + h++;
  16627. + pending >>= softirq_bit;
  16628. + }
  16629. +
  16630. + rcu_bh_qs();
  16631. + local_irq_disable();
  16632. +}
  16633. +
  16634. +static void run_ksoftirqd(unsigned int cpu)
  16635. +{
  16636. + local_irq_disable();
  16637. + if (ksoftirqd_softirq_pending()) {
  16638. + __do_softirq();
  16639. + local_irq_enable();
  16640. + cond_resched_rcu_qs();
  16641. + return;
  16642. + }
  16643. + local_irq_enable();
  16644. +}
  16645. +
  16646. /*
  16647. * preempt_count and SOFTIRQ_OFFSET usage:
  16648. * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
  16649. @@ -232,10 +403,8 @@
  16650. unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
  16651. unsigned long old_flags = current->flags;
  16652. int max_restart = MAX_SOFTIRQ_RESTART;
  16653. - struct softirq_action *h;
  16654. bool in_hardirq;
  16655. __u32 pending;
  16656. - int softirq_bit;
  16657. /*
  16658. * Mask out PF_MEMALLOC s current task context is borrowed for the
  16659. @@ -254,36 +423,7 @@
  16660. /* Reset the pending bitmask before enabling irqs */
  16661. set_softirq_pending(0);
  16662. - local_irq_enable();
  16663. -
  16664. - h = softirq_vec;
  16665. -
  16666. - while ((softirq_bit = ffs(pending))) {
  16667. - unsigned int vec_nr;
  16668. - int prev_count;
  16669. -
  16670. - h += softirq_bit - 1;
  16671. -
  16672. - vec_nr = h - softirq_vec;
  16673. - prev_count = preempt_count();
  16674. -
  16675. - kstat_incr_softirqs_this_cpu(vec_nr);
  16676. -
  16677. - trace_softirq_entry(vec_nr);
  16678. - h->action(h);
  16679. - trace_softirq_exit(vec_nr);
  16680. - if (unlikely(prev_count != preempt_count())) {
  16681. - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
  16682. - vec_nr, softirq_to_name[vec_nr], h->action,
  16683. - prev_count, preempt_count());
  16684. - preempt_count_set(prev_count);
  16685. - }
  16686. - h++;
  16687. - pending >>= softirq_bit;
  16688. - }
  16689. -
  16690. - rcu_bh_qs();
  16691. - local_irq_disable();
  16692. + handle_pending_softirqs(pending);
  16693. pending = local_softirq_pending();
  16694. if (pending) {
  16695. @@ -320,6 +460,310 @@
  16696. }
  16697. /*
  16698. + * This function must run with irqs disabled!
  16699. + */
  16700. +void raise_softirq_irqoff(unsigned int nr)
  16701. +{
  16702. + __raise_softirq_irqoff(nr);
  16703. +
  16704. + /*
  16705. + * If we're in an interrupt or softirq, we're done
  16706. + * (this also catches softirq-disabled code). We will
  16707. + * actually run the softirq once we return from
  16708. + * the irq or softirq.
  16709. + *
  16710. + * Otherwise we wake up ksoftirqd to make sure we
  16711. + * schedule the softirq soon.
  16712. + */
  16713. + if (!in_interrupt())
  16714. + wakeup_softirqd();
  16715. +}
  16716. +
  16717. +void __raise_softirq_irqoff(unsigned int nr)
  16718. +{
  16719. + trace_softirq_raise(nr);
  16720. + or_softirq_pending(1UL << nr);
  16721. +}
  16722. +
  16723. +static inline void local_bh_disable_nort(void) { local_bh_disable(); }
  16724. +static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
  16725. +static void ksoftirqd_set_sched_params(unsigned int cpu) { }
  16726. +
  16727. +#else /* !PREEMPT_RT_FULL */
  16728. +
  16729. +/*
  16730. + * On RT we serialize softirq execution with a cpu local lock per softirq
  16731. + */
  16732. +static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks);
  16733. +
  16734. +void __init softirq_early_init(void)
  16735. +{
  16736. + int i;
  16737. +
  16738. + for (i = 0; i < NR_SOFTIRQS; i++)
  16739. + local_irq_lock_init(local_softirq_locks[i]);
  16740. +}
  16741. +
  16742. +static void lock_softirq(int which)
  16743. +{
  16744. + local_lock(local_softirq_locks[which]);
  16745. +}
  16746. +
  16747. +static void unlock_softirq(int which)
  16748. +{
  16749. + local_unlock(local_softirq_locks[which]);
  16750. +}
  16751. +
  16752. +static void do_single_softirq(int which)
  16753. +{
  16754. + unsigned long old_flags = current->flags;
  16755. +
  16756. + current->flags &= ~PF_MEMALLOC;
  16757. + vtime_account_irq_enter(current);
  16758. + current->flags |= PF_IN_SOFTIRQ;
  16759. + lockdep_softirq_enter();
  16760. + local_irq_enable();
  16761. + handle_softirq(which);
  16762. + local_irq_disable();
  16763. + lockdep_softirq_exit();
  16764. + current->flags &= ~PF_IN_SOFTIRQ;
  16765. + vtime_account_irq_enter(current);
  16766. + tsk_restore_flags(current, old_flags, PF_MEMALLOC);
  16767. +}
  16768. +
  16769. +/*
  16770. + * Called with interrupts disabled. Process softirqs which were raised
  16771. + * in current context (or on behalf of ksoftirqd).
  16772. + */
  16773. +static void do_current_softirqs(void)
  16774. +{
  16775. + while (current->softirqs_raised) {
  16776. + int i = __ffs(current->softirqs_raised);
  16777. + unsigned int pending, mask = (1U << i);
  16778. +
  16779. + current->softirqs_raised &= ~mask;
  16780. + local_irq_enable();
  16781. +
  16782. + /*
  16783. + * If the lock is contended, we boost the owner to
  16784. + * process the softirq or leave the critical section
  16785. + * now.
  16786. + */
  16787. + lock_softirq(i);
  16788. + local_irq_disable();
  16789. + softirq_set_runner(i);
  16790. + /*
  16791. + * Check with the local_softirq_pending() bits,
  16792. + * whether we need to process this still or if someone
  16793. + * else took care of it.
  16794. + */
  16795. + pending = local_softirq_pending();
  16796. + if (pending & mask) {
  16797. + set_softirq_pending(pending & ~mask);
  16798. + do_single_softirq(i);
  16799. + }
  16800. + softirq_clr_runner(i);
  16801. + WARN_ON(current->softirq_nestcnt != 1);
  16802. + local_irq_enable();
  16803. + unlock_softirq(i);
  16804. + local_irq_disable();
  16805. + }
  16806. +}
  16807. +
  16808. +void __local_bh_disable(void)
  16809. +{
  16810. + if (++current->softirq_nestcnt == 1)
  16811. + migrate_disable();
  16812. +}
  16813. +EXPORT_SYMBOL(__local_bh_disable);
  16814. +
  16815. +void __local_bh_enable(void)
  16816. +{
  16817. + if (WARN_ON(current->softirq_nestcnt == 0))
  16818. + return;
  16819. +
  16820. + local_irq_disable();
  16821. + if (current->softirq_nestcnt == 1 && current->softirqs_raised)
  16822. + do_current_softirqs();
  16823. + local_irq_enable();
  16824. +
  16825. + if (--current->softirq_nestcnt == 0)
  16826. + migrate_enable();
  16827. +}
  16828. +EXPORT_SYMBOL(__local_bh_enable);
  16829. +
  16830. +void _local_bh_enable(void)
  16831. +{
  16832. + if (WARN_ON(current->softirq_nestcnt == 0))
  16833. + return;
  16834. + if (--current->softirq_nestcnt == 0)
  16835. + migrate_enable();
  16836. +}
  16837. +EXPORT_SYMBOL(_local_bh_enable);
  16838. +
  16839. +int in_serving_softirq(void)
  16840. +{
  16841. + return current->flags & PF_IN_SOFTIRQ;
  16842. +}
  16843. +EXPORT_SYMBOL(in_serving_softirq);
  16844. +
  16845. +/* Called with preemption disabled */
  16846. +static void run_ksoftirqd(unsigned int cpu)
  16847. +{
  16848. + local_irq_disable();
  16849. + current->softirq_nestcnt++;
  16850. +
  16851. + do_current_softirqs();
  16852. + current->softirq_nestcnt--;
  16853. + local_irq_enable();
  16854. + cond_resched_rcu_qs();
  16855. +}
  16856. +
  16857. +/*
  16858. + * Called from netif_rx_ni(). Preemption enabled, but migration
  16859. + * disabled. So the cpu can't go away under us.
  16860. + */
  16861. +void thread_do_softirq(void)
  16862. +{
  16863. + if (!in_serving_softirq() && current->softirqs_raised) {
  16864. + current->softirq_nestcnt++;
  16865. + do_current_softirqs();
  16866. + current->softirq_nestcnt--;
  16867. + }
  16868. +}
  16869. +
  16870. +static void do_raise_softirq_irqoff(unsigned int nr)
  16871. +{
  16872. + unsigned int mask;
  16873. +
  16874. + mask = 1UL << nr;
  16875. +
  16876. + trace_softirq_raise(nr);
  16877. + or_softirq_pending(mask);
  16878. +
  16879. + /*
  16880. + * If we are not in a hard interrupt and inside a bh disabled
  16881. + * region, we simply raise the flag on current. local_bh_enable()
  16882. + * will make sure that the softirq is executed. Otherwise we
  16883. + * delegate it to ksoftirqd.
  16884. + */
  16885. + if (!in_irq() && current->softirq_nestcnt)
  16886. + current->softirqs_raised |= mask;
  16887. + else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd))
  16888. + return;
  16889. +
  16890. + if (mask & TIMER_SOFTIRQS)
  16891. + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
  16892. + else
  16893. + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
  16894. +}
  16895. +
  16896. +static void wakeup_proper_softirq(unsigned int nr)
  16897. +{
  16898. + if ((1UL << nr) & TIMER_SOFTIRQS)
  16899. + wakeup_timer_softirqd();
  16900. + else
  16901. + wakeup_softirqd();
  16902. +}
  16903. +
  16904. +
  16905. +void __raise_softirq_irqoff(unsigned int nr)
  16906. +{
  16907. + do_raise_softirq_irqoff(nr);
  16908. + if (!in_irq() && !current->softirq_nestcnt)
  16909. + wakeup_proper_softirq(nr);
  16910. +}
  16911. +
  16912. +/*
  16913. + * Same as __raise_softirq_irqoff() but will process them in ksoftirqd
  16914. + */
  16915. +void __raise_softirq_irqoff_ksoft(unsigned int nr)
  16916. +{
  16917. + unsigned int mask;
  16918. +
  16919. + if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) ||
  16920. + !__this_cpu_read(ktimer_softirqd)))
  16921. + return;
  16922. + mask = 1UL << nr;
  16923. +
  16924. + trace_softirq_raise(nr);
  16925. + or_softirq_pending(mask);
  16926. + if (mask & TIMER_SOFTIRQS)
  16927. + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
  16928. + else
  16929. + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
  16930. + wakeup_proper_softirq(nr);
  16931. +}
  16932. +
  16933. +/*
  16934. + * This function must run with irqs disabled!
  16935. + */
  16936. +void raise_softirq_irqoff(unsigned int nr)
  16937. +{
  16938. + do_raise_softirq_irqoff(nr);
  16939. +
  16940. + /*
  16941. + * If we're in a hard interrupt we let the irq return code deal
  16942. + * with the wakeup of ksoftirqd.
  16943. + */
  16944. + if (in_irq())
  16945. + return;
  16946. + /*
  16947. + * If we are in thread context but outside of a bh disabled
  16948. + * region, we need to wake ksoftirqd as well.
  16949. + *
  16950. + * CHECKME: Some of the places which do that could be wrapped
  16951. + * into local_bh_disable/enable pairs. Though it's unclear
  16952. + * whether this is worth the effort. To find those places just
  16953. + * raise a WARN() if the condition is met.
  16954. + */
  16955. + if (!current->softirq_nestcnt)
  16956. + wakeup_proper_softirq(nr);
  16957. +}
  16958. +
  16959. +static inline int ksoftirqd_softirq_pending(void)
  16960. +{
  16961. + return current->softirqs_raised;
  16962. +}
  16963. +
  16964. +static inline void local_bh_disable_nort(void) { }
  16965. +static inline void _local_bh_enable_nort(void) { }
  16966. +
  16967. +static inline void ksoftirqd_set_sched_params(unsigned int cpu)
  16968. +{
  16969. + /* Take over all but timer pending softirqs when starting */
  16970. + local_irq_disable();
  16971. + current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS;
  16972. + local_irq_enable();
  16973. +}
  16974. +
  16975. +static inline void ktimer_softirqd_set_sched_params(unsigned int cpu)
  16976. +{
  16977. + struct sched_param param = { .sched_priority = 1 };
  16978. +
  16979. + sched_setscheduler(current, SCHED_FIFO, &param);
  16980. +
  16981. + /* Take over timer pending softirqs when starting */
  16982. + local_irq_disable();
  16983. + current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS;
  16984. + local_irq_enable();
  16985. +}
  16986. +
  16987. +static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu,
  16988. + bool online)
  16989. +{
  16990. + struct sched_param param = { .sched_priority = 0 };
  16991. +
  16992. + sched_setscheduler(current, SCHED_NORMAL, &param);
  16993. +}
  16994. +
  16995. +static int ktimer_softirqd_should_run(unsigned int cpu)
  16996. +{
  16997. + return current->softirqs_raised;
  16998. +}
  16999. +
  17000. +#endif /* PREEMPT_RT_FULL */
  17001. +/*
  17002. * Enter an interrupt context.
  17003. */
  17004. void irq_enter(void)
  17005. @@ -330,9 +774,9 @@
  17006. * Prevent raise_softirq from needlessly waking up ksoftirqd
  17007. * here, as softirq will be serviced on return from interrupt.
  17008. */
  17009. - local_bh_disable();
  17010. + local_bh_disable_nort();
  17011. tick_irq_enter();
  17012. - _local_bh_enable();
  17013. + _local_bh_enable_nort();
  17014. }
  17015. __irq_enter();
  17016. @@ -340,6 +784,7 @@
  17017. static inline void invoke_softirq(void)
  17018. {
  17019. +#ifndef CONFIG_PREEMPT_RT_FULL
  17020. if (!force_irqthreads) {
  17021. #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
  17022. /*
  17023. @@ -359,6 +804,18 @@
  17024. } else {
  17025. wakeup_softirqd();
  17026. }
  17027. +#else /* PREEMPT_RT_FULL */
  17028. + unsigned long flags;
  17029. +
  17030. + local_irq_save(flags);
  17031. + if (__this_cpu_read(ksoftirqd) &&
  17032. + __this_cpu_read(ksoftirqd)->softirqs_raised)
  17033. + wakeup_softirqd();
  17034. + if (__this_cpu_read(ktimer_softirqd) &&
  17035. + __this_cpu_read(ktimer_softirqd)->softirqs_raised)
  17036. + wakeup_timer_softirqd();
  17037. + local_irq_restore(flags);
  17038. +#endif
  17039. }
  17040. static inline void tick_irq_exit(void)
  17041. @@ -395,26 +852,6 @@
  17042. trace_hardirq_exit(); /* must be last! */
  17043. }
  17044. -/*
  17045. - * This function must run with irqs disabled!
  17046. - */
  17047. -inline void raise_softirq_irqoff(unsigned int nr)
  17048. -{
  17049. - __raise_softirq_irqoff(nr);
  17050. -
  17051. - /*
  17052. - * If we're in an interrupt or softirq, we're done
  17053. - * (this also catches softirq-disabled code). We will
  17054. - * actually run the softirq once we return from
  17055. - * the irq or softirq.
  17056. - *
  17057. - * Otherwise we wake up ksoftirqd to make sure we
  17058. - * schedule the softirq soon.
  17059. - */
  17060. - if (!in_interrupt())
  17061. - wakeup_softirqd();
  17062. -}
  17063. -
  17064. void raise_softirq(unsigned int nr)
  17065. {
  17066. unsigned long flags;
  17067. @@ -424,12 +861,6 @@
  17068. local_irq_restore(flags);
  17069. }
  17070. -void __raise_softirq_irqoff(unsigned int nr)
  17071. -{
  17072. - trace_softirq_raise(nr);
  17073. - or_softirq_pending(1UL << nr);
  17074. -}
  17075. -
  17076. void open_softirq(int nr, void (*action)(struct softirq_action *))
  17077. {
  17078. softirq_vec[nr].action = action;
  17079. @@ -446,15 +877,45 @@
  17080. static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
  17081. static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
  17082. +static inline void
  17083. +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
  17084. +{
  17085. + if (tasklet_trylock(t)) {
  17086. +again:
  17087. + /* We may have been preempted before tasklet_trylock
  17088. + * and __tasklet_action may have already run.
  17089. + * So double check the sched bit while the tasklet
  17090. + * is locked before adding it to the list.
  17091. + */
  17092. + if (test_bit(TASKLET_STATE_SCHED, &t->state)) {
  17093. + t->next = NULL;
  17094. + *head->tail = t;
  17095. + head->tail = &(t->next);
  17096. + raise_softirq_irqoff(nr);
  17097. + tasklet_unlock(t);
  17098. + } else {
  17099. + /* This is subtle. If we hit the corner case above,
  17100. + * it is possible that we get preempted right here,
  17101. + * and another task has successfully called
  17102. + * tasklet_schedule(), then this function, and
  17103. + * failed on the trylock. Thus we must be sure
  17104. + * before releasing the tasklet lock, that the
  17105. + * SCHED_BIT is clear. Otherwise the tasklet
  17106. + * may get its SCHED_BIT set, but not added to the
  17107. + * list
  17108. + */
  17109. + if (!tasklet_tryunlock(t))
  17110. + goto again;
  17111. + }
  17112. + }
  17113. +}
  17114. +
  17115. void __tasklet_schedule(struct tasklet_struct *t)
  17116. {
  17117. unsigned long flags;
  17118. local_irq_save(flags);
  17119. - t->next = NULL;
  17120. - *__this_cpu_read(tasklet_vec.tail) = t;
  17121. - __this_cpu_write(tasklet_vec.tail, &(t->next));
  17122. - raise_softirq_irqoff(TASKLET_SOFTIRQ);
  17123. + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
  17124. local_irq_restore(flags);
  17125. }
  17126. EXPORT_SYMBOL(__tasklet_schedule);
  17127. @@ -464,10 +925,7 @@
  17128. unsigned long flags;
  17129. local_irq_save(flags);
  17130. - t->next = NULL;
  17131. - *__this_cpu_read(tasklet_hi_vec.tail) = t;
  17132. - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
  17133. - raise_softirq_irqoff(HI_SOFTIRQ);
  17134. + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
  17135. local_irq_restore(flags);
  17136. }
  17137. EXPORT_SYMBOL(__tasklet_hi_schedule);
  17138. @@ -476,82 +934,122 @@
  17139. {
  17140. BUG_ON(!irqs_disabled());
  17141. - t->next = __this_cpu_read(tasklet_hi_vec.head);
  17142. - __this_cpu_write(tasklet_hi_vec.head, t);
  17143. - __raise_softirq_irqoff(HI_SOFTIRQ);
  17144. + __tasklet_hi_schedule(t);
  17145. }
  17146. EXPORT_SYMBOL(__tasklet_hi_schedule_first);
  17147. -static void tasklet_action(struct softirq_action *a)
  17148. +void tasklet_enable(struct tasklet_struct *t)
  17149. {
  17150. - struct tasklet_struct *list;
  17151. + if (!atomic_dec_and_test(&t->count))
  17152. + return;
  17153. + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
  17154. + tasklet_schedule(t);
  17155. +}
  17156. +EXPORT_SYMBOL(tasklet_enable);
  17157. - local_irq_disable();
  17158. - list = __this_cpu_read(tasklet_vec.head);
  17159. - __this_cpu_write(tasklet_vec.head, NULL);
  17160. - __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
  17161. - local_irq_enable();
  17162. +static void __tasklet_action(struct softirq_action *a,
  17163. + struct tasklet_struct *list)
  17164. +{
  17165. + int loops = 1000000;
  17166. while (list) {
  17167. struct tasklet_struct *t = list;
  17168. list = list->next;
  17169. - if (tasklet_trylock(t)) {
  17170. - if (!atomic_read(&t->count)) {
  17171. - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
  17172. - &t->state))
  17173. - BUG();
  17174. - t->func(t->data);
  17175. - tasklet_unlock(t);
  17176. - continue;
  17177. - }
  17178. - tasklet_unlock(t);
  17179. + /*
  17180. + * Should always succeed - after a tasklet got on the
  17181. + * list (after getting the SCHED bit set from 0 to 1),
  17182. + * nothing but the tasklet softirq it got queued to can
  17183. + * lock it:
  17184. + */
  17185. + if (!tasklet_trylock(t)) {
  17186. + WARN_ON(1);
  17187. + continue;
  17188. }
  17189. - local_irq_disable();
  17190. t->next = NULL;
  17191. - *__this_cpu_read(tasklet_vec.tail) = t;
  17192. - __this_cpu_write(tasklet_vec.tail, &(t->next));
  17193. - __raise_softirq_irqoff(TASKLET_SOFTIRQ);
  17194. - local_irq_enable();
  17195. +
  17196. + /*
  17197. + * If we cannot handle the tasklet because it's disabled,
  17198. + * mark it as pending. tasklet_enable() will later
  17199. + * re-schedule the tasklet.
  17200. + */
  17201. + if (unlikely(atomic_read(&t->count))) {
  17202. +out_disabled:
  17203. + /* implicit unlock: */
  17204. + wmb();
  17205. + t->state = TASKLET_STATEF_PENDING;
  17206. + continue;
  17207. + }
  17208. +
  17209. + /*
  17210. + * After this point the tasklet might be rescheduled
  17211. + * on another CPU, but it can only be added to another
  17212. + * CPU's tasklet list if we unlock the tasklet (which we
  17213. + * don't do yet).
  17214. + */
  17215. + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
  17216. + WARN_ON(1);
  17217. +
  17218. +again:
  17219. + t->func(t->data);
  17220. +
  17221. + /*
  17222. + * Try to unlock the tasklet. We must use cmpxchg, because
  17223. + * another CPU might have scheduled or disabled the tasklet.
  17224. + * We only allow the STATE_RUN -> 0 transition here.
  17225. + */
  17226. + while (!tasklet_tryunlock(t)) {
  17227. + /*
  17228. + * If it got disabled meanwhile, bail out:
  17229. + */
  17230. + if (atomic_read(&t->count))
  17231. + goto out_disabled;
  17232. + /*
  17233. + * If it got scheduled meanwhile, re-execute
  17234. + * the tasklet function:
  17235. + */
  17236. + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
  17237. + goto again;
  17238. + if (!--loops) {
  17239. + printk("hm, tasklet state: %08lx\n", t->state);
  17240. + WARN_ON(1);
  17241. + tasklet_unlock(t);
  17242. + break;
  17243. + }
  17244. + }
  17245. }
  17246. }
  17247. +static void tasklet_action(struct softirq_action *a)
  17248. +{
  17249. + struct tasklet_struct *list;
  17250. +
  17251. + local_irq_disable();
  17252. +
  17253. + list = __this_cpu_read(tasklet_vec.head);
  17254. + __this_cpu_write(tasklet_vec.head, NULL);
  17255. + __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
  17256. +
  17257. + local_irq_enable();
  17258. +
  17259. + __tasklet_action(a, list);
  17260. +}
  17261. +
  17262. static void tasklet_hi_action(struct softirq_action *a)
  17263. {
  17264. struct tasklet_struct *list;
  17265. local_irq_disable();
  17266. +
  17267. list = __this_cpu_read(tasklet_hi_vec.head);
  17268. __this_cpu_write(tasklet_hi_vec.head, NULL);
  17269. __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
  17270. - local_irq_enable();
  17271. - while (list) {
  17272. - struct tasklet_struct *t = list;
  17273. -
  17274. - list = list->next;
  17275. -
  17276. - if (tasklet_trylock(t)) {
  17277. - if (!atomic_read(&t->count)) {
  17278. - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
  17279. - &t->state))
  17280. - BUG();
  17281. - t->func(t->data);
  17282. - tasklet_unlock(t);
  17283. - continue;
  17284. - }
  17285. - tasklet_unlock(t);
  17286. - }
  17287. + local_irq_enable();
  17288. - local_irq_disable();
  17289. - t->next = NULL;
  17290. - *__this_cpu_read(tasklet_hi_vec.tail) = t;
  17291. - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
  17292. - __raise_softirq_irqoff(HI_SOFTIRQ);
  17293. - local_irq_enable();
  17294. - }
  17295. + __tasklet_action(a, list);
  17296. }
  17297. void tasklet_init(struct tasklet_struct *t,
  17298. @@ -572,7 +1070,7 @@
  17299. while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
  17300. do {
  17301. - yield();
  17302. + msleep(1);
  17303. } while (test_bit(TASKLET_STATE_SCHED, &t->state));
  17304. }
  17305. tasklet_unlock_wait(t);
  17306. @@ -646,25 +1144,26 @@
  17307. open_softirq(HI_SOFTIRQ, tasklet_hi_action);
  17308. }
  17309. -static int ksoftirqd_should_run(unsigned int cpu)
  17310. -{
  17311. - return local_softirq_pending();
  17312. -}
  17313. -
  17314. -static void run_ksoftirqd(unsigned int cpu)
  17315. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  17316. +void tasklet_unlock_wait(struct tasklet_struct *t)
  17317. {
  17318. - local_irq_disable();
  17319. - if (local_softirq_pending()) {
  17320. + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
  17321. /*
  17322. - * We can safely run softirq on inline stack, as we are not deep
  17323. - * in the task stack here.
  17324. + * Hack for now to avoid this busy-loop:
  17325. */
  17326. - __do_softirq();
  17327. - local_irq_enable();
  17328. - cond_resched_rcu_qs();
  17329. - return;
  17330. +#ifdef CONFIG_PREEMPT_RT_FULL
  17331. + msleep(1);
  17332. +#else
  17333. + barrier();
  17334. +#endif
  17335. }
  17336. - local_irq_enable();
  17337. +}
  17338. +EXPORT_SYMBOL(tasklet_unlock_wait);
  17339. +#endif
  17340. +
  17341. +static int ksoftirqd_should_run(unsigned int cpu)
  17342. +{
  17343. + return ksoftirqd_softirq_pending();
  17344. }
  17345. #ifdef CONFIG_HOTPLUG_CPU
  17346. @@ -746,16 +1245,31 @@
  17347. static struct smp_hotplug_thread softirq_threads = {
  17348. .store = &ksoftirqd,
  17349. + .setup = ksoftirqd_set_sched_params,
  17350. .thread_should_run = ksoftirqd_should_run,
  17351. .thread_fn = run_ksoftirqd,
  17352. .thread_comm = "ksoftirqd/%u",
  17353. };
  17354. +#ifdef CONFIG_PREEMPT_RT_FULL
  17355. +static struct smp_hotplug_thread softirq_timer_threads = {
  17356. + .store = &ktimer_softirqd,
  17357. + .setup = ktimer_softirqd_set_sched_params,
  17358. + .cleanup = ktimer_softirqd_clr_sched_params,
  17359. + .thread_should_run = ktimer_softirqd_should_run,
  17360. + .thread_fn = run_ksoftirqd,
  17361. + .thread_comm = "ktimersoftd/%u",
  17362. +};
  17363. +#endif
  17364. +
  17365. static __init int spawn_ksoftirqd(void)
  17366. {
  17367. register_cpu_notifier(&cpu_nfb);
  17368. BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
  17369. +#ifdef CONFIG_PREEMPT_RT_FULL
  17370. + BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads));
  17371. +#endif
  17372. return 0;
  17373. }
  17374. diff -Nur linux-4.8.15.orig/kernel/stop_machine.c linux-4.8.15/kernel/stop_machine.c
  17375. --- linux-4.8.15.orig/kernel/stop_machine.c 2016-12-15 17:50:48.000000000 +0100
  17376. +++ linux-4.8.15/kernel/stop_machine.c 2017-01-01 17:07:16.043427737 +0100
  17377. @@ -37,7 +37,7 @@
  17378. struct cpu_stopper {
  17379. struct task_struct *thread;
  17380. - spinlock_t lock;
  17381. + raw_spinlock_t lock;
  17382. bool enabled; /* is this stopper enabled? */
  17383. struct list_head works; /* list of pending works */
  17384. @@ -83,14 +83,14 @@
  17385. unsigned long flags;
  17386. bool enabled;
  17387. - spin_lock_irqsave(&stopper->lock, flags);
  17388. + raw_spin_lock_irqsave(&stopper->lock, flags);
  17389. enabled = stopper->enabled;
  17390. if (enabled)
  17391. __cpu_stop_queue_work(stopper, work);
  17392. else if (work->done)
  17393. cpu_stop_signal_done(work->done);
  17394. - spin_unlock_irqrestore(&stopper->lock, flags);
  17395. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  17396. return enabled;
  17397. }
  17398. @@ -232,8 +232,8 @@
  17399. int err;
  17400. lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
  17401. - spin_lock_irq(&stopper1->lock);
  17402. - spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
  17403. + raw_spin_lock_irq(&stopper1->lock);
  17404. + raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
  17405. err = -ENOENT;
  17406. if (!stopper1->enabled || !stopper2->enabled)
  17407. @@ -243,8 +243,8 @@
  17408. __cpu_stop_queue_work(stopper1, work1);
  17409. __cpu_stop_queue_work(stopper2, work2);
  17410. unlock:
  17411. - spin_unlock(&stopper2->lock);
  17412. - spin_unlock_irq(&stopper1->lock);
  17413. + raw_spin_unlock(&stopper2->lock);
  17414. + raw_spin_unlock_irq(&stopper1->lock);
  17415. lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);
  17416. return err;
  17417. @@ -321,18 +321,21 @@
  17418. static bool queue_stop_cpus_work(const struct cpumask *cpumask,
  17419. cpu_stop_fn_t fn, void *arg,
  17420. - struct cpu_stop_done *done)
  17421. + struct cpu_stop_done *done, bool inactive)
  17422. {
  17423. struct cpu_stop_work *work;
  17424. unsigned int cpu;
  17425. bool queued = false;
  17426. /*
  17427. - * Disable preemption while queueing to avoid getting
  17428. - * preempted by a stopper which might wait for other stoppers
  17429. - * to enter @fn which can lead to deadlock.
  17430. + * Make sure that all work is queued on all cpus before
  17431. + * any of the cpus can execute it.
  17432. */
  17433. - lg_global_lock(&stop_cpus_lock);
  17434. + if (!inactive)
  17435. + lg_global_lock(&stop_cpus_lock);
  17436. + else
  17437. + lg_global_trylock_relax(&stop_cpus_lock);
  17438. +
  17439. for_each_cpu(cpu, cpumask) {
  17440. work = &per_cpu(cpu_stopper.stop_work, cpu);
  17441. work->fn = fn;
  17442. @@ -352,7 +355,7 @@
  17443. struct cpu_stop_done done;
  17444. cpu_stop_init_done(&done, cpumask_weight(cpumask));
  17445. - if (!queue_stop_cpus_work(cpumask, fn, arg, &done))
  17446. + if (!queue_stop_cpus_work(cpumask, fn, arg, &done, false))
  17447. return -ENOENT;
  17448. wait_for_completion(&done.completion);
  17449. return done.ret;
  17450. @@ -433,9 +436,9 @@
  17451. unsigned long flags;
  17452. int run;
  17453. - spin_lock_irqsave(&stopper->lock, flags);
  17454. + raw_spin_lock_irqsave(&stopper->lock, flags);
  17455. run = !list_empty(&stopper->works);
  17456. - spin_unlock_irqrestore(&stopper->lock, flags);
  17457. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  17458. return run;
  17459. }
  17460. @@ -446,13 +449,13 @@
  17461. repeat:
  17462. work = NULL;
  17463. - spin_lock_irq(&stopper->lock);
  17464. + raw_spin_lock_irq(&stopper->lock);
  17465. if (!list_empty(&stopper->works)) {
  17466. work = list_first_entry(&stopper->works,
  17467. struct cpu_stop_work, list);
  17468. list_del_init(&work->list);
  17469. }
  17470. - spin_unlock_irq(&stopper->lock);
  17471. + raw_spin_unlock_irq(&stopper->lock);
  17472. if (work) {
  17473. cpu_stop_fn_t fn = work->fn;
  17474. @@ -460,6 +463,16 @@
  17475. struct cpu_stop_done *done = work->done;
  17476. int ret;
  17477. + /*
17478. + * Wait until stop work has been queued on all
17479. + * cpus
  17480. + */
  17481. + lg_global_lock(&stop_cpus_lock);
  17482. + /*
  17483. + * Let other cpu threads continue as well
  17484. + */
  17485. + lg_global_unlock(&stop_cpus_lock);
  17486. +
  17487. /* cpu stop callbacks must not sleep, make in_atomic() == T */
  17488. preempt_count_inc();
  17489. ret = fn(arg);
  17490. @@ -526,10 +539,12 @@
  17491. for_each_possible_cpu(cpu) {
  17492. struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
  17493. - spin_lock_init(&stopper->lock);
  17494. + raw_spin_lock_init(&stopper->lock);
  17495. INIT_LIST_HEAD(&stopper->works);
  17496. }
  17497. + lg_lock_init(&stop_cpus_lock, "stop_cpus_lock");
  17498. +
  17499. BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
  17500. stop_machine_unpark(raw_smp_processor_id());
  17501. stop_machine_initialized = true;
  17502. @@ -624,7 +639,7 @@
  17503. set_state(&msdata, MULTI_STOP_PREPARE);
  17504. cpu_stop_init_done(&done, num_active_cpus());
  17505. queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
  17506. - &done);
  17507. + &done, true);
  17508. ret = multi_cpu_stop(&msdata);
  17509. /* Busy wait for completion. */
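
The queue_stop_cpus_work() and stopper-thread changes above use stop_cpus_lock as a gate: the queuing side holds it while stop work is distributed to every CPU, and each stopper briefly takes and drops the same lock before invoking its callback, so no CPU starts executing before all of them have work queued. The standalone userspace sketch below illustrates only that gating idea with plain pthreads; it does not use the kernel's lglock API.

/*
 * Illustrative userspace sketch of the gating pattern above: the queuer
 * holds a global lock while distributing work, and each worker takes and
 * immediately drops the same lock before executing.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t gate = PTHREAD_MUTEX_INITIALIZER;
static int work[4];

static void *worker(void *arg)
{
	int id = *(int *)arg;

	/* Wait until the queuer has released the gate. */
	pthread_mutex_lock(&gate);
	pthread_mutex_unlock(&gate);

	printf("worker %d runs item %d\n", id, work[id]);
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	int ids[4];
	int i;

	pthread_mutex_lock(&gate);		/* close the gate */
	for (i = 0; i < 4; i++) {
		ids[i] = i;
		work[i] = 100 + i;		/* "queue" the work item */
		pthread_create(&t[i], NULL, worker, &ids[i]);
	}
	pthread_mutex_unlock(&gate);		/* all queued: open the gate */

	for (i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}
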
  17510. diff -Nur linux-4.8.15.orig/kernel/time/hrtimer.c linux-4.8.15/kernel/time/hrtimer.c
  17511. --- linux-4.8.15.orig/kernel/time/hrtimer.c 2016-12-15 17:50:48.000000000 +0100
  17512. +++ linux-4.8.15/kernel/time/hrtimer.c 2017-01-01 17:07:16.047427991 +0100
  17513. @@ -53,6 +53,7 @@
  17514. #include <asm/uaccess.h>
  17515. #include <trace/events/timer.h>
  17516. +#include <trace/events/hist.h>
  17517. #include "tick-internal.h"
  17518. @@ -695,6 +696,29 @@
  17519. retrigger_next_event(NULL);
  17520. }
  17521. +#ifdef CONFIG_PREEMPT_RT_FULL
  17522. +
  17523. +static struct swork_event clock_set_delay_work;
  17524. +
  17525. +static void run_clock_set_delay(struct swork_event *event)
  17526. +{
  17527. + clock_was_set();
  17528. +}
  17529. +
  17530. +void clock_was_set_delayed(void)
  17531. +{
  17532. + swork_queue(&clock_set_delay_work);
  17533. +}
  17534. +
  17535. +static __init int create_clock_set_delay_thread(void)
  17536. +{
  17537. + WARN_ON(swork_get());
  17538. + INIT_SWORK(&clock_set_delay_work, run_clock_set_delay);
  17539. + return 0;
  17540. +}
  17541. +early_initcall(create_clock_set_delay_thread);
  17542. +#else /* PREEMPT_RT_FULL */
  17543. +
  17544. static void clock_was_set_work(struct work_struct *work)
  17545. {
  17546. clock_was_set();
  17547. @@ -710,6 +734,7 @@
  17548. {
  17549. schedule_work(&hrtimer_work);
  17550. }
  17551. +#endif
  17552. #else
  17553. @@ -719,11 +744,8 @@
  17554. static inline void hrtimer_switch_to_hres(void) { }
  17555. static inline void
  17556. hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
  17557. -static inline int hrtimer_reprogram(struct hrtimer *timer,
  17558. - struct hrtimer_clock_base *base)
  17559. -{
  17560. - return 0;
  17561. -}
  17562. +static inline void hrtimer_reprogram(struct hrtimer *timer,
  17563. + struct hrtimer_clock_base *base) { }
  17564. static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
  17565. static inline void retrigger_next_event(void *arg) { }
  17566. @@ -855,6 +877,32 @@
  17567. }
  17568. EXPORT_SYMBOL_GPL(hrtimer_forward);
  17569. +#ifdef CONFIG_PREEMPT_RT_BASE
  17570. +# define wake_up_timer_waiters(b) wake_up(&(b)->wait)
  17571. +
  17572. +/**
  17573. + * hrtimer_wait_for_timer - Wait for a running timer
  17574. + *
  17575. + * @timer: timer to wait for
  17576. + *
17577. + * The function waits on the waitqueue of the timer base in case
17578. + * the timer's callback function is currently executing. The
  17579. + * waitqueue is woken up after the timer callback function has
  17580. + * finished execution.
  17581. + */
  17582. +void hrtimer_wait_for_timer(const struct hrtimer *timer)
  17583. +{
  17584. + struct hrtimer_clock_base *base = timer->base;
  17585. +
  17586. + if (base && base->cpu_base && !timer->irqsafe)
  17587. + wait_event(base->cpu_base->wait,
  17588. + !(hrtimer_callback_running(timer)));
  17589. +}
  17590. +
  17591. +#else
  17592. +# define wake_up_timer_waiters(b) do { } while (0)
  17593. +#endif
  17594. +
  17595. /*
  17596. * enqueue_hrtimer - internal function to (re)start a timer
  17597. *
  17598. @@ -896,6 +944,11 @@
  17599. if (!(state & HRTIMER_STATE_ENQUEUED))
  17600. return;
  17601. + if (unlikely(!list_empty(&timer->cb_entry))) {
  17602. + list_del_init(&timer->cb_entry);
  17603. + return;
  17604. + }
  17605. +
  17606. if (!timerqueue_del(&base->active, &timer->node))
  17607. cpu_base->active_bases &= ~(1 << base->index);
  17608. @@ -991,7 +1044,16 @@
  17609. new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
  17610. timer_stats_hrtimer_set_start_info(timer);
  17611. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  17612. + {
  17613. + ktime_t now = new_base->get_time();
  17614. + if (ktime_to_ns(tim) < ktime_to_ns(now))
  17615. + timer->praecox = now;
  17616. + else
  17617. + timer->praecox = ktime_set(0, 0);
  17618. + }
  17619. +#endif
  17620. leftmost = enqueue_hrtimer(timer, new_base);
  17621. if (!leftmost)
  17622. goto unlock;
  17623. @@ -1063,7 +1125,7 @@
  17624. if (ret >= 0)
  17625. return ret;
  17626. - cpu_relax();
  17627. + hrtimer_wait_for_timer(timer);
  17628. }
  17629. }
  17630. EXPORT_SYMBOL_GPL(hrtimer_cancel);
  17631. @@ -1127,6 +1189,7 @@
  17632. base = hrtimer_clockid_to_base(clock_id);
  17633. timer->base = &cpu_base->clock_base[base];
  17634. + INIT_LIST_HEAD(&timer->cb_entry);
  17635. timerqueue_init(&timer->node);
  17636. #ifdef CONFIG_TIMER_STATS
  17637. @@ -1167,6 +1230,7 @@
  17638. seq = raw_read_seqcount_begin(&cpu_base->seq);
  17639. if (timer->state != HRTIMER_STATE_INACTIVE ||
  17640. + cpu_base->running_soft == timer ||
  17641. cpu_base->running == timer)
  17642. return true;
  17643. @@ -1265,10 +1329,112 @@
  17644. cpu_base->running = NULL;
  17645. }
  17646. +#ifdef CONFIG_PREEMPT_RT_BASE
  17647. +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
  17648. + struct hrtimer_clock_base *base)
  17649. +{
  17650. + int leftmost;
  17651. +
  17652. + if (restart != HRTIMER_NORESTART &&
  17653. + !(timer->state & HRTIMER_STATE_ENQUEUED)) {
  17654. +
  17655. + leftmost = enqueue_hrtimer(timer, base);
  17656. + if (!leftmost)
  17657. + return;
  17658. +#ifdef CONFIG_HIGH_RES_TIMERS
  17659. + if (!hrtimer_is_hres_active(timer)) {
  17660. + /*
  17661. + * Kick to reschedule the next tick to handle the new timer
  17662. + * on dynticks target.
  17663. + */
  17664. + if (base->cpu_base->nohz_active)
  17665. + wake_up_nohz_cpu(base->cpu_base->cpu);
  17666. + } else {
  17667. +
  17668. + hrtimer_reprogram(timer, base);
  17669. + }
  17670. +#endif
  17671. + }
  17672. +}
  17673. +
  17674. +/*
  17675. + * The changes in mainline which removed the callback modes from
  17676. + * hrtimer are not yet working with -rt. The non wakeup_process()
  17677. + * based callbacks which involve sleeping locks need to be treated
17678. + * separately.
  17679. + */
  17680. +static void hrtimer_rt_run_pending(void)
  17681. +{
  17682. + enum hrtimer_restart (*fn)(struct hrtimer *);
  17683. + struct hrtimer_cpu_base *cpu_base;
  17684. + struct hrtimer_clock_base *base;
  17685. + struct hrtimer *timer;
  17686. + int index, restart;
  17687. +
  17688. + local_irq_disable();
  17689. + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
  17690. +
  17691. + raw_spin_lock(&cpu_base->lock);
  17692. +
  17693. + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
  17694. + base = &cpu_base->clock_base[index];
  17695. +
  17696. + while (!list_empty(&base->expired)) {
  17697. + timer = list_first_entry(&base->expired,
  17698. + struct hrtimer, cb_entry);
  17699. +
  17700. + /*
  17701. + * Same as the above __run_hrtimer function
  17702. + * just we run with interrupts enabled.
  17703. + */
  17704. + debug_deactivate(timer);
  17705. + cpu_base->running_soft = timer;
  17706. + raw_write_seqcount_barrier(&cpu_base->seq);
  17707. +
  17708. + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
  17709. + timer_stats_account_hrtimer(timer);
  17710. + fn = timer->function;
  17711. +
  17712. + raw_spin_unlock_irq(&cpu_base->lock);
  17713. + restart = fn(timer);
  17714. + raw_spin_lock_irq(&cpu_base->lock);
  17715. +
  17716. + hrtimer_rt_reprogram(restart, timer, base);
  17717. + raw_write_seqcount_barrier(&cpu_base->seq);
  17718. +
  17719. + WARN_ON_ONCE(cpu_base->running_soft != timer);
  17720. + cpu_base->running_soft = NULL;
  17721. + }
  17722. + }
  17723. +
  17724. + raw_spin_unlock_irq(&cpu_base->lock);
  17725. +
  17726. + wake_up_timer_waiters(cpu_base);
  17727. +}
  17728. +
  17729. +static int hrtimer_rt_defer(struct hrtimer *timer)
  17730. +{
  17731. + if (timer->irqsafe)
  17732. + return 0;
  17733. +
  17734. + __remove_hrtimer(timer, timer->base, timer->state, 0);
  17735. + list_add_tail(&timer->cb_entry, &timer->base->expired);
  17736. + return 1;
  17737. +}
  17738. +
  17739. +#else
  17740. +
  17741. +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
  17742. +
  17743. +#endif
  17744. +
  17745. +static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
  17746. +
  17747. static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
  17748. {
  17749. struct hrtimer_clock_base *base = cpu_base->clock_base;
  17750. unsigned int active = cpu_base->active_bases;
  17751. + int raise = 0;
  17752. for (; active; base++, active >>= 1) {
  17753. struct timerqueue_node *node;
  17754. @@ -1284,6 +1450,15 @@
  17755. timer = container_of(node, struct hrtimer, node);
  17756. + trace_hrtimer_interrupt(raw_smp_processor_id(),
  17757. + ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ?
  17758. + timer->praecox : hrtimer_get_expires(timer),
  17759. + basenow)),
  17760. + current,
  17761. + timer->function == hrtimer_wakeup ?
  17762. + container_of(timer, struct hrtimer_sleeper,
  17763. + timer)->task : NULL);
  17764. +
  17765. /*
  17766. * The immediate goal for using the softexpires is
  17767. * minimizing wakeups, not running timers at the
  17768. @@ -1299,9 +1474,14 @@
  17769. if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
  17770. break;
  17771. - __run_hrtimer(cpu_base, base, timer, &basenow);
  17772. + if (!hrtimer_rt_defer(timer))
  17773. + __run_hrtimer(cpu_base, base, timer, &basenow);
  17774. + else
  17775. + raise = 1;
  17776. }
  17777. }
  17778. + if (raise)
  17779. + raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  17780. }
  17781. #ifdef CONFIG_HIGH_RES_TIMERS
  17782. @@ -1464,16 +1644,18 @@
  17783. void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
  17784. {
  17785. sl->timer.function = hrtimer_wakeup;
  17786. + sl->timer.irqsafe = 1;
  17787. sl->task = task;
  17788. }
  17789. EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
  17790. -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
  17791. +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode,
  17792. + unsigned long state)
  17793. {
  17794. hrtimer_init_sleeper(t, current);
  17795. do {
  17796. - set_current_state(TASK_INTERRUPTIBLE);
  17797. + set_current_state(state);
  17798. hrtimer_start_expires(&t->timer, mode);
  17799. if (likely(t->task))
  17800. @@ -1515,7 +1697,8 @@
  17801. HRTIMER_MODE_ABS);
  17802. hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
  17803. - if (do_nanosleep(&t, HRTIMER_MODE_ABS))
  17804. + /* cpu_chill() does not care about restart state. */
  17805. + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE))
  17806. goto out;
  17807. rmtp = restart->nanosleep.rmtp;
  17808. @@ -1532,8 +1715,10 @@
  17809. return ret;
  17810. }
  17811. -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  17812. - const enum hrtimer_mode mode, const clockid_t clockid)
  17813. +static long
  17814. +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  17815. + const enum hrtimer_mode mode, const clockid_t clockid,
  17816. + unsigned long state)
  17817. {
  17818. struct restart_block *restart;
  17819. struct hrtimer_sleeper t;
  17820. @@ -1546,7 +1731,7 @@
  17821. hrtimer_init_on_stack(&t.timer, clockid, mode);
  17822. hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
  17823. - if (do_nanosleep(&t, mode))
  17824. + if (do_nanosleep(&t, mode, state))
  17825. goto out;
  17826. /* Absolute timers do not update the rmtp value and restart: */
  17827. @@ -1573,6 +1758,12 @@
  17828. return ret;
  17829. }
  17830. +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  17831. + const enum hrtimer_mode mode, const clockid_t clockid)
  17832. +{
  17833. + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE);
  17834. +}
  17835. +
  17836. SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
  17837. struct timespec __user *, rmtp)
  17838. {
  17839. @@ -1587,6 +1778,26 @@
  17840. return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
  17841. }
  17842. +#ifdef CONFIG_PREEMPT_RT_FULL
  17843. +/*
17844. + * Sleep for 1 ms in the hope that whoever holds what we want will let it go.
  17845. + */
  17846. +void cpu_chill(void)
  17847. +{
  17848. + struct timespec tu = {
  17849. + .tv_nsec = NSEC_PER_MSEC,
  17850. + };
  17851. + unsigned int freeze_flag = current->flags & PF_NOFREEZE;
  17852. +
  17853. + current->flags |= PF_NOFREEZE;
  17854. + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC,
  17855. + TASK_UNINTERRUPTIBLE);
  17856. + if (!freeze_flag)
  17857. + current->flags &= ~PF_NOFREEZE;
  17858. +}
  17859. +EXPORT_SYMBOL(cpu_chill);
  17860. +#endif
  17861. +
  17862. /*
  17863. * Functions related to boot-time initialization:
  17864. */
  17865. @@ -1598,10 +1809,14 @@
  17866. for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
  17867. cpu_base->clock_base[i].cpu_base = cpu_base;
  17868. timerqueue_init_head(&cpu_base->clock_base[i].active);
  17869. + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
  17870. }
  17871. cpu_base->cpu = cpu;
  17872. hrtimer_init_hres(cpu_base);
  17873. +#ifdef CONFIG_PREEMPT_RT_BASE
  17874. + init_waitqueue_head(&cpu_base->wait);
  17875. +#endif
  17876. return 0;
  17877. }
  17878. @@ -1671,9 +1886,26 @@
  17879. #endif /* CONFIG_HOTPLUG_CPU */
  17880. +#ifdef CONFIG_PREEMPT_RT_BASE
  17881. +
  17882. +static void run_hrtimer_softirq(struct softirq_action *h)
  17883. +{
  17884. + hrtimer_rt_run_pending();
  17885. +}
  17886. +
  17887. +static void hrtimers_open_softirq(void)
  17888. +{
  17889. + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
  17890. +}
  17891. +
  17892. +#else
  17893. +static void hrtimers_open_softirq(void) { }
  17894. +#endif
  17895. +
  17896. void __init hrtimers_init(void)
  17897. {
  17898. hrtimers_prepare_cpu(smp_processor_id());
  17899. + hrtimers_open_softirq();
  17900. }
  17901. /**
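
Two RT-specific building blocks show up in the hrtimer changes above: hrtimer_wait_for_timer(), which sleeps until a running callback finishes instead of spinning on it, and cpu_chill(), which sleeps for one millisecond in place of cpu_relax() in retry loops that would otherwise busy-wait on a resource held by a preempted task. The following is only a hedged sketch of the kind of retry loop cpu_chill() is aimed at; the lock and function are illustrative, and the assumption that cpu_chill() is reachable via linux/delay.h comes from the -rt series, not from this hunk.

/*
 * Illustrative sketch only: a trylock retry loop that chills instead of
 * busy-waiting on PREEMPT_RT_FULL. The lock and function are examples,
 * not APIs added by this patch.
 */
#include <linux/delay.h>	/* assumed to declare cpu_chill() in -rt trees */
#include <linux/spinlock.h>

static void demo_grab(spinlock_t *lock)
{
	while (!spin_trylock(lock)) {
#ifdef CONFIG_PREEMPT_RT_FULL
		cpu_chill();	/* sleep 1 ms so the owner can run */
#else
		cpu_relax();	/* ordinary busy-wait hint */
#endif
	}
	/* ... critical section ... */
	spin_unlock(lock);
}
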
  17902. diff -Nur linux-4.8.15.orig/kernel/time/itimer.c linux-4.8.15/kernel/time/itimer.c
  17903. --- linux-4.8.15.orig/kernel/time/itimer.c 2016-12-15 17:50:48.000000000 +0100
  17904. +++ linux-4.8.15/kernel/time/itimer.c 2017-01-01 17:07:16.047427991 +0100
  17905. @@ -213,6 +213,7 @@
  17906. /* We are sharing ->siglock with it_real_fn() */
  17907. if (hrtimer_try_to_cancel(timer) < 0) {
  17908. spin_unlock_irq(&tsk->sighand->siglock);
  17909. + hrtimer_wait_for_timer(&tsk->signal->real_timer);
  17910. goto again;
  17911. }
  17912. expires = timeval_to_ktime(value->it_value);
  17913. diff -Nur linux-4.8.15.orig/kernel/time/jiffies.c linux-4.8.15/kernel/time/jiffies.c
  17914. --- linux-4.8.15.orig/kernel/time/jiffies.c 2016-12-15 17:50:48.000000000 +0100
  17915. +++ linux-4.8.15/kernel/time/jiffies.c 2017-01-01 17:07:16.047427991 +0100
  17916. @@ -74,7 +74,8 @@
  17917. .max_cycles = 10,
  17918. };
  17919. -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
  17920. +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
  17921. +__cacheline_aligned_in_smp seqcount_t jiffies_seq;
  17922. #if (BITS_PER_LONG < 64)
  17923. u64 get_jiffies_64(void)
  17924. @@ -83,9 +84,9 @@
  17925. u64 ret;
  17926. do {
  17927. - seq = read_seqbegin(&jiffies_lock);
  17928. + seq = read_seqcount_begin(&jiffies_seq);
  17929. ret = jiffies_64;
  17930. - } while (read_seqretry(&jiffies_lock, seq));
  17931. + } while (read_seqcount_retry(&jiffies_seq, seq));
  17932. return ret;
  17933. }
  17934. EXPORT_SYMBOL(get_jiffies_64);
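
The conversion above splits the old jiffies seqlock into a raw spinlock for writer serialization plus a bare seqcount for lockless readers, which keeps the writer side usable with interrupts disabled on RT while get_jiffies_64() keeps its retry loop. A minimal sketch of the same split, with hypothetical demo_* names:

/*
 * Minimal sketch of the raw_spinlock + seqcount split used above;
 * demo_lock, demo_seq and demo_value are hypothetical names.
 */
#include <linux/seqlock.h>
#include <linux/spinlock.h>
#include <linux/types.h>

static DEFINE_RAW_SPINLOCK(demo_lock);
static seqcount_t demo_seq = SEQCNT_ZERO(demo_seq);
static u64 demo_value;

static void demo_write(u64 v)
{
	raw_spin_lock(&demo_lock);		/* serialize writers */
	write_seqcount_begin(&demo_seq);	/* make readers retry */
	demo_value = v;
	write_seqcount_end(&demo_seq);
	raw_spin_unlock(&demo_lock);
}

static u64 demo_read(void)
{
	unsigned int seq;
	u64 v;

	do {
		seq = read_seqcount_begin(&demo_seq);
		v = demo_value;
	} while (read_seqcount_retry(&demo_seq, seq));

	return v;
}
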
  17935. diff -Nur linux-4.8.15.orig/kernel/time/ntp.c linux-4.8.15/kernel/time/ntp.c
  17936. --- linux-4.8.15.orig/kernel/time/ntp.c 2016-12-15 17:50:48.000000000 +0100
  17937. +++ linux-4.8.15/kernel/time/ntp.c 2017-01-01 17:07:16.047427991 +0100
  17938. @@ -17,6 +17,7 @@
  17939. #include <linux/module.h>
  17940. #include <linux/rtc.h>
  17941. #include <linux/math64.h>
  17942. +#include <linux/swork.h>
  17943. #include "ntp_internal.h"
  17944. #include "timekeeping_internal.h"
  17945. @@ -568,10 +569,35 @@
  17946. &sync_cmos_work, timespec64_to_jiffies(&next));
  17947. }
  17948. +#ifdef CONFIG_PREEMPT_RT_FULL
  17949. +
  17950. +static void run_clock_set_delay(struct swork_event *event)
  17951. +{
  17952. + queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
  17953. +}
  17954. +
  17955. +static struct swork_event ntp_cmos_swork;
  17956. +
  17957. +void ntp_notify_cmos_timer(void)
  17958. +{
  17959. + swork_queue(&ntp_cmos_swork);
  17960. +}
  17961. +
  17962. +static __init int create_cmos_delay_thread(void)
  17963. +{
  17964. + WARN_ON(swork_get());
  17965. + INIT_SWORK(&ntp_cmos_swork, run_clock_set_delay);
  17966. + return 0;
  17967. +}
  17968. +early_initcall(create_cmos_delay_thread);
  17969. +
  17970. +#else
  17971. +
  17972. void ntp_notify_cmos_timer(void)
  17973. {
  17974. queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
  17975. }
  17976. +#endif /* CONFIG_PREEMPT_RT_FULL */
  17977. #else
  17978. void ntp_notify_cmos_timer(void) { }
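
On RT, ntp_notify_cmos_timer() can be reached from contexts where queueing regular work is not safe, so the hunk above bounces the queue_delayed_work() call through the -rt tree's swork helper, mirroring the clock_was_set_delayed() change in hrtimer.c. The pattern in isolation looks like the sketch below; swork is specific to this patch series, and the demo_* names are placeholders.

/*
 * Sketch of the swork deferral pattern used above; swork is an API of
 * this -rt tree, and all demo_* names are hypothetical.
 */
#include <linux/bug.h>
#include <linux/init.h>
#include <linux/swork.h>

static struct swork_event demo_event;

static void demo_swork_fn(struct swork_event *event)
{
	/* Runs in the swork kernel thread, in preemptible context. */
}

void demo_trigger(void)
{
	/* Safe to call from atomic context on RT. */
	swork_queue(&demo_event);
}

static __init int demo_swork_init(void)
{
	WARN_ON(swork_get());		/* ensure the swork thread exists */
	INIT_SWORK(&demo_event, demo_swork_fn);
	return 0;
}
early_initcall(demo_swork_init);
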
  17979. diff -Nur linux-4.8.15.orig/kernel/time/posix-cpu-timers.c linux-4.8.15/kernel/time/posix-cpu-timers.c
  17980. --- linux-4.8.15.orig/kernel/time/posix-cpu-timers.c 2016-12-15 17:50:48.000000000 +0100
  17981. +++ linux-4.8.15/kernel/time/posix-cpu-timers.c 2017-01-01 17:07:16.047427991 +0100
  17982. @@ -3,6 +3,7 @@
  17983. */
  17984. #include <linux/sched.h>
  17985. +#include <linux/sched/rt.h>
  17986. #include <linux/posix-timers.h>
  17987. #include <linux/errno.h>
  17988. #include <linux/math64.h>
  17989. @@ -620,7 +621,7 @@
  17990. /*
  17991. * Disarm any old timer after extracting its expiry time.
  17992. */
  17993. - WARN_ON_ONCE(!irqs_disabled());
  17994. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  17995. ret = 0;
  17996. old_incr = timer->it.cpu.incr;
  17997. @@ -1064,7 +1065,7 @@
  17998. /*
  17999. * Now re-arm for the new expiry time.
  18000. */
  18001. - WARN_ON_ONCE(!irqs_disabled());
  18002. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  18003. arm_timer(timer);
  18004. unlock_task_sighand(p, &flags);
  18005. @@ -1153,13 +1154,13 @@
  18006. * already updated our counts. We need to check if any timers fire now.
  18007. * Interrupts are disabled.
  18008. */
  18009. -void run_posix_cpu_timers(struct task_struct *tsk)
  18010. +static void __run_posix_cpu_timers(struct task_struct *tsk)
  18011. {
  18012. LIST_HEAD(firing);
  18013. struct k_itimer *timer, *next;
  18014. unsigned long flags;
  18015. - WARN_ON_ONCE(!irqs_disabled());
  18016. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  18017. /*
  18018. * The fast path checks that there are no expired thread or thread
  18019. @@ -1213,6 +1214,190 @@
  18020. }
  18021. }
  18022. +#ifdef CONFIG_PREEMPT_RT_BASE
  18023. +#include <linux/kthread.h>
  18024. +#include <linux/cpu.h>
  18025. +DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
  18026. +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
  18027. +
  18028. +static int posix_cpu_timers_thread(void *data)
  18029. +{
  18030. + int cpu = (long)data;
  18031. +
  18032. + BUG_ON(per_cpu(posix_timer_task,cpu) != current);
  18033. +
  18034. + while (!kthread_should_stop()) {
  18035. + struct task_struct *tsk = NULL;
  18036. + struct task_struct *next = NULL;
  18037. +
  18038. + if (cpu_is_offline(cpu))
  18039. + goto wait_to_die;
  18040. +
  18041. + /* grab task list */
  18042. + raw_local_irq_disable();
  18043. + tsk = per_cpu(posix_timer_tasklist, cpu);
  18044. + per_cpu(posix_timer_tasklist, cpu) = NULL;
  18045. + raw_local_irq_enable();
  18046. +
18047. + /* it's possible the list is empty; just return */
  18048. + if (!tsk) {
  18049. + set_current_state(TASK_INTERRUPTIBLE);
  18050. + schedule();
  18051. + __set_current_state(TASK_RUNNING);
  18052. + continue;
  18053. + }
  18054. +
  18055. + /* Process task list */
  18056. + while (1) {
  18057. + /* save next */
  18058. + next = tsk->posix_timer_list;
  18059. +
  18060. + /* run the task timers, clear its ptr and
  18061. + * unreference it
  18062. + */
  18063. + __run_posix_cpu_timers(tsk);
  18064. + tsk->posix_timer_list = NULL;
  18065. + put_task_struct(tsk);
  18066. +
  18067. + /* check if this is the last on the list */
  18068. + if (next == tsk)
  18069. + break;
  18070. + tsk = next;
  18071. + }
  18072. + }
  18073. + return 0;
  18074. +
  18075. +wait_to_die:
  18076. + /* Wait for kthread_stop */
  18077. + set_current_state(TASK_INTERRUPTIBLE);
  18078. + while (!kthread_should_stop()) {
  18079. + schedule();
  18080. + set_current_state(TASK_INTERRUPTIBLE);
  18081. + }
  18082. + __set_current_state(TASK_RUNNING);
  18083. + return 0;
  18084. +}
  18085. +
  18086. +static inline int __fastpath_timer_check(struct task_struct *tsk)
  18087. +{
  18088. + /* tsk == current, ensure it is safe to use ->signal/sighand */
  18089. + if (unlikely(tsk->exit_state))
  18090. + return 0;
  18091. +
  18092. + if (!task_cputime_zero(&tsk->cputime_expires))
  18093. + return 1;
  18094. +
  18095. + if (!task_cputime_zero(&tsk->signal->cputime_expires))
  18096. + return 1;
  18097. +
  18098. + return 0;
  18099. +}
  18100. +
  18101. +void run_posix_cpu_timers(struct task_struct *tsk)
  18102. +{
  18103. + unsigned long cpu = smp_processor_id();
  18104. + struct task_struct *tasklist;
  18105. +
  18106. + BUG_ON(!irqs_disabled());
  18107. + if(!per_cpu(posix_timer_task, cpu))
  18108. + return;
  18109. + /* get per-cpu references */
  18110. + tasklist = per_cpu(posix_timer_tasklist, cpu);
  18111. +
  18112. + /* check to see if we're already queued */
  18113. + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
  18114. + get_task_struct(tsk);
  18115. + if (tasklist) {
  18116. + tsk->posix_timer_list = tasklist;
  18117. + } else {
  18118. + /*
  18119. + * The list is terminated by a self-pointing
  18120. + * task_struct
  18121. + */
  18122. + tsk->posix_timer_list = tsk;
  18123. + }
  18124. + per_cpu(posix_timer_tasklist, cpu) = tsk;
  18125. +
  18126. + wake_up_process(per_cpu(posix_timer_task, cpu));
  18127. + }
  18128. +}
  18129. +
  18130. +/*
  18131. + * posix_cpu_thread_call - callback that gets triggered when a CPU is added.
18132. + * Here we can start up the per-CPU posix timer thread for the new CPU.
  18133. + */
  18134. +static int posix_cpu_thread_call(struct notifier_block *nfb,
  18135. + unsigned long action, void *hcpu)
  18136. +{
  18137. + int cpu = (long)hcpu;
  18138. + struct task_struct *p;
  18139. + struct sched_param param;
  18140. +
  18141. + switch (action) {
  18142. + case CPU_UP_PREPARE:
  18143. + p = kthread_create(posix_cpu_timers_thread, hcpu,
  18144. + "posixcputmr/%d",cpu);
  18145. + if (IS_ERR(p))
  18146. + return NOTIFY_BAD;
  18147. + p->flags |= PF_NOFREEZE;
  18148. + kthread_bind(p, cpu);
  18149. + /* Must be high prio to avoid getting starved */
  18150. + param.sched_priority = MAX_RT_PRIO-1;
  18151. + sched_setscheduler(p, SCHED_FIFO, &param);
  18152. + per_cpu(posix_timer_task,cpu) = p;
  18153. + break;
  18154. + case CPU_ONLINE:
18155. + /* Strictly unnecessary, as the first user will wake it. */
  18156. + wake_up_process(per_cpu(posix_timer_task,cpu));
  18157. + break;
  18158. +#ifdef CONFIG_HOTPLUG_CPU
  18159. + case CPU_UP_CANCELED:
  18160. + /* Unbind it from offline cpu so it can run. Fall thru. */
  18161. + kthread_bind(per_cpu(posix_timer_task, cpu),
  18162. + cpumask_any(cpu_online_mask));
  18163. + kthread_stop(per_cpu(posix_timer_task,cpu));
  18164. + per_cpu(posix_timer_task,cpu) = NULL;
  18165. + break;
  18166. + case CPU_DEAD:
  18167. + kthread_stop(per_cpu(posix_timer_task,cpu));
  18168. + per_cpu(posix_timer_task,cpu) = NULL;
  18169. + break;
  18170. +#endif
  18171. + }
  18172. + return NOTIFY_OK;
  18173. +}
  18174. +
18175. +/* Register at a high priority so that the per-CPU timer threads are
18176. + * set up before default-priority notifiers run.
  18177. + */
  18178. +static struct notifier_block posix_cpu_thread_notifier = {
  18179. + .notifier_call = posix_cpu_thread_call,
  18180. + .priority = 10
  18181. +};
  18182. +
  18183. +static int __init posix_cpu_thread_init(void)
  18184. +{
  18185. + void *hcpu = (void *)(long)smp_processor_id();
  18186. + /* Start one for boot CPU. */
  18187. + unsigned long cpu;
  18188. +
18189. + /* init the per-cpu posix_timer_tasklist pointers */
  18190. + for_each_possible_cpu(cpu)
  18191. + per_cpu(posix_timer_tasklist, cpu) = NULL;
  18192. +
  18193. + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu);
  18194. + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu);
  18195. + register_cpu_notifier(&posix_cpu_thread_notifier);
  18196. + return 0;
  18197. +}
  18198. +early_initcall(posix_cpu_thread_init);
  18199. +#else /* CONFIG_PREEMPT_RT_BASE */
  18200. +void run_posix_cpu_timers(struct task_struct *tsk)
  18201. +{
  18202. + __run_posix_cpu_timers(tsk);
  18203. +}
  18204. +#endif /* CONFIG_PREEMPT_RT_BASE */
  18205. +
  18206. /*
  18207. * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
  18208. * The tsk->sighand->siglock must be held by the caller.
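
On RT the expired posix CPU timers are handed off to a per-CPU "posixcputmr" thread through a singly linked list of task_structs in which the last entry points to itself rather than to NULL (the "self-pointing task_struct" comment above). The standalone userspace sketch below shows how such a list is built and walked; it is only an illustration of the data structure, not kernel code.

/*
 * Illustration of a singly linked list terminated by a self-pointer,
 * as used for posix_timer_tasklist above.
 */
#include <stdio.h>

struct node {
	int id;
	struct node *next;
};

static struct node *push(struct node *head, struct node *n)
{
	/* The first element terminates the list by pointing at itself. */
	n->next = head ? head : n;
	return n;
}

int main(void)
{
	struct node a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };
	struct node *head = NULL, *cur, *next;

	head = push(head, &a);
	head = push(head, &b);
	head = push(head, &c);

	for (cur = head; ; cur = next) {
		next = cur->next;
		printf("node %d\n", cur->id);
		if (next == cur)	/* self-pointer marks the end */
			break;
	}
	return 0;
}
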
  18209. diff -Nur linux-4.8.15.orig/kernel/time/posix-timers.c linux-4.8.15/kernel/time/posix-timers.c
  18210. --- linux-4.8.15.orig/kernel/time/posix-timers.c 2016-12-15 17:50:48.000000000 +0100
  18211. +++ linux-4.8.15/kernel/time/posix-timers.c 2017-01-01 17:07:16.047427991 +0100
  18212. @@ -506,6 +506,7 @@
  18213. static struct pid *good_sigevent(sigevent_t * event)
  18214. {
  18215. struct task_struct *rtn = current->group_leader;
  18216. + int sig = event->sigev_signo;
  18217. if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
  18218. (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
  18219. @@ -514,7 +515,8 @@
  18220. return NULL;
  18221. if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
  18222. - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
  18223. + (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) ||
  18224. + sig_kernel_coredump(sig)))
  18225. return NULL;
  18226. return task_pid(rtn);
  18227. @@ -826,6 +828,20 @@
  18228. return overrun;
  18229. }
  18230. +/*
  18231. + * Protected by RCU!
  18232. + */
  18233. +static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
  18234. +{
  18235. +#ifdef CONFIG_PREEMPT_RT_FULL
  18236. + if (kc->timer_set == common_timer_set)
  18237. + hrtimer_wait_for_timer(&timr->it.real.timer);
  18238. + else
  18239. + /* FIXME: Whacky hack for posix-cpu-timers */
  18240. + schedule_timeout(1);
  18241. +#endif
  18242. +}
  18243. +
  18244. /* Set a POSIX.1b interval timer. */
  18245. /* timr->it_lock is taken. */
  18246. static int
  18247. @@ -903,6 +919,7 @@
  18248. if (!timr)
  18249. return -EINVAL;
  18250. + rcu_read_lock();
  18251. kc = clockid_to_kclock(timr->it_clock);
  18252. if (WARN_ON_ONCE(!kc || !kc->timer_set))
  18253. error = -EINVAL;
  18254. @@ -911,9 +928,12 @@
  18255. unlock_timer(timr, flag);
  18256. if (error == TIMER_RETRY) {
  18257. + timer_wait_for_callback(kc, timr);
  18258. rtn = NULL; // We already got the old time...
  18259. + rcu_read_unlock();
  18260. goto retry;
  18261. }
  18262. + rcu_read_unlock();
  18263. if (old_setting && !error &&
  18264. copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
  18265. @@ -951,10 +971,15 @@
  18266. if (!timer)
  18267. return -EINVAL;
  18268. + rcu_read_lock();
  18269. if (timer_delete_hook(timer) == TIMER_RETRY) {
  18270. unlock_timer(timer, flags);
  18271. + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
  18272. + timer);
  18273. + rcu_read_unlock();
  18274. goto retry_delete;
  18275. }
  18276. + rcu_read_unlock();
  18277. spin_lock(&current->sighand->siglock);
  18278. list_del(&timer->list);
  18279. @@ -980,8 +1005,18 @@
  18280. retry_delete:
  18281. spin_lock_irqsave(&timer->it_lock, flags);
  18282. + /* On RT we can race with a deletion */
  18283. + if (!timer->it_signal) {
  18284. + unlock_timer(timer, flags);
  18285. + return;
  18286. + }
  18287. +
  18288. if (timer_delete_hook(timer) == TIMER_RETRY) {
  18289. + rcu_read_lock();
  18290. unlock_timer(timer, flags);
  18291. + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
  18292. + timer);
  18293. + rcu_read_unlock();
  18294. goto retry_delete;
  18295. }
  18296. list_del(&timer->list);
  18297. diff -Nur linux-4.8.15.orig/kernel/time/tick-broadcast-hrtimer.c linux-4.8.15/kernel/time/tick-broadcast-hrtimer.c
  18298. --- linux-4.8.15.orig/kernel/time/tick-broadcast-hrtimer.c 2016-12-15 17:50:48.000000000 +0100
  18299. +++ linux-4.8.15/kernel/time/tick-broadcast-hrtimer.c 2017-01-01 17:07:16.047427991 +0100
  18300. @@ -107,5 +107,6 @@
  18301. {
  18302. hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  18303. bctimer.function = bc_handler;
  18304. + bctimer.irqsafe = true;
  18305. clockevents_register_device(&ce_broadcast_hrtimer);
  18306. }
  18307. diff -Nur linux-4.8.15.orig/kernel/time/tick-common.c linux-4.8.15/kernel/time/tick-common.c
  18308. --- linux-4.8.15.orig/kernel/time/tick-common.c 2016-12-15 17:50:48.000000000 +0100
  18309. +++ linux-4.8.15/kernel/time/tick-common.c 2017-01-01 17:07:16.047427991 +0100
  18310. @@ -79,13 +79,15 @@
  18311. static void tick_periodic(int cpu)
  18312. {
  18313. if (tick_do_timer_cpu == cpu) {
  18314. - write_seqlock(&jiffies_lock);
  18315. + raw_spin_lock(&jiffies_lock);
  18316. + write_seqcount_begin(&jiffies_seq);
  18317. /* Keep track of the next tick event */
  18318. tick_next_period = ktime_add(tick_next_period, tick_period);
  18319. do_timer(1);
  18320. - write_sequnlock(&jiffies_lock);
  18321. + write_seqcount_end(&jiffies_seq);
  18322. + raw_spin_unlock(&jiffies_lock);
  18323. update_wall_time();
  18324. }
  18325. @@ -157,9 +159,9 @@
  18326. ktime_t next;
  18327. do {
  18328. - seq = read_seqbegin(&jiffies_lock);
  18329. + seq = read_seqcount_begin(&jiffies_seq);
  18330. next = tick_next_period;
  18331. - } while (read_seqretry(&jiffies_lock, seq));
  18332. + } while (read_seqcount_retry(&jiffies_seq, seq));
  18333. clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
  18334. diff -Nur linux-4.8.15.orig/kernel/time/tick-sched.c linux-4.8.15/kernel/time/tick-sched.c
  18335. --- linux-4.8.15.orig/kernel/time/tick-sched.c 2016-12-15 17:50:48.000000000 +0100
  18336. +++ linux-4.8.15/kernel/time/tick-sched.c 2017-01-01 17:07:16.047427991 +0100
  18337. @@ -62,7 +62,8 @@
  18338. return;
  18339. /* Reevaluate with jiffies_lock held */
  18340. - write_seqlock(&jiffies_lock);
  18341. + raw_spin_lock(&jiffies_lock);
  18342. + write_seqcount_begin(&jiffies_seq);
  18343. delta = ktime_sub(now, last_jiffies_update);
  18344. if (delta.tv64 >= tick_period.tv64) {
  18345. @@ -85,10 +86,12 @@
  18346. /* Keep the tick_next_period variable up to date */
  18347. tick_next_period = ktime_add(last_jiffies_update, tick_period);
  18348. } else {
  18349. - write_sequnlock(&jiffies_lock);
  18350. + write_seqcount_end(&jiffies_seq);
  18351. + raw_spin_unlock(&jiffies_lock);
  18352. return;
  18353. }
  18354. - write_sequnlock(&jiffies_lock);
  18355. + write_seqcount_end(&jiffies_seq);
  18356. + raw_spin_unlock(&jiffies_lock);
  18357. update_wall_time();
  18358. }
  18359. @@ -99,12 +102,14 @@
  18360. {
  18361. ktime_t period;
  18362. - write_seqlock(&jiffies_lock);
  18363. + raw_spin_lock(&jiffies_lock);
  18364. + write_seqcount_begin(&jiffies_seq);
  18365. /* Did we start the jiffies update yet ? */
  18366. if (last_jiffies_update.tv64 == 0)
  18367. last_jiffies_update = tick_next_period;
  18368. period = last_jiffies_update;
  18369. - write_sequnlock(&jiffies_lock);
  18370. + write_seqcount_end(&jiffies_seq);
  18371. + raw_spin_unlock(&jiffies_lock);
  18372. return period;
  18373. }
  18374. @@ -212,6 +217,7 @@
  18375. static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
  18376. .func = nohz_full_kick_func,
  18377. + .flags = IRQ_WORK_HARD_IRQ,
  18378. };
  18379. /*
  18380. @@ -670,10 +676,10 @@
  18381. /* Read jiffies and the time when jiffies were updated last */
  18382. do {
  18383. - seq = read_seqbegin(&jiffies_lock);
  18384. + seq = read_seqcount_begin(&jiffies_seq);
  18385. basemono = last_jiffies_update.tv64;
  18386. basejiff = jiffies;
  18387. - } while (read_seqretry(&jiffies_lock, seq));
  18388. + } while (read_seqcount_retry(&jiffies_seq, seq));
  18389. ts->last_jiffies = basejiff;
  18390. if (rcu_needs_cpu(basemono, &next_rcu) ||
  18391. @@ -874,14 +880,7 @@
  18392. return false;
  18393. if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
  18394. - static int ratelimit;
  18395. -
  18396. - if (ratelimit < 10 &&
  18397. - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
  18398. - pr_warn("NOHZ: local_softirq_pending %02x\n",
  18399. - (unsigned int) local_softirq_pending());
  18400. - ratelimit++;
  18401. - }
  18402. + softirq_check_pending_idle();
  18403. return false;
  18404. }
  18405. @@ -1190,6 +1189,7 @@
  18406. * Emulate tick processing via per-CPU hrtimers:
  18407. */
  18408. hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  18409. + ts->sched_timer.irqsafe = 1;
  18410. ts->sched_timer.function = tick_sched_timer;
  18411. /* Get the next period (per-CPU) */
  18412. diff -Nur linux-4.8.15.orig/kernel/time/timekeeping.c linux-4.8.15/kernel/time/timekeeping.c
  18413. --- linux-4.8.15.orig/kernel/time/timekeeping.c 2016-12-15 17:50:48.000000000 +0100
  18414. +++ linux-4.8.15/kernel/time/timekeeping.c 2017-01-01 17:07:16.051428246 +0100
  18415. @@ -2328,8 +2328,10 @@
  18416. */
  18417. void xtime_update(unsigned long ticks)
  18418. {
  18419. - write_seqlock(&jiffies_lock);
  18420. + raw_spin_lock(&jiffies_lock);
  18421. + write_seqcount_begin(&jiffies_seq);
  18422. do_timer(ticks);
  18423. - write_sequnlock(&jiffies_lock);
  18424. + write_seqcount_end(&jiffies_seq);
  18425. + raw_spin_unlock(&jiffies_lock);
  18426. update_wall_time();
  18427. }
  18428. diff -Nur linux-4.8.15.orig/kernel/time/timekeeping.h linux-4.8.15/kernel/time/timekeeping.h
  18429. --- linux-4.8.15.orig/kernel/time/timekeeping.h 2016-12-15 17:50:48.000000000 +0100
  18430. +++ linux-4.8.15/kernel/time/timekeeping.h 2017-01-01 17:07:16.051428246 +0100
  18431. @@ -19,7 +19,8 @@
  18432. extern void do_timer(unsigned long ticks);
  18433. extern void update_wall_time(void);
  18434. -extern seqlock_t jiffies_lock;
  18435. +extern raw_spinlock_t jiffies_lock;
  18436. +extern seqcount_t jiffies_seq;
  18437. #define CS_NAME_LEN 32
  18438. diff -Nur linux-4.8.15.orig/kernel/time/timer.c linux-4.8.15/kernel/time/timer.c
  18439. --- linux-4.8.15.orig/kernel/time/timer.c 2016-12-15 17:50:48.000000000 +0100
  18440. +++ linux-4.8.15/kernel/time/timer.c 2017-01-01 17:07:16.051428246 +0100
  18441. @@ -193,8 +193,11 @@
  18442. #endif
  18443. struct timer_base {
  18444. - spinlock_t lock;
  18445. + raw_spinlock_t lock;
  18446. struct timer_list *running_timer;
  18447. +#ifdef CONFIG_PREEMPT_RT_FULL
  18448. + struct swait_queue_head wait_for_running_timer;
  18449. +#endif
  18450. unsigned long clk;
  18451. unsigned long next_expiry;
  18452. unsigned int cpu;
  18453. @@ -948,10 +951,10 @@
  18454. if (!(tf & TIMER_MIGRATING)) {
  18455. base = get_timer_base(tf);
  18456. - spin_lock_irqsave(&base->lock, *flags);
  18457. + raw_spin_lock_irqsave(&base->lock, *flags);
  18458. if (timer->flags == tf)
  18459. return base;
  18460. - spin_unlock_irqrestore(&base->lock, *flags);
  18461. + raw_spin_unlock_irqrestore(&base->lock, *flags);
  18462. }
  18463. cpu_relax();
  18464. }
  18465. @@ -1023,9 +1026,9 @@
  18466. /* See the comment in lock_timer_base() */
  18467. timer->flags |= TIMER_MIGRATING;
  18468. - spin_unlock(&base->lock);
  18469. + raw_spin_unlock(&base->lock);
  18470. base = new_base;
  18471. - spin_lock(&base->lock);
  18472. + raw_spin_lock(&base->lock);
  18473. WRITE_ONCE(timer->flags,
  18474. (timer->flags & ~TIMER_BASEMASK) | base->cpu);
  18475. }
  18476. @@ -1050,7 +1053,7 @@
  18477. }
  18478. out_unlock:
  18479. - spin_unlock_irqrestore(&base->lock, flags);
  18480. + raw_spin_unlock_irqrestore(&base->lock, flags);
  18481. return ret;
  18482. }
  18483. @@ -1144,19 +1147,46 @@
  18484. if (base != new_base) {
  18485. timer->flags |= TIMER_MIGRATING;
  18486. - spin_unlock(&base->lock);
  18487. + raw_spin_unlock(&base->lock);
  18488. base = new_base;
  18489. - spin_lock(&base->lock);
  18490. + raw_spin_lock(&base->lock);
  18491. WRITE_ONCE(timer->flags,
  18492. (timer->flags & ~TIMER_BASEMASK) | cpu);
  18493. }
  18494. debug_activate(timer, timer->expires);
  18495. internal_add_timer(base, timer);
  18496. - spin_unlock_irqrestore(&base->lock, flags);
  18497. + raw_spin_unlock_irqrestore(&base->lock, flags);
  18498. }
  18499. EXPORT_SYMBOL_GPL(add_timer_on);
  18500. +#ifdef CONFIG_PREEMPT_RT_FULL
  18501. +/*
  18502. + * Wait for a running timer
  18503. + */
  18504. +static void wait_for_running_timer(struct timer_list *timer)
  18505. +{
  18506. + struct timer_base *base;
  18507. + u32 tf = timer->flags;
  18508. +
  18509. + if (tf & TIMER_MIGRATING)
  18510. + return;
  18511. +
  18512. + base = get_timer_base(tf);
  18513. + swait_event(base->wait_for_running_timer,
  18514. + base->running_timer != timer);
  18515. +}
  18516. +
  18517. +# define wakeup_timer_waiters(b) swake_up_all(&(b)->wait_for_running_timer)
  18518. +#else
  18519. +static inline void wait_for_running_timer(struct timer_list *timer)
  18520. +{
  18521. + cpu_relax();
  18522. +}
  18523. +
  18524. +# define wakeup_timer_waiters(b) do { } while (0)
  18525. +#endif
  18526. +
  18527. /**
  18528. * del_timer - deactive a timer.
  18529. * @timer: the timer to be deactivated
  18530. @@ -1180,7 +1210,7 @@
  18531. if (timer_pending(timer)) {
  18532. base = lock_timer_base(timer, &flags);
  18533. ret = detach_if_pending(timer, base, true);
  18534. - spin_unlock_irqrestore(&base->lock, flags);
  18535. + raw_spin_unlock_irqrestore(&base->lock, flags);
  18536. }
  18537. return ret;
  18538. @@ -1208,13 +1238,13 @@
  18539. timer_stats_timer_clear_start_info(timer);
  18540. ret = detach_if_pending(timer, base, true);
  18541. }
  18542. - spin_unlock_irqrestore(&base->lock, flags);
  18543. + raw_spin_unlock_irqrestore(&base->lock, flags);
  18544. return ret;
  18545. }
  18546. EXPORT_SYMBOL(try_to_del_timer_sync);
  18547. -#ifdef CONFIG_SMP
  18548. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  18549. /**
  18550. * del_timer_sync - deactivate a timer and wait for the handler to finish.
  18551. * @timer: the timer to be deactivated
  18552. @@ -1274,7 +1304,7 @@
  18553. int ret = try_to_del_timer_sync(timer);
  18554. if (ret >= 0)
  18555. return ret;
  18556. - cpu_relax();
  18557. + wait_for_running_timer(timer);
  18558. }
  18559. }
  18560. EXPORT_SYMBOL(del_timer_sync);
  18561. @@ -1339,14 +1369,17 @@
  18562. fn = timer->function;
  18563. data = timer->data;
  18564. - if (timer->flags & TIMER_IRQSAFE) {
  18565. - spin_unlock(&base->lock);
  18566. + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL) &&
  18567. + timer->flags & TIMER_IRQSAFE) {
  18568. + raw_spin_unlock(&base->lock);
  18569. call_timer_fn(timer, fn, data);
  18570. - spin_lock(&base->lock);
  18571. + base->running_timer = NULL;
  18572. + raw_spin_lock(&base->lock);
  18573. } else {
  18574. - spin_unlock_irq(&base->lock);
  18575. + raw_spin_unlock_irq(&base->lock);
  18576. call_timer_fn(timer, fn, data);
  18577. - spin_lock_irq(&base->lock);
  18578. + base->running_timer = NULL;
  18579. + raw_spin_lock_irq(&base->lock);
  18580. }
  18581. }
  18582. }
  18583. @@ -1515,7 +1548,7 @@
  18584. if (cpu_is_offline(smp_processor_id()))
  18585. return expires;
  18586. - spin_lock(&base->lock);
  18587. + raw_spin_lock(&base->lock);
  18588. nextevt = __next_timer_interrupt(base);
  18589. is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
  18590. base->next_expiry = nextevt;
  18591. @@ -1543,7 +1576,7 @@
  18592. if ((expires - basem) > TICK_NSEC)
  18593. base->is_idle = true;
  18594. }
  18595. - spin_unlock(&base->lock);
  18596. + raw_spin_unlock(&base->lock);
  18597. return cmp_next_hrtimer_event(basem, expires);
  18598. }
  18599. @@ -1608,13 +1641,13 @@
  18600. /* Note: this timer irq context must be accounted for as well. */
  18601. account_process_tick(p, user_tick);
  18602. + scheduler_tick();
  18603. run_local_timers();
  18604. rcu_check_callbacks(user_tick);
  18605. -#ifdef CONFIG_IRQ_WORK
  18606. +#if defined(CONFIG_IRQ_WORK)
  18607. if (in_irq())
  18608. irq_work_tick();
  18609. #endif
  18610. - scheduler_tick();
  18611. run_posix_cpu_timers(p);
  18612. }
  18613. @@ -1630,7 +1663,7 @@
  18614. if (!time_after_eq(jiffies, base->clk))
  18615. return;
  18616. - spin_lock_irq(&base->lock);
  18617. + raw_spin_lock_irq(&base->lock);
  18618. while (time_after_eq(jiffies, base->clk)) {
  18619. @@ -1640,8 +1673,8 @@
  18620. while (levels--)
  18621. expire_timers(base, heads + levels);
  18622. }
  18623. - base->running_timer = NULL;
  18624. - spin_unlock_irq(&base->lock);
  18625. + raw_spin_unlock_irq(&base->lock);
  18626. + wakeup_timer_waiters(base);
  18627. }
  18628. /*
  18629. @@ -1651,6 +1684,8 @@
  18630. {
  18631. struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
  18632. + irq_work_tick_soft();
  18633. +
  18634. __run_timers(base);
  18635. if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
  18636. __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
  18637. @@ -1836,16 +1871,16 @@
  18638. * The caller is globally serialized and nobody else
  18639. * takes two locks at once, deadlock is not possible.
  18640. */
  18641. - spin_lock_irq(&new_base->lock);
  18642. - spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
  18643. + raw_spin_lock_irq(&new_base->lock);
  18644. + raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
  18645. BUG_ON(old_base->running_timer);
  18646. for (i = 0; i < WHEEL_SIZE; i++)
  18647. migrate_timer_list(new_base, old_base->vectors + i);
  18648. - spin_unlock(&old_base->lock);
  18649. - spin_unlock_irq(&new_base->lock);
  18650. + raw_spin_unlock(&old_base->lock);
  18651. + raw_spin_unlock_irq(&new_base->lock);
  18652. put_cpu_ptr(&timer_bases);
  18653. }
  18654. return 0;
  18655. @@ -1861,8 +1896,11 @@
  18656. for (i = 0; i < NR_BASES; i++) {
  18657. base = per_cpu_ptr(&timer_bases[i], cpu);
  18658. base->cpu = cpu;
  18659. - spin_lock_init(&base->lock);
  18660. + raw_spin_lock_init(&base->lock);
  18661. base->clk = jiffies;
  18662. +#ifdef CONFIG_PREEMPT_RT_FULL
  18663. + init_swait_queue_head(&base->wait_for_running_timer);
  18664. +#endif
  18665. }
  18666. }
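
With the timer base lock turned into a raw spinlock and handlers running in softirq thread context on RT, del_timer_sync() can no longer spin until the handler returns; the hunks above instead park cancellers on a per-base simple waitqueue and wake them from __run_timers(). The sketch below shows only that swait wait/wake pairing; every demo_* name is a placeholder.

/*
 * Minimal sketch of the swait wait/wake pairing used above;
 * the demo_* names are placeholders.
 */
#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/swait.h>

static struct swait_queue_head demo_wait;
static DEFINE_SPINLOCK(demo_lock);
static void *demo_running;	/* item whose handler is currently running */

/* Called by a canceller: sleep until the handler for @item has finished. */
static void demo_wait_until_done(void *item)
{
	swait_event(demo_wait, READ_ONCE(demo_running) != item);
}

/* Called by the expiry path: run the handler, then release any waiters. */
static void demo_expire(void *item, void (*fn)(void *))
{
	spin_lock(&demo_lock);
	demo_running = item;
	spin_unlock(&demo_lock);

	fn(item);

	spin_lock(&demo_lock);
	demo_running = NULL;
	spin_unlock(&demo_lock);

	swake_up_all(&demo_wait);
}

static __init int demo_init(void)
{
	init_swait_queue_head(&demo_wait);
	return 0;
}
early_initcall(demo_init);
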
  18667. diff -Nur linux-4.8.15.orig/kernel/trace/Kconfig linux-4.8.15/kernel/trace/Kconfig
  18668. --- linux-4.8.15.orig/kernel/trace/Kconfig 2016-12-15 17:50:48.000000000 +0100
  18669. +++ linux-4.8.15/kernel/trace/Kconfig 2017-01-01 17:07:16.051428246 +0100
  18670. @@ -187,6 +187,24 @@
  18671. enabled. This option and the preempt-off timing option can be
  18672. used together or separately.)
  18673. +config INTERRUPT_OFF_HIST
  18674. + bool "Interrupts-off Latency Histogram"
  18675. + depends on IRQSOFF_TRACER
  18676. + help
  18677. + This option generates continuously updated histograms (one per cpu)
  18678. + of the duration of time periods with interrupts disabled. The
  18679. + histograms are disabled by default. To enable them, write a non-zero
  18680. + number to
  18681. +
  18682. + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
  18683. +
  18684. + If PREEMPT_OFF_HIST is also selected, additional histograms (one
  18685. + per cpu) are generated that accumulate the duration of time periods
  18686. + when both interrupts and preemption are disabled. The histogram data
  18687. + will be located in the debug file system at
  18688. +
  18689. + /sys/kernel/debug/tracing/latency_hist/irqsoff
  18690. +
  18691. config PREEMPT_TRACER
  18692. bool "Preemption-off Latency Tracer"
  18693. default n
  18694. @@ -197,6 +215,7 @@
  18695. select RING_BUFFER_ALLOW_SWAP
  18696. select TRACER_SNAPSHOT
  18697. select TRACER_SNAPSHOT_PER_CPU_SWAP
  18698. + select USING_GET_LOCK_PARENT_IP
  18699. help
  18700. This option measures the time spent in preemption-off critical
  18701. sections, with microsecond accuracy.
  18702. @@ -211,6 +230,24 @@
  18703. enabled. This option and the irqs-off timing option can be
  18704. used together or separately.)
  18705. +config PREEMPT_OFF_HIST
  18706. + bool "Preemption-off Latency Histogram"
  18707. + depends on PREEMPT_TRACER
  18708. + help
  18709. + This option generates continuously updated histograms (one per cpu)
  18710. + of the duration of time periods with preemption disabled. The
  18711. + histograms are disabled by default. To enable them, write a non-zero
  18712. + number to
  18713. +
  18714. + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
  18715. +
  18716. + If INTERRUPT_OFF_HIST is also selected, additional histograms (one
  18717. + per cpu) are generated that accumulate the duration of time periods
  18718. + when both interrupts and preemption are disabled. The histogram data
  18719. + will be located in the debug file system at
  18720. +
  18721. + /sys/kernel/debug/tracing/latency_hist/preemptoff
  18722. +
  18723. config SCHED_TRACER
  18724. bool "Scheduling Latency Tracer"
  18725. select GENERIC_TRACER
  18726. @@ -221,6 +258,74 @@
  18727. This tracer tracks the latency of the highest priority task
  18728. to be scheduled in, starting from the point it has woken up.
  18729. +config WAKEUP_LATENCY_HIST
  18730. + bool "Scheduling Latency Histogram"
  18731. + depends on SCHED_TRACER
  18732. + help
  18733. + This option generates continuously updated histograms (one per cpu)
  18734. + of the scheduling latency of the highest priority task.
  18735. + The histograms are disabled by default. To enable them, write a
  18736. + non-zero number to
  18737. +
  18738. + /sys/kernel/debug/tracing/latency_hist/enable/wakeup
  18739. +
  18740. + Two different algorithms are used, one to determine the latency of
  18741. + processes that exclusively use the highest priority of the system and
  18742. + another one to determine the latency of processes that share the
  18743. + highest system priority with other processes. The former is used to
  18744. + improve hardware and system software, the latter to optimize the
  18745. + priority design of a given system. The histogram data will be
  18746. + located in the debug file system at
  18747. +
  18748. + /sys/kernel/debug/tracing/latency_hist/wakeup
  18749. +
  18750. + and
  18751. +
  18752. + /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio
  18753. +
  18754. + If both Scheduling Latency Histogram and Missed Timer Offsets
  18755. + Histogram are selected, additional histogram data will be collected
  18756. + that contain, in addition to the wakeup latency, the timer latency, in
  18757. + case the wakeup was triggered by an expired timer. These histograms
  18758. + are available in the
  18759. +
  18760. + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
  18761. +
  18762. + directory. They reflect the apparent interrupt and scheduling latency
18763. + and are best suited to determining the worst-case latency of a given
  18764. + system. To enable these histograms, write a non-zero number to
  18765. +
  18766. + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
  18767. +
  18768. +config MISSED_TIMER_OFFSETS_HIST
  18769. + depends on HIGH_RES_TIMERS
  18770. + select GENERIC_TRACER
  18771. + bool "Missed Timer Offsets Histogram"
  18772. + help
  18773. + Generate a histogram of missed timer offsets in microseconds. The
  18774. + histograms are disabled by default. To enable them, write a non-zero
  18775. + number to
  18776. +
  18777. + /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets
  18778. +
  18779. + The histogram data will be located in the debug file system at
  18780. +
  18781. + /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets
  18782. +
  18783. + If both Scheduling Latency Histogram and Missed Timer Offsets
  18784. + Histogram are selected, additional histogram data will be collected
  18785. + that contain, in addition to the wakeup latency, the timer latency, in
  18786. + case the wakeup was triggered by an expired timer. These histograms
  18787. + are available in the
  18788. +
  18789. + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
  18790. +
  18791. + directory. They reflect the apparent interrupt and scheduling latency
18792. + and are best suited to determining the worst-case latency of a given
  18793. + system. To enable these histograms, write a non-zero number to
  18794. +
  18795. + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
  18796. +
  18797. config ENABLE_DEFAULT_TRACERS
  18798. bool "Trace process context switches and events"
  18799. depends on !GENERIC_TRACER
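
All four help texts above follow the same usage model: the histogram code is compiled in by its Kconfig option but stays inert until a non-zero value is written to the matching file under /sys/kernel/debug/tracing/latency_hist/enable/. As a small illustration, the userspace snippet below flips the wakeup switch named in the help text; it assumes debugfs is mounted at the usual /sys/kernel/debug location.

/*
 * Userspace sketch: enable the wakeup latency histogram described in the
 * help text above (debugfs must be mounted at /sys/kernel/debug).
 */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/kernel/debug/tracing/latency_hist/enable/wakeup";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	fputs("1\n", f);	/* any non-zero value enables the histogram */
	return fclose(f) ? 1 : 0;
}
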
  18800. diff -Nur linux-4.8.15.orig/kernel/trace/latency_hist.c linux-4.8.15/kernel/trace/latency_hist.c
  18801. --- linux-4.8.15.orig/kernel/trace/latency_hist.c 1970-01-01 01:00:00.000000000 +0100
  18802. +++ linux-4.8.15/kernel/trace/latency_hist.c 2017-01-01 17:07:16.051428246 +0100
  18803. @@ -0,0 +1,1178 @@
  18804. +/*
  18805. + * kernel/trace/latency_hist.c
  18806. + *
18807. + * Add support for histograms of preemption-off latency,
18808. + * interrupt-off latency and wakeup latency; it depends on
  18809. + * Real-Time Preemption Support.
  18810. + *
  18811. + * Copyright (C) 2005 MontaVista Software, Inc.
  18812. + * Yi Yang <yyang@ch.mvista.com>
  18813. + *
  18814. + * Converted to work with the new latency tracer.
  18815. + * Copyright (C) 2008 Red Hat, Inc.
  18816. + * Steven Rostedt <srostedt@redhat.com>
  18817. + *
  18818. + */
  18819. +#include <linux/module.h>
  18820. +#include <linux/debugfs.h>
  18821. +#include <linux/seq_file.h>
  18822. +#include <linux/percpu.h>
  18823. +#include <linux/kallsyms.h>
  18824. +#include <linux/uaccess.h>
  18825. +#include <linux/sched.h>
  18826. +#include <linux/sched/rt.h>
  18827. +#include <linux/slab.h>
  18828. +#include <linux/atomic.h>
  18829. +#include <asm/div64.h>
  18830. +
  18831. +#include "trace.h"
  18832. +#include <trace/events/sched.h>
  18833. +
  18834. +#define NSECS_PER_USECS 1000L
  18835. +
  18836. +#define CREATE_TRACE_POINTS
  18837. +#include <trace/events/hist.h>
  18838. +
  18839. +enum {
  18840. + IRQSOFF_LATENCY = 0,
  18841. + PREEMPTOFF_LATENCY,
  18842. + PREEMPTIRQSOFF_LATENCY,
  18843. + WAKEUP_LATENCY,
  18844. + WAKEUP_LATENCY_SHAREDPRIO,
  18845. + MISSED_TIMER_OFFSETS,
  18846. + TIMERANDWAKEUP_LATENCY,
  18847. + MAX_LATENCY_TYPE,
  18848. +};
  18849. +
  18850. +#define MAX_ENTRY_NUM 10240
  18851. +
  18852. +struct hist_data {
  18853. + atomic_t hist_mode; /* 0 log, 1 don't log */
  18854. + long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */
  18855. + long min_lat;
  18856. + long max_lat;
  18857. + unsigned long long below_hist_bound_samples;
  18858. + unsigned long long above_hist_bound_samples;
  18859. + long long accumulate_lat;
  18860. + unsigned long long total_samples;
  18861. + unsigned long long hist_array[MAX_ENTRY_NUM];
  18862. +};
  18863. +
  18864. +struct enable_data {
  18865. + int latency_type;
  18866. + int enabled;
  18867. +};
  18868. +
  18869. +static char *latency_hist_dir_root = "latency_hist";
  18870. +
  18871. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  18872. +static DEFINE_PER_CPU(struct hist_data, irqsoff_hist);
  18873. +static char *irqsoff_hist_dir = "irqsoff";
  18874. +static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start);
  18875. +static DEFINE_PER_CPU(int, hist_irqsoff_counting);
  18876. +#endif
  18877. +
  18878. +#ifdef CONFIG_PREEMPT_OFF_HIST
  18879. +static DEFINE_PER_CPU(struct hist_data, preemptoff_hist);
  18880. +static char *preemptoff_hist_dir = "preemptoff";
  18881. +static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start);
  18882. +static DEFINE_PER_CPU(int, hist_preemptoff_counting);
  18883. +#endif
  18884. +
  18885. +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
  18886. +static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist);
  18887. +static char *preemptirqsoff_hist_dir = "preemptirqsoff";
  18888. +static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start);
  18889. +static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting);
  18890. +#endif
  18891. +
  18892. +#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST)
  18893. +static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start);
  18894. +static struct enable_data preemptirqsoff_enabled_data = {
  18895. + .latency_type = PREEMPTIRQSOFF_LATENCY,
  18896. + .enabled = 0,
  18897. +};
  18898. +#endif
  18899. +
  18900. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  18901. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  18902. +struct maxlatproc_data {
  18903. + char comm[FIELD_SIZEOF(struct task_struct, comm)];
  18904. + char current_comm[FIELD_SIZEOF(struct task_struct, comm)];
  18905. + int pid;
  18906. + int current_pid;
  18907. + int prio;
  18908. + int current_prio;
  18909. + long latency;
  18910. + long timeroffset;
  18911. + cycle_t timestamp;
  18912. +};
  18913. +#endif
  18914. +
  18915. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  18916. +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist);
  18917. +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio);
  18918. +static char *wakeup_latency_hist_dir = "wakeup";
  18919. +static char *wakeup_latency_hist_dir_sharedprio = "sharedprio";
  18920. +static notrace void probe_wakeup_latency_hist_start(void *v,
  18921. + struct task_struct *p);
  18922. +static notrace void probe_wakeup_latency_hist_stop(void *v,
  18923. + bool preempt, struct task_struct *prev, struct task_struct *next);
  18924. +static notrace void probe_sched_migrate_task(void *,
  18925. + struct task_struct *task, int cpu);
  18926. +static struct enable_data wakeup_latency_enabled_data = {
  18927. + .latency_type = WAKEUP_LATENCY,
  18928. + .enabled = 0,
  18929. +};
  18930. +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc);
  18931. +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio);
  18932. +static DEFINE_PER_CPU(struct task_struct *, wakeup_task);
  18933. +static DEFINE_PER_CPU(int, wakeup_sharedprio);
  18934. +static unsigned long wakeup_pid;
  18935. +#endif
  18936. +
  18937. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  18938. +static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets);
  18939. +static char *missed_timer_offsets_dir = "missed_timer_offsets";
  18940. +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
  18941. + long long offset, struct task_struct *curr, struct task_struct *task);
  18942. +static struct enable_data missed_timer_offsets_enabled_data = {
  18943. + .latency_type = MISSED_TIMER_OFFSETS,
  18944. + .enabled = 0,
  18945. +};
  18946. +static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc);
  18947. +static unsigned long missed_timer_offsets_pid;
  18948. +#endif
  18949. +
  18950. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  18951. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  18952. +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
  18953. +static char *timerandwakeup_latency_hist_dir = "timerandwakeup";
  18954. +static struct enable_data timerandwakeup_enabled_data = {
  18955. + .latency_type = TIMERANDWAKEUP_LATENCY,
  18956. + .enabled = 0,
  18957. +};
  18958. +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
  18959. +#endif
  18960. +
  18961. +void notrace latency_hist(int latency_type, int cpu, long latency,
  18962. + long timeroffset, cycle_t stop,
  18963. + struct task_struct *p)
  18964. +{
  18965. + struct hist_data *my_hist;
  18966. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  18967. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  18968. + struct maxlatproc_data *mp = NULL;
  18969. +#endif
  18970. +
  18971. + if (!cpu_possible(cpu) || latency_type < 0 ||
  18972. + latency_type >= MAX_LATENCY_TYPE)
  18973. + return;
  18974. +
  18975. + switch (latency_type) {
  18976. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  18977. + case IRQSOFF_LATENCY:
  18978. + my_hist = &per_cpu(irqsoff_hist, cpu);
  18979. + break;
  18980. +#endif
  18981. +#ifdef CONFIG_PREEMPT_OFF_HIST
  18982. + case PREEMPTOFF_LATENCY:
  18983. + my_hist = &per_cpu(preemptoff_hist, cpu);
  18984. + break;
  18985. +#endif
  18986. +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
  18987. + case PREEMPTIRQSOFF_LATENCY:
  18988. + my_hist = &per_cpu(preemptirqsoff_hist, cpu);
  18989. + break;
  18990. +#endif
  18991. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  18992. + case WAKEUP_LATENCY:
  18993. + my_hist = &per_cpu(wakeup_latency_hist, cpu);
  18994. + mp = &per_cpu(wakeup_maxlatproc, cpu);
  18995. + break;
  18996. + case WAKEUP_LATENCY_SHAREDPRIO:
  18997. + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
  18998. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
  18999. + break;
  19000. +#endif
  19001. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19002. + case MISSED_TIMER_OFFSETS:
  19003. + my_hist = &per_cpu(missed_timer_offsets, cpu);
  19004. + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
  19005. + break;
  19006. +#endif
  19007. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  19008. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19009. + case TIMERANDWAKEUP_LATENCY:
  19010. + my_hist = &per_cpu(timerandwakeup_latency_hist, cpu);
  19011. + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
  19012. + break;
  19013. +#endif
  19014. +
  19015. + default:
  19016. + return;
  19017. + }
  19018. +
  19019. + latency += my_hist->offset;
  19020. +
  19021. + if (atomic_read(&my_hist->hist_mode) == 0)
  19022. + return;
  19023. +
  19024. + if (latency < 0 || latency >= MAX_ENTRY_NUM) {
  19025. + if (latency < 0)
  19026. + my_hist->below_hist_bound_samples++;
  19027. + else
  19028. + my_hist->above_hist_bound_samples++;
  19029. + } else
  19030. + my_hist->hist_array[latency]++;
  19031. +
  19032. + if (unlikely(latency > my_hist->max_lat ||
  19033. + my_hist->min_lat == LONG_MAX)) {
  19034. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  19035. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19036. + if (latency_type == WAKEUP_LATENCY ||
  19037. + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
  19038. + latency_type == MISSED_TIMER_OFFSETS ||
  19039. + latency_type == TIMERANDWAKEUP_LATENCY) {
  19040. + strncpy(mp->comm, p->comm, sizeof(mp->comm));
  19041. + strncpy(mp->current_comm, current->comm,
  19042. + sizeof(mp->current_comm));
  19043. + mp->pid = task_pid_nr(p);
  19044. + mp->current_pid = task_pid_nr(current);
  19045. + mp->prio = p->prio;
  19046. + mp->current_prio = current->prio;
  19047. + mp->latency = latency;
  19048. + mp->timeroffset = timeroffset;
  19049. + mp->timestamp = stop;
  19050. + }
  19051. +#endif
  19052. + my_hist->max_lat = latency;
  19053. + }
  19054. + if (unlikely(latency < my_hist->min_lat))
  19055. + my_hist->min_lat = latency;
  19056. + my_hist->total_samples++;
  19057. + my_hist->accumulate_lat += latency;
  19058. +}
  19059. +
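
The accounting in latency_hist() above boils down to a shift by the per-histogram offset, two out-of-range counters, and one array slot per microsecond. A minimal user-space sketch of that bucket logic follows; the simplified struct and names are illustrative only and not taken from the patch:

    #include <limits.h>
    #include <stdio.h>

    #define MAX_ENTRY_NUM 10240

    struct hist {
            long offset;                    /* 0, or MAX_ENTRY_NUM/2 for a bipolar scale */
            long min_lat, max_lat;
            unsigned long long below, above, total;
            long long accumulate;
            unsigned long long slot[MAX_ENTRY_NUM];
    };

    /* One sample, latency already in microseconds, mirroring the kernel logic. */
    static void hist_account(struct hist *h, long latency)
    {
            latency += h->offset;
            if (latency < 0)
                    h->below++;
            else if (latency >= MAX_ENTRY_NUM)
                    h->above++;
            else
                    h->slot[latency]++;

            if (latency > h->max_lat || h->min_lat == LONG_MAX)
                    h->max_lat = latency;
            if (latency < h->min_lat)
                    h->min_lat = latency;
            h->total++;
            h->accumulate += latency;
    }

    int main(void)
    {
            static struct hist h = { .min_lat = LONG_MAX, .max_lat = LONG_MIN };

            hist_account(&h, 12);
            hist_account(&h, 3);
            printf("min %ld max %ld avg %lld samples %llu\n",
                   h.min_lat, h.max_lat,
                   h.accumulate / (long long)h.total, h.total);
            return 0;
    }
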
  19060. +static void *l_start(struct seq_file *m, loff_t *pos)
  19061. +{
  19062. + loff_t *index_ptr = NULL;
  19063. + loff_t index = *pos;
  19064. + struct hist_data *my_hist = m->private;
  19065. +
  19066. + if (index == 0) {
  19067. + char minstr[32], avgstr[32], maxstr[32];
  19068. +
  19069. + atomic_dec(&my_hist->hist_mode);
  19070. +
  19071. + if (likely(my_hist->total_samples)) {
  19072. + long avg = (long) div64_s64(my_hist->accumulate_lat,
  19073. + my_hist->total_samples);
  19074. + snprintf(minstr, sizeof(minstr), "%ld",
  19075. + my_hist->min_lat - my_hist->offset);
  19076. + snprintf(avgstr, sizeof(avgstr), "%ld",
  19077. + avg - my_hist->offset);
  19078. + snprintf(maxstr, sizeof(maxstr), "%ld",
  19079. + my_hist->max_lat - my_hist->offset);
  19080. + } else {
  19081. + strcpy(minstr, "<undef>");
  19082. + strcpy(avgstr, minstr);
  19083. + strcpy(maxstr, minstr);
  19084. + }
  19085. +
  19086. + seq_printf(m, "#Minimum latency: %s microseconds\n"
  19087. + "#Average latency: %s microseconds\n"
  19088. + "#Maximum latency: %s microseconds\n"
  19089. + "#Total samples: %llu\n"
  19090. + "#There are %llu samples lower than %ld"
  19091. + " microseconds.\n"
  19092. + "#There are %llu samples greater or equal"
  19093. + " than %ld microseconds.\n"
  19094. + "#usecs\t%16s\n",
  19095. + minstr, avgstr, maxstr,
  19096. + my_hist->total_samples,
  19097. + my_hist->below_hist_bound_samples,
  19098. + -my_hist->offset,
  19099. + my_hist->above_hist_bound_samples,
  19100. + MAX_ENTRY_NUM - my_hist->offset,
  19101. + "samples");
  19102. + }
  19103. + if (index < MAX_ENTRY_NUM) {
  19104. + index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
  19105. + if (index_ptr)
  19106. + *index_ptr = index;
  19107. + }
  19108. +
  19109. + return index_ptr;
  19110. +}
  19111. +
  19112. +static void *l_next(struct seq_file *m, void *p, loff_t *pos)
  19113. +{
  19114. + loff_t *index_ptr = p;
  19115. + struct hist_data *my_hist = m->private;
  19116. +
  19117. + if (++*pos >= MAX_ENTRY_NUM) {
  19118. + atomic_inc(&my_hist->hist_mode);
  19119. + return NULL;
  19120. + }
  19121. + *index_ptr = *pos;
  19122. + return index_ptr;
  19123. +}
  19124. +
  19125. +static void l_stop(struct seq_file *m, void *p)
  19126. +{
  19127. + kfree(p);
  19128. +}
  19129. +
  19130. +static int l_show(struct seq_file *m, void *p)
  19131. +{
  19132. + int index = *(loff_t *) p;
  19133. + struct hist_data *my_hist = m->private;
  19134. +
  19135. + seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset,
  19136. + my_hist->hist_array[index]);
  19137. + return 0;
  19138. +}
  19139. +
  19140. +static const struct seq_operations latency_hist_seq_op = {
  19141. + .start = l_start,
  19142. + .next = l_next,
  19143. + .stop = l_stop,
  19144. + .show = l_show
  19145. +};
  19146. +
  19147. +static int latency_hist_open(struct inode *inode, struct file *file)
  19148. +{
  19149. + int ret;
  19150. +
  19151. + ret = seq_open(file, &latency_hist_seq_op);
  19152. + if (!ret) {
  19153. + struct seq_file *seq = file->private_data;
  19154. + seq->private = inode->i_private;
  19155. + }
  19156. + return ret;
  19157. +}
  19158. +
  19159. +static const struct file_operations latency_hist_fops = {
  19160. + .open = latency_hist_open,
  19161. + .read = seq_read,
  19162. + .llseek = seq_lseek,
  19163. + .release = seq_release,
  19164. +};
  19165. +
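
With the seq_file operations wired up this way, every per-CPU histogram file is plain text: a few '#'-prefixed summary lines emitted by l_start(), followed by one "usecs<TAB>samples" pair per bucket from l_show(). A small reader sketch; the debugfs mount point and the exact file path are assumptions, not guaranteed by the patch:

    #include <stdio.h>

    int main(int argc, char **argv)
    {
            /* Assumes debugfs is mounted at /sys/kernel/debug. */
            const char *path = argc > 1 ? argv[1] :
                    "/sys/kernel/debug/latency_hist/wakeup/CPU0";
            FILE *f = fopen(path, "r");
            char line[256];
            long usecs;
            unsigned long long samples;

            if (!f) {
                    perror(path);
                    return 1;
            }
            while (fgets(line, sizeof(line), f)) {
                    if (line[0] == '#') {   /* summary header from l_start() */
                            fputs(line, stdout);
                            continue;
                    }
                    if (sscanf(line, "%ld %llu", &usecs, &samples) == 2 && samples)
                            printf("%6ld us: %llu sample(s)\n", usecs, samples);
            }
            fclose(f);
            return 0;
    }
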
  19166. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  19167. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19168. +static void clear_maxlatprocdata(struct maxlatproc_data *mp)
  19169. +{
  19170. + mp->comm[0] = mp->current_comm[0] = '\0';
  19171. + mp->prio = mp->current_prio = mp->pid = mp->current_pid =
  19172. + mp->latency = mp->timeroffset = -1;
  19173. + mp->timestamp = 0;
  19174. +}
  19175. +#endif
  19176. +
  19177. +static void hist_reset(struct hist_data *hist)
  19178. +{
  19179. + atomic_dec(&hist->hist_mode);
  19180. +
  19181. + memset(hist->hist_array, 0, sizeof(hist->hist_array));
  19182. + hist->below_hist_bound_samples = 0ULL;
  19183. + hist->above_hist_bound_samples = 0ULL;
  19184. + hist->min_lat = LONG_MAX;
  19185. + hist->max_lat = LONG_MIN;
  19186. + hist->total_samples = 0ULL;
  19187. + hist->accumulate_lat = 0LL;
  19188. +
  19189. + atomic_inc(&hist->hist_mode);
  19190. +}
  19191. +
  19192. +static ssize_t
  19193. +latency_hist_reset(struct file *file, const char __user *a,
  19194. + size_t size, loff_t *off)
  19195. +{
  19196. + int cpu;
  19197. + struct hist_data *hist = NULL;
  19198. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  19199. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19200. + struct maxlatproc_data *mp = NULL;
  19201. +#endif
  19202. + off_t latency_type = (off_t) file->private_data;
  19203. +
  19204. + for_each_online_cpu(cpu) {
  19205. +
  19206. + switch (latency_type) {
  19207. +#ifdef CONFIG_PREEMPT_OFF_HIST
  19208. + case PREEMPTOFF_LATENCY:
  19209. + hist = &per_cpu(preemptoff_hist, cpu);
  19210. + break;
  19211. +#endif
  19212. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  19213. + case IRQSOFF_LATENCY:
  19214. + hist = &per_cpu(irqsoff_hist, cpu);
  19215. + break;
  19216. +#endif
  19217. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  19218. + case PREEMPTIRQSOFF_LATENCY:
  19219. + hist = &per_cpu(preemptirqsoff_hist, cpu);
  19220. + break;
  19221. +#endif
  19222. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  19223. + case WAKEUP_LATENCY:
  19224. + hist = &per_cpu(wakeup_latency_hist, cpu);
  19225. + mp = &per_cpu(wakeup_maxlatproc, cpu);
  19226. + break;
  19227. + case WAKEUP_LATENCY_SHAREDPRIO:
  19228. + hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
  19229. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
  19230. + break;
  19231. +#endif
  19232. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19233. + case MISSED_TIMER_OFFSETS:
  19234. + hist = &per_cpu(missed_timer_offsets, cpu);
  19235. + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
  19236. + break;
  19237. +#endif
  19238. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  19239. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19240. + case TIMERANDWAKEUP_LATENCY:
  19241. + hist = &per_cpu(timerandwakeup_latency_hist, cpu);
  19242. + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
  19243. + break;
  19244. +#endif
  19245. + }
  19246. +
  19247. + hist_reset(hist);
  19248. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  19249. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19250. + if (latency_type == WAKEUP_LATENCY ||
  19251. + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
  19252. + latency_type == MISSED_TIMER_OFFSETS ||
  19253. + latency_type == TIMERANDWAKEUP_LATENCY)
  19254. + clear_maxlatprocdata(mp);
  19255. +#endif
  19256. + }
  19257. +
  19258. + return size;
  19259. +}
  19260. +
  19261. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  19262. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19263. +static ssize_t
  19264. +show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  19265. +{
  19266. + char buf[64];
  19267. + int r;
  19268. + unsigned long *this_pid = file->private_data;
  19269. +
  19270. + r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid);
  19271. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  19272. +}
  19273. +
  19274. +static ssize_t do_pid(struct file *file, const char __user *ubuf,
  19275. + size_t cnt, loff_t *ppos)
  19276. +{
  19277. + char buf[64];
  19278. + unsigned long pid;
  19279. + unsigned long *this_pid = file->private_data;
  19280. +
  19281. + if (cnt >= sizeof(buf))
  19282. + return -EINVAL;
  19283. +
  19284. + if (copy_from_user(&buf, ubuf, cnt))
  19285. + return -EFAULT;
  19286. +
  19287. + buf[cnt] = '\0';
  19288. +
  19289. + if (kstrtoul(buf, 10, &pid))
  19290. + return -EINVAL;
  19291. +
  19292. + *this_pid = pid;
  19293. +
  19294. + return cnt;
  19295. +}
  19296. +#endif
  19297. +
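
show_pid()/do_pid() make each per-histogram pid file a plain decimal value, and the probes further down only consult wakeup_pid/missed_timer_offsets_pid when they are non-zero, so writing 0 clears the filter. A hypothetical user-space helper for setting it; the debugfs path and the PID value are assumptions:

    #include <stdio.h>

    /* Restrict a histogram to one PID; 0 = no filter. */
    static int set_hist_pid(const char *file, long pid)
    {
            FILE *f = fopen(file, "w");

            if (!f)
                    return -1;
            fprintf(f, "%ld\n", pid);
            return fclose(f);
    }

    int main(void)
    {
            /* 1234 is a made-up PID used purely for illustration. */
            return set_hist_pid("/sys/kernel/debug/latency_hist/wakeup/pid",
                                1234) ? 1 : 0;
    }
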
  19298. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  19299. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19300. +static ssize_t
  19301. +show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  19302. +{
  19303. + int r;
  19304. + struct maxlatproc_data *mp = file->private_data;
  19305. + int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
  19306. + unsigned long long t;
  19307. + unsigned long usecs, secs;
  19308. + char *buf;
  19309. +
  19310. + if (mp->pid == -1 || mp->current_pid == -1) {
  19311. + buf = "(none)\n";
  19312. + return simple_read_from_buffer(ubuf, cnt, ppos, buf,
  19313. + strlen(buf));
  19314. + }
  19315. +
  19316. + buf = kmalloc(strmaxlen, GFP_KERNEL);
  19317. + if (buf == NULL)
  19318. + return -ENOMEM;
  19319. +
  19320. + t = ns2usecs(mp->timestamp);
  19321. + usecs = do_div(t, USEC_PER_SEC);
  19322. + secs = (unsigned long) t;
  19323. + r = snprintf(buf, strmaxlen,
  19324. + "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
  19325. + MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
  19326. + mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
  19327. + secs, usecs);
  19328. + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  19329. + kfree(buf);
  19330. + return r;
  19331. +}
  19332. +#endif
  19333. +
  19334. +static ssize_t
  19335. +show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  19336. +{
  19337. + char buf[64];
  19338. + struct enable_data *ed = file->private_data;
  19339. + int r;
  19340. +
  19341. + r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled);
  19342. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  19343. +}
  19344. +
  19345. +static ssize_t
  19346. +do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos)
  19347. +{
  19348. + char buf[64];
  19349. + long enable;
  19350. + struct enable_data *ed = file->private_data;
  19351. +
  19352. + if (cnt >= sizeof(buf))
  19353. + return -EINVAL;
  19354. +
  19355. + if (copy_from_user(&buf, ubuf, cnt))
  19356. + return -EFAULT;
  19357. +
  19358. + buf[cnt] = 0;
  19359. +
  19360. + if (kstrtoul(buf, 10, &enable))
  19361. + return -EINVAL;
  19362. +
  19363. + if ((enable && ed->enabled) || (!enable && !ed->enabled))
  19364. + return cnt;
  19365. +
  19366. + if (enable) {
  19367. + int ret;
  19368. +
  19369. + switch (ed->latency_type) {
  19370. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  19371. + case PREEMPTIRQSOFF_LATENCY:
  19372. + ret = register_trace_preemptirqsoff_hist(
  19373. + probe_preemptirqsoff_hist, NULL);
  19374. + if (ret) {
  19375. + pr_info("wakeup trace: Couldn't assign "
  19376. + "probe_preemptirqsoff_hist "
  19377. + "to trace_preemptirqsoff_hist\n");
  19378. + return ret;
  19379. + }
  19380. + break;
  19381. +#endif
  19382. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  19383. + case WAKEUP_LATENCY:
  19384. + ret = register_trace_sched_wakeup(
  19385. + probe_wakeup_latency_hist_start, NULL);
  19386. + if (ret) {
  19387. + pr_info("wakeup trace: Couldn't assign "
  19388. + "probe_wakeup_latency_hist_start "
  19389. + "to trace_sched_wakeup\n");
  19390. + return ret;
  19391. + }
  19392. + ret = register_trace_sched_wakeup_new(
  19393. + probe_wakeup_latency_hist_start, NULL);
  19394. + if (ret) {
  19395. + pr_info("wakeup trace: Couldn't assign "
  19396. + "probe_wakeup_latency_hist_start "
  19397. + "to trace_sched_wakeup_new\n");
  19398. + unregister_trace_sched_wakeup(
  19399. + probe_wakeup_latency_hist_start, NULL);
  19400. + return ret;
  19401. + }
  19402. + ret = register_trace_sched_switch(
  19403. + probe_wakeup_latency_hist_stop, NULL);
  19404. + if (ret) {
  19405. + pr_info("wakeup trace: Couldn't assign "
  19406. + "probe_wakeup_latency_hist_stop "
  19407. + "to trace_sched_switch\n");
  19408. + unregister_trace_sched_wakeup(
  19409. + probe_wakeup_latency_hist_start, NULL);
  19410. + unregister_trace_sched_wakeup_new(
  19411. + probe_wakeup_latency_hist_start, NULL);
  19412. + return ret;
  19413. + }
  19414. + ret = register_trace_sched_migrate_task(
  19415. + probe_sched_migrate_task, NULL);
  19416. + if (ret) {
  19417. + pr_info("wakeup trace: Couldn't assign "
  19418. + "probe_sched_migrate_task "
  19419. + "to trace_sched_migrate_task\n");
  19420. + unregister_trace_sched_wakeup(
  19421. + probe_wakeup_latency_hist_start, NULL);
  19422. + unregister_trace_sched_wakeup_new(
  19423. + probe_wakeup_latency_hist_start, NULL);
  19424. + unregister_trace_sched_switch(
  19425. + probe_wakeup_latency_hist_stop, NULL);
  19426. + return ret;
  19427. + }
  19428. + break;
  19429. +#endif
  19430. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19431. + case MISSED_TIMER_OFFSETS:
  19432. + ret = register_trace_hrtimer_interrupt(
  19433. + probe_hrtimer_interrupt, NULL);
  19434. + if (ret) {
  19435. + pr_info("wakeup trace: Couldn't assign "
  19436. + "probe_hrtimer_interrupt "
  19437. + "to trace_hrtimer_interrupt\n");
  19438. + return ret;
  19439. + }
  19440. + break;
  19441. +#endif
  19442. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  19443. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19444. + case TIMERANDWAKEUP_LATENCY:
  19445. + if (!wakeup_latency_enabled_data.enabled ||
  19446. + !missed_timer_offsets_enabled_data.enabled)
  19447. + return -EINVAL;
  19448. + break;
  19449. +#endif
  19450. + default:
  19451. + break;
  19452. + }
  19453. + } else {
  19454. + switch (ed->latency_type) {
  19455. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  19456. + case PREEMPTIRQSOFF_LATENCY:
  19457. + {
  19458. + int cpu;
  19459. +
  19460. + unregister_trace_preemptirqsoff_hist(
  19461. + probe_preemptirqsoff_hist, NULL);
  19462. + for_each_online_cpu(cpu) {
  19463. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  19464. + per_cpu(hist_irqsoff_counting,
  19465. + cpu) = 0;
  19466. +#endif
  19467. +#ifdef CONFIG_PREEMPT_OFF_HIST
  19468. + per_cpu(hist_preemptoff_counting,
  19469. + cpu) = 0;
  19470. +#endif
  19471. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  19472. + per_cpu(hist_preemptirqsoff_counting,
  19473. + cpu) = 0;
  19474. +#endif
  19475. + }
  19476. + }
  19477. + break;
  19478. +#endif
  19479. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  19480. + case WAKEUP_LATENCY:
  19481. + {
  19482. + int cpu;
  19483. +
  19484. + unregister_trace_sched_wakeup(
  19485. + probe_wakeup_latency_hist_start, NULL);
  19486. + unregister_trace_sched_wakeup_new(
  19487. + probe_wakeup_latency_hist_start, NULL);
  19488. + unregister_trace_sched_switch(
  19489. + probe_wakeup_latency_hist_stop, NULL);
  19490. + unregister_trace_sched_migrate_task(
  19491. + probe_sched_migrate_task, NULL);
  19492. +
  19493. + for_each_online_cpu(cpu) {
  19494. + per_cpu(wakeup_task, cpu) = NULL;
  19495. + per_cpu(wakeup_sharedprio, cpu) = 0;
  19496. + }
  19497. + }
  19498. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19499. + timerandwakeup_enabled_data.enabled = 0;
  19500. +#endif
  19501. + break;
  19502. +#endif
  19503. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19504. + case MISSED_TIMER_OFFSETS:
  19505. + unregister_trace_hrtimer_interrupt(
  19506. + probe_hrtimer_interrupt, NULL);
  19507. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  19508. + timerandwakeup_enabled_data.enabled = 0;
  19509. +#endif
  19510. + break;
  19511. +#endif
  19512. + default:
  19513. + break;
  19514. + }
  19515. + }
  19516. + ed->enabled = enable;
  19517. + return cnt;
  19518. +}
  19519. +
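
do_enable() accepts a plain decimal 0/1 and registers or unregisters the corresponding tracepoint probes; note that TIMERANDWAKEUP_LATENCY refuses to switch on unless both the wakeup and the missed_timer_offsets histograms are already enabled. A user-space sketch of that enable sequence, with paths assumed and the file names taken from latency_hist_init() below:

    #include <stdio.h>

    static int hist_enable(const char *name, int on)
    {
            char path[128];
            FILE *f;

            snprintf(path, sizeof(path),
                     "/sys/kernel/debug/latency_hist/enable/%s", name);
            f = fopen(path, "w");
            if (!f)
                    return -1;
            fprintf(f, "%d\n", on);
            return fclose(f);
    }

    int main(void)
    {
            /* Order matters: timerandwakeup only accepts '1' once the
             * other two histogram types are enabled. */
            if (hist_enable("wakeup", 1))
                    return 1;
            if (hist_enable("missed_timer_offsets", 1))
                    return 1;
            return hist_enable("timerandwakeup", 1) ? 1 : 0;
    }
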
  19520. +static const struct file_operations latency_hist_reset_fops = {
  19521. + .open = tracing_open_generic,
  19522. + .write = latency_hist_reset,
  19523. +};
  19524. +
  19525. +static const struct file_operations enable_fops = {
  19526. + .open = tracing_open_generic,
  19527. + .read = show_enable,
  19528. + .write = do_enable,
  19529. +};
  19530. +
  19531. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  19532. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19533. +static const struct file_operations pid_fops = {
  19534. + .open = tracing_open_generic,
  19535. + .read = show_pid,
  19536. + .write = do_pid,
  19537. +};
  19538. +
  19539. +static const struct file_operations maxlatproc_fops = {
  19540. + .open = tracing_open_generic,
  19541. + .read = show_maxlatproc,
  19542. +};
  19543. +#endif
  19544. +
  19545. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  19546. +static notrace void probe_preemptirqsoff_hist(void *v, int reason,
  19547. + int starthist)
  19548. +{
  19549. + int cpu = raw_smp_processor_id();
  19550. + int time_set = 0;
  19551. +
  19552. + if (starthist) {
  19553. + cycle_t uninitialized_var(start);
  19554. +
  19555. + if (!preempt_count() && !irqs_disabled())
  19556. + return;
  19557. +
  19558. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  19559. + if ((reason == IRQS_OFF || reason == TRACE_START) &&
  19560. + !per_cpu(hist_irqsoff_counting, cpu)) {
  19561. + per_cpu(hist_irqsoff_counting, cpu) = 1;
  19562. + start = ftrace_now(cpu);
  19563. + time_set++;
  19564. + per_cpu(hist_irqsoff_start, cpu) = start;
  19565. + }
  19566. +#endif
  19567. +
  19568. +#ifdef CONFIG_PREEMPT_OFF_HIST
  19569. + if ((reason == PREEMPT_OFF || reason == TRACE_START) &&
  19570. + !per_cpu(hist_preemptoff_counting, cpu)) {
  19571. + per_cpu(hist_preemptoff_counting, cpu) = 1;
  19572. + if (!(time_set++))
  19573. + start = ftrace_now(cpu);
  19574. + per_cpu(hist_preemptoff_start, cpu) = start;
  19575. + }
  19576. +#endif
  19577. +
  19578. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  19579. + if (per_cpu(hist_irqsoff_counting, cpu) &&
  19580. + per_cpu(hist_preemptoff_counting, cpu) &&
  19581. + !per_cpu(hist_preemptirqsoff_counting, cpu)) {
  19582. + per_cpu(hist_preemptirqsoff_counting, cpu) = 1;
  19583. + if (!time_set)
  19584. + start = ftrace_now(cpu);
  19585. + per_cpu(hist_preemptirqsoff_start, cpu) = start;
  19586. + }
  19587. +#endif
  19588. + } else {
  19589. + cycle_t uninitialized_var(stop);
  19590. +
  19591. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  19592. + if ((reason == IRQS_ON || reason == TRACE_STOP) &&
  19593. + per_cpu(hist_irqsoff_counting, cpu)) {
  19594. + cycle_t start = per_cpu(hist_irqsoff_start, cpu);
  19595. +
  19596. + stop = ftrace_now(cpu);
  19597. + time_set++;
  19598. + if (start) {
  19599. + long latency = ((long) (stop - start)) /
  19600. + NSECS_PER_USECS;
  19601. +
  19602. + latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
  19603. + stop, NULL);
  19604. + }
  19605. + per_cpu(hist_irqsoff_counting, cpu) = 0;
  19606. + }
  19607. +#endif
  19608. +
  19609. +#ifdef CONFIG_PREEMPT_OFF_HIST
  19610. + if ((reason == PREEMPT_ON || reason == TRACE_STOP) &&
  19611. + per_cpu(hist_preemptoff_counting, cpu)) {
  19612. + cycle_t start = per_cpu(hist_preemptoff_start, cpu);
  19613. +
  19614. + if (!(time_set++))
  19615. + stop = ftrace_now(cpu);
  19616. + if (start) {
  19617. + long latency = ((long) (stop - start)) /
  19618. + NSECS_PER_USECS;
  19619. +
  19620. + latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
  19621. + 0, stop, NULL);
  19622. + }
  19623. + per_cpu(hist_preemptoff_counting, cpu) = 0;
  19624. + }
  19625. +#endif
  19626. +
  19627. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  19628. + if ((!per_cpu(hist_irqsoff_counting, cpu) ||
  19629. + !per_cpu(hist_preemptoff_counting, cpu)) &&
  19630. + per_cpu(hist_preemptirqsoff_counting, cpu)) {
  19631. + cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu);
  19632. +
  19633. + if (!time_set)
  19634. + stop = ftrace_now(cpu);
  19635. + if (start) {
  19636. + long latency = ((long) (stop - start)) /
  19637. + NSECS_PER_USECS;
  19638. +
  19639. + latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
  19640. + latency, 0, stop, NULL);
  19641. + }
  19642. + per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
  19643. + }
  19644. +#endif
  19645. + }
  19646. +}
  19647. +#endif
  19648. +
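
In both branches of probe_preemptirqsoff_hist() the latency is obtained by plain integer division of a timestamp delta by NSECS_PER_USECS, so each histogram bucket covers one microsecond and any remainder is truncated. A worked example with made-up timestamps, assuming the trace clock counts nanoseconds (which is what the divisor implies):

    #include <stdio.h>

    #define NSECS_PER_USECS 1000L

    int main(void)
    {
            unsigned long long start = 1000000000ULL;   /* ns, hypothetical */
            unsigned long long stop  = 1000137500ULL;   /* 137.5 us later   */
            long latency = (long)(stop - start) / NSECS_PER_USECS;

            /* Truncates toward zero: 137500 ns -> bucket 137. */
            printf("bucket index: %ld\n", latency);
            return 0;
    }
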
  19649. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  19650. +static DEFINE_RAW_SPINLOCK(wakeup_lock);
  19651. +static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
  19652. + int cpu)
  19653. +{
  19654. + int old_cpu = task_cpu(task);
  19655. +
  19656. + if (cpu != old_cpu) {
  19657. + unsigned long flags;
  19658. + struct task_struct *cpu_wakeup_task;
  19659. +
  19660. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  19661. +
  19662. + cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
  19663. + if (task == cpu_wakeup_task) {
  19664. + put_task_struct(cpu_wakeup_task);
  19665. + per_cpu(wakeup_task, old_cpu) = NULL;
  19666. + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
  19667. + get_task_struct(cpu_wakeup_task);
  19668. + }
  19669. +
  19670. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  19671. + }
  19672. +}
  19673. +
  19674. +static notrace void probe_wakeup_latency_hist_start(void *v,
  19675. + struct task_struct *p)
  19676. +{
  19677. + unsigned long flags;
  19678. + struct task_struct *curr = current;
  19679. + int cpu = task_cpu(p);
  19680. + struct task_struct *cpu_wakeup_task;
  19681. +
  19682. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  19683. +
  19684. + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
  19685. +
  19686. + if (wakeup_pid) {
  19687. + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
  19688. + p->prio == curr->prio)
  19689. + per_cpu(wakeup_sharedprio, cpu) = 1;
  19690. + if (likely(wakeup_pid != task_pid_nr(p)))
  19691. + goto out;
  19692. + } else {
  19693. + if (likely(!rt_task(p)) ||
  19694. + (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) ||
  19695. + p->prio > curr->prio)
  19696. + goto out;
  19697. + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
  19698. + p->prio == curr->prio)
  19699. + per_cpu(wakeup_sharedprio, cpu) = 1;
  19700. + }
  19701. +
  19702. + if (cpu_wakeup_task)
  19703. + put_task_struct(cpu_wakeup_task);
  19704. + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p;
  19705. + get_task_struct(cpu_wakeup_task);
  19706. + cpu_wakeup_task->preempt_timestamp_hist =
  19707. + ftrace_now(raw_smp_processor_id());
  19708. +out:
  19709. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  19710. +}
  19711. +
  19712. +static notrace void probe_wakeup_latency_hist_stop(void *v,
  19713. + bool preempt, struct task_struct *prev, struct task_struct *next)
  19714. +{
  19715. + unsigned long flags;
  19716. + int cpu = task_cpu(next);
  19717. + long latency;
  19718. + cycle_t stop;
  19719. + struct task_struct *cpu_wakeup_task;
  19720. +
  19721. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  19722. +
  19723. + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
  19724. +
  19725. + if (cpu_wakeup_task == NULL)
  19726. + goto out;
  19727. +
  19728. + /* Already running? */
  19729. + if (unlikely(current == cpu_wakeup_task))
  19730. + goto out_reset;
  19731. +
  19732. + if (next != cpu_wakeup_task) {
  19733. + if (next->prio < cpu_wakeup_task->prio)
  19734. + goto out_reset;
  19735. +
  19736. + if (next->prio == cpu_wakeup_task->prio)
  19737. + per_cpu(wakeup_sharedprio, cpu) = 1;
  19738. +
  19739. + goto out;
  19740. + }
  19741. +
  19742. + if (current->prio == cpu_wakeup_task->prio)
  19743. + per_cpu(wakeup_sharedprio, cpu) = 1;
  19744. +
  19745. + /*
  19746. + * The task we are waiting for is about to be switched to.
  19747. + * Calculate latency and store it in histogram.
  19748. + */
  19749. + stop = ftrace_now(raw_smp_processor_id());
  19750. +
  19751. + latency = ((long) (stop - next->preempt_timestamp_hist)) /
  19752. + NSECS_PER_USECS;
  19753. +
  19754. + if (per_cpu(wakeup_sharedprio, cpu)) {
  19755. + latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
  19756. + next);
  19757. + per_cpu(wakeup_sharedprio, cpu) = 0;
  19758. + } else {
  19759. + latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
  19760. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19761. + if (timerandwakeup_enabled_data.enabled) {
  19762. + latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
  19763. + next->timer_offset + latency, next->timer_offset,
  19764. + stop, next);
  19765. + }
  19766. +#endif
  19767. + }
  19768. +
  19769. +out_reset:
  19770. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19771. + next->timer_offset = 0;
  19772. +#endif
  19773. + put_task_struct(cpu_wakeup_task);
  19774. + per_cpu(wakeup_task, cpu) = NULL;
  19775. +out:
  19776. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  19777. +}
  19778. +#endif
  19779. +
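
The wakeup probes above split samples between two histograms: a wakeup is charged to the sharedprio variant whenever the woken task shares its priority with the task already being tracked or with the task running at wakeup time, since equal-priority tasks may legitimately delay each other. A simplified user-space sketch of that decision (the real checks live in probe_wakeup_latency_hist_start/stop and also handle the pid filter and RT-only cases):

    #include <stdbool.h>
    #include <stdio.h>

    static bool wakeup_is_sharedprio(int woken_prio, int tracked_prio,
                                     int curr_prio, bool tracked_valid)
    {
            if (tracked_valid && woken_prio == tracked_prio)
                    return true;
            return woken_prio == curr_prio;
    }

    int main(void)
    {
            printf("%d\n", wakeup_is_sharedprio(10, 10, 50, true));  /* 1 */
            printf("%d\n", wakeup_is_sharedprio(10, 20, 50, false)); /* 0 */
            return 0;
    }
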
  19780. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19781. +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
  19782. + long long latency_ns, struct task_struct *curr,
  19783. + struct task_struct *task)
  19784. +{
  19785. + if (latency_ns <= 0 && task != NULL && rt_task(task) &&
  19786. + (task->prio < curr->prio ||
  19787. + (task->prio == curr->prio &&
  19788. + !cpumask_test_cpu(cpu, &task->cpus_allowed)))) {
  19789. + long latency;
  19790. + cycle_t now;
  19791. +
  19792. + if (missed_timer_offsets_pid) {
  19793. + if (likely(missed_timer_offsets_pid !=
  19794. + task_pid_nr(task)))
  19795. + return;
  19796. + }
  19797. +
  19798. + now = ftrace_now(cpu);
  19799. + latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
  19800. + latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
  19801. + task);
  19802. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  19803. + task->timer_offset = latency;
  19804. +#endif
  19805. + }
  19806. +}
  19807. +#endif
  19808. +
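
probe_hrtimer_interrupt() only records a sample when the offset it receives is zero or negative, which the code treats as the timer being serviced at or after its programmed expiry, and it stores the lateness as a positive microsecond value via div_s64(-latency_ns, NSECS_PER_USECS). A trivial user-space illustration of that sign handling, with made-up values:

    #include <stdio.h>

    #define NSECS_PER_USECS 1000L

    int main(void)
    {
            long long offset_ns = -250000;      /* hypothetical: 250 us late */
            long late_us;

            if (offset_ns <= 0) {
                    late_us = (long)(-offset_ns / NSECS_PER_USECS);
                    printf("missed_timer_offsets bucket: %ld us\n", late_us);
            }
            return 0;
    }
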
  19809. +static __init int latency_hist_init(void)
  19810. +{
  19811. + struct dentry *latency_hist_root = NULL;
  19812. + struct dentry *dentry;
  19813. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  19814. + struct dentry *dentry_sharedprio;
  19815. +#endif
  19816. + struct dentry *entry;
  19817. + struct dentry *enable_root;
  19818. + int i = 0;
  19819. + struct hist_data *my_hist;
  19820. + char name[64];
  19821. + char *cpufmt = "CPU%d";
  19822. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  19823. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19824. + char *cpufmt_maxlatproc = "max_latency-CPU%d";
  19825. + struct maxlatproc_data *mp = NULL;
  19826. +#endif
  19827. +
  19828. + dentry = tracing_init_dentry();
  19829. + latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry);
  19830. + enable_root = debugfs_create_dir("enable", latency_hist_root);
  19831. +
  19832. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  19833. + dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root);
  19834. + for_each_possible_cpu(i) {
  19835. + sprintf(name, cpufmt, i);
  19836. + entry = debugfs_create_file(name, 0444, dentry,
  19837. + &per_cpu(irqsoff_hist, i), &latency_hist_fops);
  19838. + my_hist = &per_cpu(irqsoff_hist, i);
  19839. + atomic_set(&my_hist->hist_mode, 1);
  19840. + my_hist->min_lat = LONG_MAX;
  19841. + }
  19842. + entry = debugfs_create_file("reset", 0644, dentry,
  19843. + (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops);
  19844. +#endif
  19845. +
  19846. +#ifdef CONFIG_PREEMPT_OFF_HIST
  19847. + dentry = debugfs_create_dir(preemptoff_hist_dir,
  19848. + latency_hist_root);
  19849. + for_each_possible_cpu(i) {
  19850. + sprintf(name, cpufmt, i);
  19851. + entry = debugfs_create_file(name, 0444, dentry,
  19852. + &per_cpu(preemptoff_hist, i), &latency_hist_fops);
  19853. + my_hist = &per_cpu(preemptoff_hist, i);
  19854. + atomic_set(&my_hist->hist_mode, 1);
  19855. + my_hist->min_lat = LONG_MAX;
  19856. + }
  19857. + entry = debugfs_create_file("reset", 0644, dentry,
  19858. + (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops);
  19859. +#endif
  19860. +
  19861. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  19862. + dentry = debugfs_create_dir(preemptirqsoff_hist_dir,
  19863. + latency_hist_root);
  19864. + for_each_possible_cpu(i) {
  19865. + sprintf(name, cpufmt, i);
  19866. + entry = debugfs_create_file(name, 0444, dentry,
  19867. + &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops);
  19868. + my_hist = &per_cpu(preemptirqsoff_hist, i);
  19869. + atomic_set(&my_hist->hist_mode, 1);
  19870. + my_hist->min_lat = LONG_MAX;
  19871. + }
  19872. + entry = debugfs_create_file("reset", 0644, dentry,
  19873. + (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops);
  19874. +#endif
  19875. +
  19876. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  19877. + entry = debugfs_create_file("preemptirqsoff", 0644,
  19878. + enable_root, (void *)&preemptirqsoff_enabled_data,
  19879. + &enable_fops);
  19880. +#endif
  19881. +
  19882. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  19883. + dentry = debugfs_create_dir(wakeup_latency_hist_dir,
  19884. + latency_hist_root);
  19885. + dentry_sharedprio = debugfs_create_dir(
  19886. + wakeup_latency_hist_dir_sharedprio, dentry);
  19887. + for_each_possible_cpu(i) {
  19888. + sprintf(name, cpufmt, i);
  19889. +
  19890. + entry = debugfs_create_file(name, 0444, dentry,
  19891. + &per_cpu(wakeup_latency_hist, i),
  19892. + &latency_hist_fops);
  19893. + my_hist = &per_cpu(wakeup_latency_hist, i);
  19894. + atomic_set(&my_hist->hist_mode, 1);
  19895. + my_hist->min_lat = LONG_MAX;
  19896. +
  19897. + entry = debugfs_create_file(name, 0444, dentry_sharedprio,
  19898. + &per_cpu(wakeup_latency_hist_sharedprio, i),
  19899. + &latency_hist_fops);
  19900. + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i);
  19901. + atomic_set(&my_hist->hist_mode, 1);
  19902. + my_hist->min_lat = LONG_MAX;
  19903. +
  19904. + sprintf(name, cpufmt_maxlatproc, i);
  19905. +
  19906. + mp = &per_cpu(wakeup_maxlatproc, i);
  19907. + entry = debugfs_create_file(name, 0444, dentry, mp,
  19908. + &maxlatproc_fops);
  19909. + clear_maxlatprocdata(mp);
  19910. +
  19911. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, i);
  19912. + entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp,
  19913. + &maxlatproc_fops);
  19914. + clear_maxlatprocdata(mp);
  19915. + }
  19916. + entry = debugfs_create_file("pid", 0644, dentry,
  19917. + (void *)&wakeup_pid, &pid_fops);
  19918. + entry = debugfs_create_file("reset", 0644, dentry,
  19919. + (void *)WAKEUP_LATENCY, &latency_hist_reset_fops);
  19920. + entry = debugfs_create_file("reset", 0644, dentry_sharedprio,
  19921. + (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops);
  19922. + entry = debugfs_create_file("wakeup", 0644,
  19923. + enable_root, (void *)&wakeup_latency_enabled_data,
  19924. + &enable_fops);
  19925. +#endif
  19926. +
  19927. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  19928. + dentry = debugfs_create_dir(missed_timer_offsets_dir,
  19929. + latency_hist_root);
  19930. + for_each_possible_cpu(i) {
  19931. + sprintf(name, cpufmt, i);
  19932. + entry = debugfs_create_file(name, 0444, dentry,
  19933. + &per_cpu(missed_timer_offsets, i), &latency_hist_fops);
  19934. + my_hist = &per_cpu(missed_timer_offsets, i);
  19935. + atomic_set(&my_hist->hist_mode, 1);
  19936. + my_hist->min_lat = LONG_MAX;
  19937. +
  19938. + sprintf(name, cpufmt_maxlatproc, i);
  19939. + mp = &per_cpu(missed_timer_offsets_maxlatproc, i);
  19940. + entry = debugfs_create_file(name, 0444, dentry, mp,
  19941. + &maxlatproc_fops);
  19942. + clear_maxlatprocdata(mp);
  19943. + }
  19944. + entry = debugfs_create_file("pid", 0644, dentry,
  19945. + (void *)&missed_timer_offsets_pid, &pid_fops);
  19946. + entry = debugfs_create_file("reset", 0644, dentry,
  19947. + (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops);
  19948. + entry = debugfs_create_file("missed_timer_offsets", 0644,
  19949. + enable_root, (void *)&missed_timer_offsets_enabled_data,
  19950. + &enable_fops);
  19951. +#endif
  19952. +
  19953. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  19954. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  19955. + dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
  19956. + latency_hist_root);
  19957. + for_each_possible_cpu(i) {
  19958. + sprintf(name, cpufmt, i);
  19959. + entry = debugfs_create_file(name, 0444, dentry,
  19960. + &per_cpu(timerandwakeup_latency_hist, i),
  19961. + &latency_hist_fops);
  19962. + my_hist = &per_cpu(timerandwakeup_latency_hist, i);
  19963. + atomic_set(&my_hist->hist_mode, 1);
  19964. + my_hist->min_lat = LONG_MAX;
  19965. +
  19966. + sprintf(name, cpufmt_maxlatproc, i);
  19967. + mp = &per_cpu(timerandwakeup_maxlatproc, i);
  19968. + entry = debugfs_create_file(name, 0444, dentry, mp,
  19969. + &maxlatproc_fops);
  19970. + clear_maxlatprocdata(mp);
  19971. + }
  19972. + entry = debugfs_create_file("reset", 0644, dentry,
  19973. + (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops);
  19974. + entry = debugfs_create_file("timerandwakeup", 0644,
  19975. + enable_root, (void *)&timerandwakeup_enabled_data,
  19976. + &enable_fops);
  19977. +#endif
  19978. + return 0;
  19979. +}
  19980. +
  19981. +device_initcall(latency_hist_init);
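
latency_hist_init() builds one directory per histogram type under latency_hist/ in debugfs, each holding CPU<n> files, a reset file and, where applicable, pid and max_latency-CPU<n> files (wakeup additionally gets a sharedprio subdirectory), plus a shared enable/ directory. A small sketch that lists this tree from user space; the mount point is an assumption and only one level of subdirectories is walked:

    #include <dirent.h>
    #include <stdio.h>

    int main(void)
    {
            const char *root = "/sys/kernel/debug/latency_hist";
            char sub[512];
            DIR *top = opendir(root), *dir;
            struct dirent *d, *e;

            if (!top) {
                    perror(root);
                    return 1;
            }
            while ((d = readdir(top))) {
                    if (d->d_name[0] == '.')
                            continue;
                    snprintf(sub, sizeof(sub), "%s/%s", root, d->d_name);
                    dir = opendir(sub);
                    if (!dir)
                            continue;           /* not a directory */
                    printf("%s/\n", d->d_name);
                    while ((e = readdir(dir)))
                            if (e->d_name[0] != '.')
                                    printf("        %s\n", e->d_name);
                    closedir(dir);
            }
            closedir(top);
            return 0;
    }
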
  19982. diff -Nur linux-4.8.15.orig/kernel/trace/Makefile linux-4.8.15/kernel/trace/Makefile
  19983. --- linux-4.8.15.orig/kernel/trace/Makefile 2016-12-15 17:50:48.000000000 +0100
  19984. +++ linux-4.8.15/kernel/trace/Makefile 2017-01-01 17:07:16.051428246 +0100
  19985. @@ -37,6 +37,10 @@
  19986. obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
  19987. obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
  19988. obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
  19989. +obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o
  19990. +obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o
  19991. +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o
  19992. +obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o
  19993. obj-$(CONFIG_NOP_TRACER) += trace_nop.o
  19994. obj-$(CONFIG_STACK_TRACER) += trace_stack.o
  19995. obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
  19996. diff -Nur linux-4.8.15.orig/kernel/trace/trace.c linux-4.8.15/kernel/trace/trace.c
  19997. --- linux-4.8.15.orig/kernel/trace/trace.c 2016-12-15 17:50:48.000000000 +0100
  19998. +++ linux-4.8.15/kernel/trace/trace.c 2017-01-01 17:07:16.055428503 +0100
  19999. @@ -1897,6 +1897,7 @@
  20000. struct task_struct *tsk = current;
  20001. entry->preempt_count = pc & 0xff;
  20002. + entry->preempt_lazy_count = preempt_lazy_count();
  20003. entry->pid = (tsk) ? tsk->pid : 0;
  20004. entry->flags =
  20005. #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
  20006. @@ -1907,8 +1908,11 @@
  20007. ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
  20008. ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
  20009. ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
  20010. - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
  20011. + (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) |
  20012. + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) |
  20013. (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
  20014. +
  20015. + entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
  20016. }
  20017. EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
  20018. @@ -2892,14 +2896,17 @@
  20019. static void print_lat_help_header(struct seq_file *m)
  20020. {
  20021. - seq_puts(m, "# _------=> CPU# \n"
  20022. - "# / _-----=> irqs-off \n"
  20023. - "# | / _----=> need-resched \n"
  20024. - "# || / _---=> hardirq/softirq \n"
  20025. - "# ||| / _--=> preempt-depth \n"
  20026. - "# |||| / delay \n"
  20027. - "# cmd pid ||||| time | caller \n"
  20028. - "# \\ / ||||| \\ | / \n");
  20029. + seq_puts(m, "# _--------=> CPU# \n"
  20030. + "# / _-------=> irqs-off \n"
  20031. + "# | / _------=> need-resched \n"
  20032. + "# || / _-----=> need-resched_lazy \n"
  20033. + "# ||| / _----=> hardirq/softirq \n"
  20034. + "# |||| / _---=> preempt-depth \n"
  20035. + "# ||||| / _--=> preempt-lazy-depth\n"
  20036. + "# |||||| / _-=> migrate-disable \n"
  20037. + "# ||||||| / delay \n"
  20038. + "# cmd pid |||||||| time | caller \n"
  20039. + "# \\ / |||||||| \\ | / \n");
  20040. }
  20041. static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
  20042. @@ -2925,11 +2932,14 @@
  20043. print_event_info(buf, m);
  20044. seq_puts(m, "# _-----=> irqs-off\n"
  20045. "# / _----=> need-resched\n"
  20046. - "# | / _---=> hardirq/softirq\n"
  20047. - "# || / _--=> preempt-depth\n"
  20048. - "# ||| / delay\n"
  20049. - "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"
  20050. - "# | | | |||| | |\n");
  20051. + "# |/ _-----=> need-resched_lazy\n"
  20052. + "# || / _---=> hardirq/softirq\n"
  20053. + "# ||| / _--=> preempt-depth\n"
  20054. + "# |||| / _-=> preempt-lazy-depth\n"
  20055. + "# ||||| / _-=> migrate-disable \n"
  20056. + "# |||||| / delay\n"
  20057. + "# TASK-PID CPU# ||||||| TIMESTAMP FUNCTION\n"
  20058. + "# | | | ||||||| | |\n");
  20059. }
  20060. void
  20061. diff -Nur linux-4.8.15.orig/kernel/trace/trace_events.c linux-4.8.15/kernel/trace/trace_events.c
  20062. --- linux-4.8.15.orig/kernel/trace/trace_events.c 2016-12-15 17:50:48.000000000 +0100
  20063. +++ linux-4.8.15/kernel/trace/trace_events.c 2017-01-01 17:07:16.059428767 +0100
  20064. @@ -187,6 +187,8 @@
  20065. __common_field(unsigned char, flags);
  20066. __common_field(unsigned char, preempt_count);
  20067. __common_field(int, pid);
  20068. + __common_field(unsigned short, migrate_disable);
  20069. + __common_field(unsigned short, padding);
  20070. return ret;
  20071. }
  20072. diff -Nur linux-4.8.15.orig/kernel/trace/trace.h linux-4.8.15/kernel/trace/trace.h
  20073. --- linux-4.8.15.orig/kernel/trace/trace.h 2016-12-15 17:50:48.000000000 +0100
  20074. +++ linux-4.8.15/kernel/trace/trace.h 2017-01-01 17:07:16.055428503 +0100
  20075. @@ -123,6 +123,7 @@
  20076. * NEED_RESCHED - reschedule is requested
  20077. * HARDIRQ - inside an interrupt handler
  20078. * SOFTIRQ - inside a softirq handler
  20079. + * NEED_RESCHED_LAZY - lazy reschedule is requested
  20080. */
  20081. enum trace_flag_type {
  20082. TRACE_FLAG_IRQS_OFF = 0x01,
  20083. @@ -132,6 +133,7 @@
  20084. TRACE_FLAG_SOFTIRQ = 0x10,
  20085. TRACE_FLAG_PREEMPT_RESCHED = 0x20,
  20086. TRACE_FLAG_NMI = 0x40,
  20087. + TRACE_FLAG_NEED_RESCHED_LAZY = 0x80,
  20088. };
  20089. #define TRACE_BUF_SIZE 1024
  20090. diff -Nur linux-4.8.15.orig/kernel/trace/trace_irqsoff.c linux-4.8.15/kernel/trace/trace_irqsoff.c
  20091. --- linux-4.8.15.orig/kernel/trace/trace_irqsoff.c 2016-12-15 17:50:48.000000000 +0100
  20092. +++ linux-4.8.15/kernel/trace/trace_irqsoff.c 2017-01-01 17:07:16.059428767 +0100
  20093. @@ -13,6 +13,7 @@
  20094. #include <linux/uaccess.h>
  20095. #include <linux/module.h>
  20096. #include <linux/ftrace.h>
  20097. +#include <trace/events/hist.h>
  20098. #include "trace.h"
  20099. @@ -424,11 +425,13 @@
  20100. {
  20101. if (preempt_trace() || irq_trace())
  20102. start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  20103. + trace_preemptirqsoff_hist_rcuidle(TRACE_START, 1);
  20104. }
  20105. EXPORT_SYMBOL_GPL(start_critical_timings);
  20106. void stop_critical_timings(void)
  20107. {
  20108. + trace_preemptirqsoff_hist_rcuidle(TRACE_STOP, 0);
  20109. if (preempt_trace() || irq_trace())
  20110. stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  20111. }
  20112. @@ -438,6 +441,7 @@
  20113. #ifdef CONFIG_PROVE_LOCKING
  20114. void time_hardirqs_on(unsigned long a0, unsigned long a1)
  20115. {
  20116. + trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0);
  20117. if (!preempt_trace() && irq_trace())
  20118. stop_critical_timing(a0, a1);
  20119. }
  20120. @@ -446,6 +450,7 @@
  20121. {
  20122. if (!preempt_trace() && irq_trace())
  20123. start_critical_timing(a0, a1);
  20124. + trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1);
  20125. }
  20126. #else /* !CONFIG_PROVE_LOCKING */
  20127. @@ -471,6 +476,7 @@
  20128. */
  20129. void trace_hardirqs_on(void)
  20130. {
  20131. + trace_preemptirqsoff_hist(IRQS_ON, 0);
  20132. if (!preempt_trace() && irq_trace())
  20133. stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  20134. }
  20135. @@ -480,11 +486,13 @@
  20136. {
  20137. if (!preempt_trace() && irq_trace())
  20138. start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  20139. + trace_preemptirqsoff_hist(IRQS_OFF, 1);
  20140. }
  20141. EXPORT_SYMBOL(trace_hardirqs_off);
  20142. __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
  20143. {
  20144. + trace_preemptirqsoff_hist(IRQS_ON, 0);
  20145. if (!preempt_trace() && irq_trace())
  20146. stop_critical_timing(CALLER_ADDR0, caller_addr);
  20147. }
  20148. @@ -494,6 +502,7 @@
  20149. {
  20150. if (!preempt_trace() && irq_trace())
  20151. start_critical_timing(CALLER_ADDR0, caller_addr);
  20152. + trace_preemptirqsoff_hist(IRQS_OFF, 1);
  20153. }
  20154. EXPORT_SYMBOL(trace_hardirqs_off_caller);
  20155. @@ -503,12 +512,14 @@
  20156. #ifdef CONFIG_PREEMPT_TRACER
  20157. void trace_preempt_on(unsigned long a0, unsigned long a1)
  20158. {
  20159. + trace_preemptirqsoff_hist(PREEMPT_ON, 0);
  20160. if (preempt_trace() && !irq_trace())
  20161. stop_critical_timing(a0, a1);
  20162. }
  20163. void trace_preempt_off(unsigned long a0, unsigned long a1)
  20164. {
  20165. + trace_preemptirqsoff_hist(PREEMPT_ON, 1);
  20166. if (preempt_trace() && !irq_trace())
  20167. start_critical_timing(a0, a1);
  20168. }
  20169. diff -Nur linux-4.8.15.orig/kernel/trace/trace_output.c linux-4.8.15/kernel/trace/trace_output.c
  20170. --- linux-4.8.15.orig/kernel/trace/trace_output.c 2016-12-15 17:50:48.000000000 +0100
  20171. +++ linux-4.8.15/kernel/trace/trace_output.c 2017-01-01 17:07:16.059428767 +0100
  20172. @@ -386,6 +386,7 @@
  20173. {
  20174. char hardsoft_irq;
  20175. char need_resched;
  20176. + char need_resched_lazy;
  20177. char irqs_off;
  20178. int hardirq;
  20179. int softirq;
  20180. @@ -416,6 +417,9 @@
  20181. break;
  20182. }
  20183. + need_resched_lazy =
  20184. + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
  20185. +
  20186. hardsoft_irq =
  20187. (nmi && hardirq) ? 'Z' :
  20188. nmi ? 'z' :
  20189. @@ -424,14 +428,25 @@
  20190. softirq ? 's' :
  20191. '.' ;
  20192. - trace_seq_printf(s, "%c%c%c",
  20193. - irqs_off, need_resched, hardsoft_irq);
  20194. + trace_seq_printf(s, "%c%c%c%c",
  20195. + irqs_off, need_resched, need_resched_lazy,
  20196. + hardsoft_irq);
  20197. if (entry->preempt_count)
  20198. trace_seq_printf(s, "%x", entry->preempt_count);
  20199. else
  20200. trace_seq_putc(s, '.');
  20201. + if (entry->preempt_lazy_count)
  20202. + trace_seq_printf(s, "%x", entry->preempt_lazy_count);
  20203. + else
  20204. + trace_seq_putc(s, '.');
  20205. +
  20206. + if (entry->migrate_disable)
  20207. + trace_seq_printf(s, "%x", entry->migrate_disable);
  20208. + else
  20209. + trace_seq_putc(s, '.');
  20210. +
  20211. return !trace_seq_has_overflowed(s);
  20212. }
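
The trace_output.c change above widens the per-event flag field from three to four characters (irqs-off, need-resched, the new need-resched-lazy 'L', hardirq/softirq) and appends the preempt-lazy-depth and migrate-disable counts after the existing preempt-depth digit. A simplified user-space rendering of that format, ignoring the NMI and preempt-resched cases the real code handles; the flag values follow the trace_flag_type enum:

    #include <stdio.h>

    #define TRACE_FLAG_IRQS_OFF             0x01
    #define TRACE_FLAG_NEED_RESCHED         0x04
    #define TRACE_FLAG_HARDIRQ              0x08
    #define TRACE_FLAG_SOFTIRQ              0x10
    #define TRACE_FLAG_NEED_RESCHED_LAZY    0x80

    static void print_lat_fmt(unsigned flags, int pc, int lazy, int migrate)
    {
            printf("%c%c%c%c",
                   (flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
                   (flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.',
                   (flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.',
                   (flags & TRACE_FLAG_HARDIRQ) ? 'h' :
                   (flags & TRACE_FLAG_SOFTIRQ) ? 's' : '.');
            if (pc)      printf("%x", pc);      else putchar('.');
            if (lazy)    printf("%x", lazy);    else putchar('.');
            if (migrate) printf("%x", migrate); else putchar('.');
            putchar('\n');
    }

    int main(void)
    {
            /* prints "d.L.11." */
            print_lat_fmt(TRACE_FLAG_IRQS_OFF | TRACE_FLAG_NEED_RESCHED_LAZY,
                          1, 1, 0);
            return 0;
    }
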
  20213. diff -Nur linux-4.8.15.orig/kernel/user.c linux-4.8.15/kernel/user.c
  20214. --- linux-4.8.15.orig/kernel/user.c 2016-12-15 17:50:48.000000000 +0100
  20215. +++ linux-4.8.15/kernel/user.c 2017-01-01 17:07:16.059428767 +0100
  20216. @@ -161,11 +161,11 @@
  20217. if (!up)
  20218. return;
  20219. - local_irq_save(flags);
  20220. + local_irq_save_nort(flags);
  20221. if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
  20222. free_user(up, flags);
  20223. else
  20224. - local_irq_restore(flags);
  20225. + local_irq_restore_nort(flags);
  20226. }
  20227. struct user_struct *alloc_uid(kuid_t uid)
  20228. diff -Nur linux-4.8.15.orig/kernel/watchdog.c linux-4.8.15/kernel/watchdog.c
  20229. --- linux-4.8.15.orig/kernel/watchdog.c 2016-12-15 17:50:48.000000000 +0100
  20230. +++ linux-4.8.15/kernel/watchdog.c 2017-01-01 17:07:16.059428767 +0100
  20231. @@ -315,6 +315,8 @@
  20232. #ifdef CONFIG_HARDLOCKUP_DETECTOR
  20233. +static DEFINE_RAW_SPINLOCK(watchdog_output_lock);
  20234. +
  20235. static struct perf_event_attr wd_hw_attr = {
  20236. .type = PERF_TYPE_HARDWARE,
  20237. .config = PERF_COUNT_HW_CPU_CYCLES,
  20238. @@ -349,6 +351,13 @@
  20239. /* only print hardlockups once */
  20240. if (__this_cpu_read(hard_watchdog_warn) == true)
  20241. return;
  20242. + /*
  20243. + * If early-printk is enabled then make sure we do not
  20244. + * lock up in printk() and kill console logging:
  20245. + */
  20246. + printk_kill();
  20247. +
  20248. + raw_spin_lock(&watchdog_output_lock);
  20249. pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
  20250. print_modules();
  20251. @@ -366,6 +375,7 @@
  20252. !test_and_set_bit(0, &hardlockup_allcpu_dumped))
  20253. trigger_allbutself_cpu_backtrace();
  20254. + raw_spin_unlock(&watchdog_output_lock);
  20255. if (hardlockup_panic)
  20256. nmi_panic(regs, "Hard LOCKUP");
  20257. @@ -513,6 +523,7 @@
  20258. /* kick off the timer for the hardlockup detector */
  20259. hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  20260. hrtimer->function = watchdog_timer_fn;
  20261. + hrtimer->irqsafe = 1;
  20262. /* Enable the perf event */
  20263. watchdog_nmi_enable(cpu);
  20264. diff -Nur linux-4.8.15.orig/kernel/workqueue.c linux-4.8.15/kernel/workqueue.c
  20265. --- linux-4.8.15.orig/kernel/workqueue.c 2016-12-15 17:50:48.000000000 +0100
  20266. +++ linux-4.8.15/kernel/workqueue.c 2017-01-01 17:07:16.063429015 +0100
  20267. @@ -48,6 +48,8 @@
  20268. #include <linux/nodemask.h>
  20269. #include <linux/moduleparam.h>
  20270. #include <linux/uaccess.h>
  20271. +#include <linux/locallock.h>
  20272. +#include <linux/delay.h>
  20273. #include "workqueue_internal.h"
  20274. @@ -121,11 +123,16 @@
  20275. * cpu or grabbing pool->lock is enough for read access. If
  20276. * POOL_DISASSOCIATED is set, it's identical to L.
  20277. *
  20278. + * On RT we need the extra protection via rt_lock_idle_list() for
  20279. + * the list manipulations against read access from
  20280. + * wq_worker_sleeping(). All other places are nicely serialized via
  20281. + * pool->lock.
  20282. + *
  20283. * A: pool->attach_mutex protected.
  20284. *
  20285. * PL: wq_pool_mutex protected.
  20286. *
  20287. - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
  20288. + * PR: wq_pool_mutex protected for writes. RCU protected for reads.
  20289. *
  20290. * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
  20291. *
  20292. @@ -134,7 +141,7 @@
  20293. *
  20294. * WQ: wq->mutex protected.
  20295. *
  20296. - * WR: wq->mutex protected for writes. Sched-RCU protected for reads.
  20297. + * WR: wq->mutex protected for writes. RCU protected for reads.
  20298. *
  20299. * MD: wq_mayday_lock protected.
  20300. */
  20301. @@ -185,7 +192,7 @@
  20302. atomic_t nr_running ____cacheline_aligned_in_smp;
  20303. /*
  20304. - * Destruction of pool is sched-RCU protected to allow dereferences
  20305. + * Destruction of pool is RCU protected to allow dereferences
  20306. * from get_work_pool().
  20307. */
  20308. struct rcu_head rcu;
  20309. @@ -214,7 +221,7 @@
  20310. /*
  20311. * Release of unbound pwq is punted to system_wq. See put_pwq()
  20312. * and pwq_unbound_release_workfn() for details. pool_workqueue
  20313. - * itself is also sched-RCU protected so that the first pwq can be
  20314. + * itself is also RCU protected so that the first pwq can be
  20315. * determined without grabbing wq->mutex.
  20316. */
  20317. struct work_struct unbound_release_work;
  20318. @@ -348,6 +355,8 @@
  20319. struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
  20320. EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
  20321. +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock);
  20322. +
  20323. static int worker_thread(void *__worker);
  20324. static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  20325. @@ -355,20 +364,20 @@
  20326. #include <trace/events/workqueue.h>
  20327. #define assert_rcu_or_pool_mutex() \
  20328. - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
  20329. + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
  20330. !lockdep_is_held(&wq_pool_mutex), \
  20331. - "sched RCU or wq_pool_mutex should be held")
  20332. + "RCU or wq_pool_mutex should be held")
  20333. #define assert_rcu_or_wq_mutex(wq) \
  20334. - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
  20335. + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
  20336. !lockdep_is_held(&wq->mutex), \
  20337. - "sched RCU or wq->mutex should be held")
  20338. + "RCU or wq->mutex should be held")
  20339. #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
  20340. - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
  20341. + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
  20342. !lockdep_is_held(&wq->mutex) && \
  20343. !lockdep_is_held(&wq_pool_mutex), \
  20344. - "sched RCU, wq->mutex or wq_pool_mutex should be held")
  20345. + "RCU, wq->mutex or wq_pool_mutex should be held")
  20346. #define for_each_cpu_worker_pool(pool, cpu) \
  20347. for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
  20348. @@ -380,7 +389,7 @@
  20349. * @pool: iteration cursor
  20350. * @pi: integer used for iteration
  20351. *
  20352. - * This must be called either with wq_pool_mutex held or sched RCU read
  20353. + * This must be called either with wq_pool_mutex held or RCU read
  20354. * locked. If the pool needs to be used beyond the locking in effect, the
  20355. * caller is responsible for guaranteeing that the pool stays online.
  20356. *
  20357. @@ -412,7 +421,7 @@
  20358. * @pwq: iteration cursor
  20359. * @wq: the target workqueue
  20360. *
  20361. - * This must be called either with wq->mutex held or sched RCU read locked.
  20362. + * This must be called either with wq->mutex held or RCU read locked.
  20363. * If the pwq needs to be used beyond the locking in effect, the caller is
  20364. * responsible for guaranteeing that the pwq stays online.
  20365. *
  20366. @@ -424,6 +433,31 @@
  20367. if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
  20368. else
  20369. +#ifdef CONFIG_PREEMPT_RT_BASE
  20370. +static inline void rt_lock_idle_list(struct worker_pool *pool)
  20371. +{
  20372. + preempt_disable();
  20373. +}
  20374. +static inline void rt_unlock_idle_list(struct worker_pool *pool)
  20375. +{
  20376. + preempt_enable();
  20377. +}
  20378. +static inline void sched_lock_idle_list(struct worker_pool *pool) { }
  20379. +static inline void sched_unlock_idle_list(struct worker_pool *pool) { }
  20380. +#else
  20381. +static inline void rt_lock_idle_list(struct worker_pool *pool) { }
  20382. +static inline void rt_unlock_idle_list(struct worker_pool *pool) { }
  20383. +static inline void sched_lock_idle_list(struct worker_pool *pool)
  20384. +{
  20385. + spin_lock_irq(&pool->lock);
  20386. +}
  20387. +static inline void sched_unlock_idle_list(struct worker_pool *pool)
  20388. +{
  20389. + spin_unlock_irq(&pool->lock);
  20390. +}
  20391. +#endif
  20392. +
  20393. +
  20394. #ifdef CONFIG_DEBUG_OBJECTS_WORK
  20395. static struct debug_obj_descr work_debug_descr;
  20396. @@ -548,7 +582,7 @@
  20397. * @wq: the target workqueue
  20398. * @node: the node ID
  20399. *
  20400. - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
  20401. + * This must be called with any of wq_pool_mutex, wq->mutex or RCU
  20402. * read locked.
  20403. * If the pwq needs to be used beyond the locking in effect, the caller is
  20404. * responsible for guaranteeing that the pwq stays online.
  20405. @@ -692,8 +726,8 @@
  20406. * @work: the work item of interest
  20407. *
  20408. * Pools are created and destroyed under wq_pool_mutex, and allows read
  20409. - * access under sched-RCU read lock. As such, this function should be
  20410. - * called under wq_pool_mutex or with preemption disabled.
  20411. + * access under RCU read lock. As such, this function should be
  20412. + * called under wq_pool_mutex or inside of a rcu_read_lock() region.
  20413. *
  20414. * All fields of the returned pool are accessible as long as the above
  20415. * mentioned locking is in effect. If the returned pool needs to be used
  20416. @@ -830,50 +864,45 @@
  20417. */
  20418. static void wake_up_worker(struct worker_pool *pool)
  20419. {
  20420. - struct worker *worker = first_idle_worker(pool);
  20421. + struct worker *worker;
  20422. +
  20423. + rt_lock_idle_list(pool);
  20424. +
  20425. + worker = first_idle_worker(pool);
  20426. if (likely(worker))
  20427. wake_up_process(worker->task);
  20428. +
  20429. + rt_unlock_idle_list(pool);
  20430. }
  20431. /**
  20432. - * wq_worker_waking_up - a worker is waking up
  20433. + * wq_worker_running - a worker is running again
  20434. * @task: task waking up
  20435. - * @cpu: CPU @task is waking up to
  20436. - *
  20437. - * This function is called during try_to_wake_up() when a worker is
  20438. - * being awoken.
  20439. *
  20440. - * CONTEXT:
  20441. - * spin_lock_irq(rq->lock)
  20442. + * This function is called when a worker returns from schedule()
  20443. */
  20444. -void wq_worker_waking_up(struct task_struct *task, int cpu)
  20445. +void wq_worker_running(struct task_struct *task)
  20446. {
  20447. struct worker *worker = kthread_data(task);
  20448. - if (!(worker->flags & WORKER_NOT_RUNNING)) {
  20449. - WARN_ON_ONCE(worker->pool->cpu != cpu);
  20450. + if (!worker->sleeping)
  20451. + return;
  20452. + if (!(worker->flags & WORKER_NOT_RUNNING))
  20453. atomic_inc(&worker->pool->nr_running);
  20454. - }
  20455. + worker->sleeping = 0;
  20456. }
  20457. /**
  20458. * wq_worker_sleeping - a worker is going to sleep
  20459. * @task: task going to sleep
  20460. *
  20461. - * This function is called during schedule() when a busy worker is
  20462. - * going to sleep. Worker on the same cpu can be woken up by
  20463. - * returning pointer to its task.
  20464. - *
  20465. - * CONTEXT:
  20466. - * spin_lock_irq(rq->lock)
  20467. - *
  20468. - * Return:
  20469. - * Worker task on @cpu to wake up, %NULL if none.
  20470. + * This function is called from schedule() when a busy worker is
  20471. + * going to sleep.
  20472. */
  20473. -struct task_struct *wq_worker_sleeping(struct task_struct *task)
  20474. +void wq_worker_sleeping(struct task_struct *task)
  20475. {
  20476. - struct worker *worker = kthread_data(task), *to_wakeup = NULL;
  20477. + struct worker *worker = kthread_data(task);
  20478. struct worker_pool *pool;
  20479. /*
  20480. @@ -882,29 +911,26 @@
  20481. * checking NOT_RUNNING.
  20482. */
  20483. if (worker->flags & WORKER_NOT_RUNNING)
  20484. - return NULL;
  20485. + return;
  20486. pool = worker->pool;
  20487. - /* this can only happen on the local cpu */
  20488. - if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
  20489. - return NULL;
  20490. + if (WARN_ON_ONCE(worker->sleeping))
  20491. + return;
  20492. +
  20493. + worker->sleeping = 1;
  20494. /*
  20495. * The counterpart of the following dec_and_test, implied mb,
  20496. * worklist not empty test sequence is in insert_work().
  20497. * Please read comment there.
  20498. - *
  20499. - * NOT_RUNNING is clear. This means that we're bound to and
  20500. - * running on the local cpu w/ rq lock held and preemption
  20501. - * disabled, which in turn means that none else could be
  20502. - * manipulating idle_list, so dereferencing idle_list without pool
  20503. - * lock is safe.
  20504. */
  20505. if (atomic_dec_and_test(&pool->nr_running) &&
  20506. - !list_empty(&pool->worklist))
  20507. - to_wakeup = first_idle_worker(pool);
  20508. - return to_wakeup ? to_wakeup->task : NULL;
  20509. + !list_empty(&pool->worklist)) {
  20510. + sched_lock_idle_list(pool);
  20511. + wake_up_worker(pool);
  20512. + sched_unlock_idle_list(pool);
  20513. + }
  20514. }
  20515. /**
  20516. @@ -1098,12 +1124,14 @@
  20517. {
  20518. if (pwq) {
  20519. /*
  20520. - * As both pwqs and pools are sched-RCU protected, the
  20521. + * As both pwqs and pools are RCU protected, the
  20522. * following lock operations are safe.
  20523. */
  20524. - spin_lock_irq(&pwq->pool->lock);
  20525. + rcu_read_lock();
  20526. + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock);
  20527. put_pwq(pwq);
  20528. - spin_unlock_irq(&pwq->pool->lock);
  20529. + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock);
  20530. + rcu_read_unlock();
  20531. }
  20532. }
  20533. @@ -1207,7 +1235,7 @@
  20534. struct worker_pool *pool;
  20535. struct pool_workqueue *pwq;
  20536. - local_irq_save(*flags);
  20537. + local_lock_irqsave(pendingb_lock, *flags);
  20538. /* try to steal the timer if it exists */
  20539. if (is_dwork) {
  20540. @@ -1226,6 +1254,7 @@
  20541. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
  20542. return 0;
  20543. + rcu_read_lock();
  20544. /*
  20545. * The queueing is in progress, or it is already queued. Try to
  20546. * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
  20547. @@ -1264,14 +1293,16 @@
  20548. set_work_pool_and_keep_pending(work, pool->id);
  20549. spin_unlock(&pool->lock);
  20550. + rcu_read_unlock();
  20551. return 1;
  20552. }
  20553. spin_unlock(&pool->lock);
  20554. fail:
  20555. - local_irq_restore(*flags);
  20556. + rcu_read_unlock();
  20557. + local_unlock_irqrestore(pendingb_lock, *flags);
  20558. if (work_is_canceling(work))
  20559. return -ENOENT;
  20560. - cpu_relax();
  20561. + cpu_chill();
  20562. return -EAGAIN;
  20563. }
  20564. @@ -1373,7 +1404,7 @@
  20565. * queued or lose PENDING. Grabbing PENDING and queueing should
  20566. * happen with IRQ disabled.
  20567. */
  20568. - WARN_ON_ONCE(!irqs_disabled());
  20569. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  20570. debug_work_activate(work);
  20571. @@ -1381,6 +1412,7 @@
  20572. if (unlikely(wq->flags & __WQ_DRAINING) &&
  20573. WARN_ON_ONCE(!is_chained_work(wq)))
  20574. return;
  20575. + rcu_read_lock();
  20576. retry:
  20577. if (req_cpu == WORK_CPU_UNBOUND)
  20578. cpu = wq_select_unbound_cpu(raw_smp_processor_id());
  20579. @@ -1437,10 +1469,8 @@
  20580. /* pwq determined, queue */
  20581. trace_workqueue_queue_work(req_cpu, pwq, work);
  20582. - if (WARN_ON(!list_empty(&work->entry))) {
  20583. - spin_unlock(&pwq->pool->lock);
  20584. - return;
  20585. - }
  20586. + if (WARN_ON(!list_empty(&work->entry)))
  20587. + goto out;
  20588. pwq->nr_in_flight[pwq->work_color]++;
  20589. work_flags = work_color_to_flags(pwq->work_color);
  20590. @@ -1458,7 +1488,9 @@
  20591. insert_work(pwq, work, worklist, work_flags);
  20592. +out:
  20593. spin_unlock(&pwq->pool->lock);
  20594. + rcu_read_unlock();
  20595. }
  20596. /**
  20597. @@ -1478,14 +1510,14 @@
  20598. bool ret = false;
  20599. unsigned long flags;
  20600. - local_irq_save(flags);
20601. + local_lock_irqsave(pendingb_lock, flags);
  20602. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
  20603. __queue_work(cpu, wq, work);
  20604. ret = true;
  20605. }
  20606. - local_irq_restore(flags);
  20607. + local_unlock_irqrestore(pendingb_lock, flags);
  20608. return ret;
  20609. }
  20610. EXPORT_SYMBOL(queue_work_on);
  20611. @@ -1552,14 +1584,14 @@
  20612. unsigned long flags;
  20613. /* read the comment in __queue_work() */
  20614. - local_irq_save(flags);
  20615. + local_lock_irqsave(pendingb_lock, flags);
  20616. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
  20617. __queue_delayed_work(cpu, wq, dwork, delay);
  20618. ret = true;
  20619. }
  20620. - local_irq_restore(flags);
  20621. + local_unlock_irqrestore(pendingb_lock, flags);
  20622. return ret;
  20623. }
  20624. EXPORT_SYMBOL(queue_delayed_work_on);
  20625. @@ -1594,7 +1626,7 @@
  20626. if (likely(ret >= 0)) {
  20627. __queue_delayed_work(cpu, wq, dwork, delay);
  20628. - local_irq_restore(flags);
  20629. + local_unlock_irqrestore(pendingb_lock, flags);
  20630. }
  20631. /* -ENOENT from try_to_grab_pending() becomes %true */
  20632. @@ -1627,7 +1659,9 @@
  20633. worker->last_active = jiffies;
  20634. /* idle_list is LIFO */
  20635. + rt_lock_idle_list(pool);
  20636. list_add(&worker->entry, &pool->idle_list);
  20637. + rt_unlock_idle_list(pool);
  20638. if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
  20639. mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
  20640. @@ -1660,7 +1694,9 @@
  20641. return;
  20642. worker_clr_flags(worker, WORKER_IDLE);
  20643. pool->nr_idle--;
  20644. + rt_lock_idle_list(pool);
  20645. list_del_init(&worker->entry);
  20646. + rt_unlock_idle_list(pool);
  20647. }
  20648. static struct worker *alloc_worker(int node)
  20649. @@ -1826,7 +1862,9 @@
  20650. pool->nr_workers--;
  20651. pool->nr_idle--;
  20652. + rt_lock_idle_list(pool);
  20653. list_del_init(&worker->entry);
  20654. + rt_unlock_idle_list(pool);
  20655. worker->flags |= WORKER_DIE;
  20656. wake_up_process(worker->task);
  20657. }
  20658. @@ -2785,14 +2823,14 @@
  20659. might_sleep();
  20660. - local_irq_disable();
  20661. + rcu_read_lock();
  20662. pool = get_work_pool(work);
  20663. if (!pool) {
  20664. - local_irq_enable();
  20665. + rcu_read_unlock();
  20666. return false;
  20667. }
  20668. - spin_lock(&pool->lock);
  20669. + spin_lock_irq(&pool->lock);
  20670. /* see the comment in try_to_grab_pending() with the same code */
  20671. pwq = get_work_pwq(work);
  20672. if (pwq) {
  20673. @@ -2821,10 +2859,11 @@
  20674. else
  20675. lock_map_acquire_read(&pwq->wq->lockdep_map);
  20676. lock_map_release(&pwq->wq->lockdep_map);
  20677. -
  20678. + rcu_read_unlock();
  20679. return true;
  20680. already_gone:
  20681. spin_unlock_irq(&pool->lock);
  20682. + rcu_read_unlock();
  20683. return false;
  20684. }
  20685. @@ -2911,7 +2950,7 @@
  20686. /* tell other tasks trying to grab @work to back off */
  20687. mark_work_canceling(work);
  20688. - local_irq_restore(flags);
  20689. + local_unlock_irqrestore(pendingb_lock, flags);
  20690. flush_work(work);
  20691. clear_work_data(work);
  20692. @@ -2966,10 +3005,10 @@
  20693. */
  20694. bool flush_delayed_work(struct delayed_work *dwork)
  20695. {
  20696. - local_irq_disable();
  20697. + local_lock_irq(pendingb_lock);
  20698. if (del_timer_sync(&dwork->timer))
  20699. __queue_work(dwork->cpu, dwork->wq, &dwork->work);
  20700. - local_irq_enable();
  20701. + local_unlock_irq(pendingb_lock);
  20702. return flush_work(&dwork->work);
  20703. }
  20704. EXPORT_SYMBOL(flush_delayed_work);
  20705. @@ -3004,7 +3043,7 @@
  20706. set_work_pool_and_clear_pending(&dwork->work,
  20707. get_work_pool_id(&dwork->work));
  20708. - local_irq_restore(flags);
  20709. + local_unlock_irqrestore(pendingb_lock, flags);
  20710. return ret;
  20711. }
  20712. EXPORT_SYMBOL(cancel_delayed_work);
  20713. @@ -3233,7 +3272,7 @@
  20714. * put_unbound_pool - put a worker_pool
  20715. * @pool: worker_pool to put
  20716. *
  20717. - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU
  20718. + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
  20719. * safe manner. get_unbound_pool() calls this function on its failure path
  20720. * and this function should be able to release pools which went through,
  20721. * successfully or not, init_worker_pool().
  20722. @@ -3287,8 +3326,8 @@
  20723. del_timer_sync(&pool->idle_timer);
  20724. del_timer_sync(&pool->mayday_timer);
  20725. - /* sched-RCU protected to allow dereferences from get_work_pool() */
  20726. - call_rcu_sched(&pool->rcu, rcu_free_pool);
  20727. + /* RCU protected to allow dereferences from get_work_pool() */
  20728. + call_rcu(&pool->rcu, rcu_free_pool);
  20729. }
  20730. /**
  20731. @@ -3395,14 +3434,14 @@
  20732. put_unbound_pool(pool);
  20733. mutex_unlock(&wq_pool_mutex);
  20734. - call_rcu_sched(&pwq->rcu, rcu_free_pwq);
  20735. + call_rcu(&pwq->rcu, rcu_free_pwq);
  20736. /*
  20737. * If we're the last pwq going away, @wq is already dead and no one
  20738. * is gonna access it anymore. Schedule RCU free.
  20739. */
  20740. if (is_last)
  20741. - call_rcu_sched(&wq->rcu, rcu_free_wq);
  20742. + call_rcu(&wq->rcu, rcu_free_wq);
  20743. }
  20744. /**
  20745. @@ -4052,7 +4091,7 @@
  20746. * The base ref is never dropped on per-cpu pwqs. Directly
  20747. * schedule RCU free.
  20748. */
  20749. - call_rcu_sched(&wq->rcu, rcu_free_wq);
  20750. + call_rcu(&wq->rcu, rcu_free_wq);
  20751. } else {
  20752. /*
  20753. * We're the sole accessor of @wq at this point. Directly
  20754. @@ -4145,7 +4184,8 @@
  20755. struct pool_workqueue *pwq;
  20756. bool ret;
  20757. - rcu_read_lock_sched();
  20758. + rcu_read_lock();
  20759. + preempt_disable();
  20760. if (cpu == WORK_CPU_UNBOUND)
  20761. cpu = smp_processor_id();
  20762. @@ -4156,7 +4196,8 @@
  20763. pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
  20764. ret = !list_empty(&pwq->delayed_works);
  20765. - rcu_read_unlock_sched();
  20766. + preempt_enable();
  20767. + rcu_read_unlock();
  20768. return ret;
  20769. }
  20770. @@ -4182,15 +4223,15 @@
  20771. if (work_pending(work))
  20772. ret |= WORK_BUSY_PENDING;
  20773. - local_irq_save(flags);
  20774. + rcu_read_lock();
  20775. pool = get_work_pool(work);
  20776. if (pool) {
  20777. - spin_lock(&pool->lock);
  20778. + spin_lock_irqsave(&pool->lock, flags);
  20779. if (find_worker_executing_work(pool, work))
  20780. ret |= WORK_BUSY_RUNNING;
  20781. - spin_unlock(&pool->lock);
  20782. + spin_unlock_irqrestore(&pool->lock, flags);
  20783. }
  20784. - local_irq_restore(flags);
  20785. + rcu_read_unlock();
  20786. return ret;
  20787. }
  20788. @@ -4379,7 +4420,7 @@
  20789. unsigned long flags;
  20790. int pi;
  20791. - rcu_read_lock_sched();
  20792. + rcu_read_lock();
  20793. pr_info("Showing busy workqueues and worker pools:\n");
  20794. @@ -4432,7 +4473,7 @@
  20795. spin_unlock_irqrestore(&pool->lock, flags);
  20796. }
  20797. - rcu_read_unlock_sched();
  20798. + rcu_read_unlock();
  20799. }
  20800. /*
  20801. @@ -4770,16 +4811,16 @@
  20802. * nr_active is monotonically decreasing. It's safe
  20803. * to peek without lock.
  20804. */
  20805. - rcu_read_lock_sched();
  20806. + rcu_read_lock();
  20807. for_each_pwq(pwq, wq) {
  20808. WARN_ON_ONCE(pwq->nr_active < 0);
  20809. if (pwq->nr_active) {
  20810. busy = true;
  20811. - rcu_read_unlock_sched();
  20812. + rcu_read_unlock();
  20813. goto out_unlock;
  20814. }
  20815. }
  20816. - rcu_read_unlock_sched();
  20817. + rcu_read_unlock();
  20818. }
  20819. out_unlock:
  20820. mutex_unlock(&wq_pool_mutex);
  20821. @@ -4969,7 +5010,8 @@
  20822. const char *delim = "";
  20823. int node, written = 0;
  20824. - rcu_read_lock_sched();
  20825. + get_online_cpus();
  20826. + rcu_read_lock();
  20827. for_each_node(node) {
  20828. written += scnprintf(buf + written, PAGE_SIZE - written,
  20829. "%s%d:%d", delim, node,
  20830. @@ -4977,7 +5019,8 @@
  20831. delim = " ";
  20832. }
  20833. written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
  20834. - rcu_read_unlock_sched();
  20835. + rcu_read_unlock();
  20836. + put_online_cpus();
  20837. return written;
  20838. }
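
Taken together, the workqueue.c hunks above do two things: they drop the old sched-RCU convention (rcu_read_lock_sched()/call_rcu_sched()) in favour of plain RCU for pwq/pool lookups, and they funnel every irq-off "grab the PENDING bit" section through the new pendingb_lock local lock so those sections stay preemptible on PREEMPT_RT. The sketch below is a hypothetical user-space model of that local-lock idea, not kernel code: the same call sites compile against a lock API that is effectively a no-op in the non-RT build and a real sleeping lock in the RT build. All names (MODEL_RT, pendingb_lock_model, queue_work_model) are invented for the illustration.

/* Hypothetical user-space model of the local-lock pattern used above.
 * Without MODEL_RT the "lock" stands in for disabling interrupts or
 * preemption (modelled as a no-op); with MODEL_RT it becomes a real
 * sleeping lock, so the critical section remains preemptible. */
#include <pthread.h>
#include <stdio.h>

#ifdef MODEL_RT
typedef pthread_mutex_t local_lock_t;
#define LOCAL_LOCK_INIT PTHREAD_MUTEX_INITIALIZER
static void local_lock(local_lock_t *l)   { pthread_mutex_lock(l); }
static void local_unlock(local_lock_t *l) { pthread_mutex_unlock(l); }
#else
typedef int local_lock_t;
#define LOCAL_LOCK_INIT 0
static void local_lock(local_lock_t *l)   { (void)l; /* irqs/preemption off */ }
static void local_unlock(local_lock_t *l) { (void)l; }
#endif

static local_lock_t pendingb_lock_model = LOCAL_LOCK_INIT;
static int pending_bit;    /* stands in for WORK_STRUCT_PENDING_BIT */

static void queue_work_model(void)
{
    local_lock(&pendingb_lock_model);
    if (!pending_bit) {
        pending_bit = 1;   /* "grab PENDING and queue" under the lock */
        printf("queued\n");
    }
    local_unlock(&pendingb_lock_model);
}

int main(void)
{
    queue_work_model();
    queue_work_model();    /* second call sees PENDING already set */
    return 0;
}

The point mirrored here is that queue_work_on(), try_to_grab_pending() and friends keep a single locking API while the backing primitive changes with the configuration.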
  20839. diff -Nur linux-4.8.15.orig/kernel/workqueue_internal.h linux-4.8.15/kernel/workqueue_internal.h
  20840. --- linux-4.8.15.orig/kernel/workqueue_internal.h 2016-12-15 17:50:48.000000000 +0100
  20841. +++ linux-4.8.15/kernel/workqueue_internal.h 2017-01-01 17:07:16.063429015 +0100
  20842. @@ -43,6 +43,7 @@
  20843. unsigned long last_active; /* L: last active timestamp */
  20844. unsigned int flags; /* X: flags */
  20845. int id; /* I: worker id */
  20846. + int sleeping; /* None */
  20847. /*
  20848. * Opaque string set with work_set_desc(). Printed out with task
  20849. @@ -68,7 +69,7 @@
  20850. * Scheduler hooks for concurrency managed workqueue. Only to be used from
  20851. * sched/core.c and workqueue.c.
  20852. */
  20853. -void wq_worker_waking_up(struct task_struct *task, int cpu);
  20854. -struct task_struct *wq_worker_sleeping(struct task_struct *task);
  20855. +void wq_worker_running(struct task_struct *task);
  20856. +void wq_worker_sleeping(struct task_struct *task);
  20857. #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
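
The new sleeping field backs the wq_worker_sleeping()/wq_worker_running() pair introduced above: the worker marks itself sleeping once when it blocks and clears the mark when it runs again, so the pool's running counter is adjusted exactly once per sleep cycle even if the scheduler hooks fire more than once. Below is a minimal user-space model of that handshake, using plain C11 atomics instead of the kernel's helpers; worker_model, worker_sleeping and worker_running are hypothetical names for the illustration.

/* Hypothetical model of the sleeping/running handshake added above. */
#include <stdatomic.h>
#include <stdio.h>

struct worker_model {
    atomic_int sleeping;
    atomic_int *nr_running;
};

static void worker_sleeping(struct worker_model *w)
{
    if (atomic_exchange(&w->sleeping, 1))
        return;                         /* already accounted for */
    atomic_fetch_sub(w->nr_running, 1);
}

static void worker_running(struct worker_model *w)
{
    if (!atomic_exchange(&w->sleeping, 0))
        return;                         /* was not sleeping */
    atomic_fetch_add(w->nr_running, 1);
}

int main(void)
{
    atomic_int nr_running = 1;
    struct worker_model w = { .sleeping = 0, .nr_running = &nr_running };

    worker_sleeping(&w);
    worker_sleeping(&w);    /* duplicate call changes nothing */
    worker_running(&w);
    printf("nr_running=%d\n", atomic_load(&nr_running));   /* prints 1 */
    return 0;
}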
  20858. diff -Nur linux-4.8.15.orig/lib/debugobjects.c linux-4.8.15/lib/debugobjects.c
  20859. --- linux-4.8.15.orig/lib/debugobjects.c 2016-12-15 17:50:48.000000000 +0100
  20860. +++ linux-4.8.15/lib/debugobjects.c 2017-01-01 17:07:16.063429015 +0100
  20861. @@ -308,7 +308,10 @@
  20862. struct debug_obj *obj;
  20863. unsigned long flags;
  20864. - fill_pool();
  20865. +#ifdef CONFIG_PREEMPT_RT_FULL
  20866. + if (preempt_count() == 0 && !irqs_disabled())
  20867. +#endif
  20868. + fill_pool();
  20869. db = get_bucket((unsigned long) addr);
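
The debugobjects hunk is a small "is it safe to allocate here?" guard: on PREEMPT_RT_FULL the object pool is only refilled when the caller runs with preemption and interrupts enabled, since the allocation path may take sleeping locks in that configuration. A hypothetical user-space analogue of guarding a refill behind a cheap "may I block?" check (pool_count, in_atomic_context and fill_pool_model are invented names):

/* Hypothetical model of the guarded fill_pool() call above: the cache is
 * only refilled from a context that is allowed to block. */
#include <stdio.h>
#include <stdlib.h>

#define POOL_MIN 4

static void *pool[POOL_MIN];
static int pool_count;
static int in_atomic_context;   /* stands in for preempt_count()/irqs_disabled() */

static void fill_pool_model(void)
{
    if (in_atomic_context)
        return;                 /* refilling may block: skip for now */
    while (pool_count < POOL_MIN)
        pool[pool_count++] = malloc(64);
}

int main(void)
{
    in_atomic_context = 1;
    fill_pool_model();
    printf("atomic: pool_count=%d\n", pool_count);       /* still 0 */

    in_atomic_context = 0;
    fill_pool_model();
    printf("preemptible: pool_count=%d\n", pool_count);  /* POOL_MIN */

    while (pool_count)
        free(pool[--pool_count]);
    return 0;
}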
  20870. diff -Nur linux-4.8.15.orig/lib/idr.c linux-4.8.15/lib/idr.c
  20871. --- linux-4.8.15.orig/lib/idr.c 2016-12-15 17:50:48.000000000 +0100
  20872. +++ linux-4.8.15/lib/idr.c 2017-01-01 17:07:16.067429274 +0100
  20873. @@ -30,6 +30,7 @@
  20874. #include <linux/idr.h>
  20875. #include <linux/spinlock.h>
  20876. #include <linux/percpu.h>
  20877. +#include <linux/locallock.h>
  20878. #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1)
  20879. #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT)
  20880. @@ -45,6 +46,37 @@
  20881. static DEFINE_PER_CPU(int, idr_preload_cnt);
  20882. static DEFINE_SPINLOCK(simple_ida_lock);
  20883. +#ifdef CONFIG_PREEMPT_RT_FULL
  20884. +static DEFINE_LOCAL_IRQ_LOCK(idr_lock);
  20885. +
  20886. +static inline void idr_preload_lock(void)
  20887. +{
  20888. + local_lock(idr_lock);
  20889. +}
  20890. +
  20891. +static inline void idr_preload_unlock(void)
  20892. +{
  20893. + local_unlock(idr_lock);
  20894. +}
  20895. +
  20896. +void idr_preload_end(void)
  20897. +{
  20898. + idr_preload_unlock();
  20899. +}
  20900. +EXPORT_SYMBOL(idr_preload_end);
  20901. +#else
  20902. +static inline void idr_preload_lock(void)
  20903. +{
  20904. + preempt_disable();
  20905. +}
  20906. +
  20907. +static inline void idr_preload_unlock(void)
  20908. +{
  20909. + preempt_enable();
  20910. +}
  20911. +#endif
  20912. +
  20913. +
  20914. /* the maximum ID which can be allocated given idr->layers */
  20915. static int idr_max(int layers)
  20916. {
  20917. @@ -115,14 +147,14 @@
  20918. * context. See idr_preload() for details.
  20919. */
  20920. if (!in_interrupt()) {
  20921. - preempt_disable();
  20922. + idr_preload_lock();
  20923. new = __this_cpu_read(idr_preload_head);
  20924. if (new) {
  20925. __this_cpu_write(idr_preload_head, new->ary[0]);
  20926. __this_cpu_dec(idr_preload_cnt);
  20927. new->ary[0] = NULL;
  20928. }
  20929. - preempt_enable();
  20930. + idr_preload_unlock();
  20931. if (new)
  20932. return new;
  20933. }
  20934. @@ -366,7 +398,6 @@
  20935. idr_mark_full(pa, id);
  20936. }
  20937. -
  20938. /**
  20939. * idr_preload - preload for idr_alloc()
  20940. * @gfp_mask: allocation mask to use for preloading
  20941. @@ -401,7 +432,7 @@
  20942. WARN_ON_ONCE(in_interrupt());
  20943. might_sleep_if(gfpflags_allow_blocking(gfp_mask));
  20944. - preempt_disable();
  20945. + idr_preload_lock();
  20946. /*
  20947. * idr_alloc() is likely to succeed w/o full idr_layer buffer and
  20948. @@ -413,9 +444,9 @@
  20949. while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) {
  20950. struct idr_layer *new;
  20951. - preempt_enable();
  20952. + idr_preload_unlock();
  20953. new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
  20954. - preempt_disable();
  20955. + idr_preload_lock();
  20956. if (!new)
  20957. break;
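
idr_preload() keeps its per-CPU cache topped up by repeatedly dropping the preload lock around the blocking kmem_cache_zalloc() and re-taking it before touching the cache again; the idr_preload_lock()/idr_preload_unlock() wrappers above merely choose between preempt_disable() and the RT local lock. Here is a user-space sketch of that drop-the-lock-around-allocation loop. It uses a pthread mutex in place of the kernel primitive, and preload_lock, preload_cache and MAX_PRELOAD are invented names; unlike the kernel API, the sketch releases the lock before returning.

/* Hypothetical model of the preload loop above: the lock is dropped
 * around the potentially blocking allocation, and the fill level is
 * re-checked after it is re-taken. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_PRELOAD 8

static pthread_mutex_t preload_lock = PTHREAD_MUTEX_INITIALIZER;
static void *preload_cache[MAX_PRELOAD];
static int preload_cnt;

static void preload(void)
{
    pthread_mutex_lock(&preload_lock);
    while (preload_cnt < MAX_PRELOAD) {
        void *new;

        pthread_mutex_unlock(&preload_lock);    /* allocation may block */
        new = malloc(128);
        pthread_mutex_lock(&preload_lock);
        if (!new)
            break;
        /* another thread may have refilled the cache meanwhile */
        if (preload_cnt == MAX_PRELOAD) {
            free(new);
            break;
        }
        preload_cache[preload_cnt++] = new;
    }
    pthread_mutex_unlock(&preload_lock);
}

int main(void)
{
    preload();
    printf("preloaded %d nodes\n", preload_cnt);
    return 0;
}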
  20958. diff -Nur linux-4.8.15.orig/lib/irq_poll.c linux-4.8.15/lib/irq_poll.c
  20959. --- linux-4.8.15.orig/lib/irq_poll.c 2016-12-15 17:50:48.000000000 +0100
  20960. +++ linux-4.8.15/lib/irq_poll.c 2017-01-01 17:07:16.067429274 +0100
  20961. @@ -36,6 +36,7 @@
  20962. list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
  20963. __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
  20964. local_irq_restore(flags);
  20965. + preempt_check_resched_rt();
  20966. }
  20967. EXPORT_SYMBOL(irq_poll_sched);
  20968. @@ -71,6 +72,7 @@
  20969. local_irq_save(flags);
  20970. __irq_poll_complete(iop);
  20971. local_irq_restore(flags);
  20972. + preempt_check_resched_rt();
  20973. }
  20974. EXPORT_SYMBOL(irq_poll_complete);
  20975. @@ -95,6 +97,7 @@
  20976. }
  20977. local_irq_enable();
  20978. + preempt_check_resched_rt();
  20979. /* Even though interrupts have been re-enabled, this
  20980. * access is safe because interrupts can only add new
  20981. @@ -132,6 +135,7 @@
  20982. __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
  20983. local_irq_enable();
  20984. + preempt_check_resched_rt();
  20985. }
  20986. /**
  20987. @@ -199,6 +203,7 @@
  20988. this_cpu_ptr(&blk_cpu_iopoll));
  20989. __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
  20990. local_irq_enable();
  20991. + preempt_check_resched_rt();
  20992. }
  20993. return NOTIFY_OK;
  20994. diff -Nur linux-4.8.15.orig/lib/Kconfig linux-4.8.15/lib/Kconfig
  20995. --- linux-4.8.15.orig/lib/Kconfig 2016-12-15 17:50:48.000000000 +0100
  20996. +++ linux-4.8.15/lib/Kconfig 2017-01-01 17:07:16.063429015 +0100
  20997. @@ -400,6 +400,7 @@
  20998. config CPUMASK_OFFSTACK
  20999. bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
  21000. + depends on !PREEMPT_RT_FULL
  21001. help
  21002. Use dynamic allocation for cpumask_var_t, instead of putting
  21003. them on the stack. This is a bit more expensive, but avoids
  21004. diff -Nur linux-4.8.15.orig/lib/Kconfig.debug linux-4.8.15/lib/Kconfig.debug
  21005. --- linux-4.8.15.orig/lib/Kconfig.debug 2016-12-15 17:50:48.000000000 +0100
  21006. +++ linux-4.8.15/lib/Kconfig.debug 2017-01-01 17:07:16.063429015 +0100
  21007. @@ -977,6 +977,7 @@
  21008. config DEBUG_PREEMPT
  21009. bool "Debug preemptible kernel"
  21010. depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT
  21011. + select USING_GET_LOCK_PARENT_IP
  21012. default y
  21013. help
  21014. If you say Y here then the kernel will use a debug variant of the
  21015. @@ -1159,8 +1160,17 @@
  21016. endmenu # lock debugging
  21017. +config USING_GET_LOCK_PARENT_IP
  21018. + bool
  21019. + help
  21020. + Enables the use of the function get_lock_parent_ip() that
  21021. + will use __builtin_return_address(n) with n > 0 causing
  21022. + some gcc warnings. When this is selected, those warnings
  21023. + will be suppressed.
  21024. +
  21025. config TRACE_IRQFLAGS
  21026. bool
  21027. + select USING_GET_LOCK_PARENT_IP
  21028. help
  21029. Enables hooks to interrupt enabling and disabling for
  21030. either tracing or lock debugging.
  21031. diff -Nur linux-4.8.15.orig/lib/locking-selftest.c linux-4.8.15/lib/locking-selftest.c
  21032. --- linux-4.8.15.orig/lib/locking-selftest.c 2016-12-15 17:50:48.000000000 +0100
  21033. +++ linux-4.8.15/lib/locking-selftest.c 2017-01-01 17:07:16.067429274 +0100
  21034. @@ -590,6 +590,8 @@
  21035. #include "locking-selftest-spin-hardirq.h"
  21036. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin)
  21037. +#ifndef CONFIG_PREEMPT_RT_FULL
  21038. +
  21039. #include "locking-selftest-rlock-hardirq.h"
  21040. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
  21041. @@ -605,9 +607,12 @@
  21042. #include "locking-selftest-wlock-softirq.h"
  21043. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
  21044. +#endif
  21045. +
  21046. #undef E1
  21047. #undef E2
  21048. +#ifndef CONFIG_PREEMPT_RT_FULL
  21049. /*
  21050. * Enabling hardirqs with a softirq-safe lock held:
  21051. */
  21052. @@ -640,6 +645,8 @@
  21053. #undef E1
  21054. #undef E2
  21055. +#endif
  21056. +
  21057. /*
  21058. * Enabling irqs with an irq-safe lock held:
  21059. */
  21060. @@ -663,6 +670,8 @@
  21061. #include "locking-selftest-spin-hardirq.h"
  21062. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin)
  21063. +#ifndef CONFIG_PREEMPT_RT_FULL
  21064. +
  21065. #include "locking-selftest-rlock-hardirq.h"
  21066. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
  21067. @@ -678,6 +687,8 @@
  21068. #include "locking-selftest-wlock-softirq.h"
  21069. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
  21070. +#endif
  21071. +
  21072. #undef E1
  21073. #undef E2
  21074. @@ -709,6 +720,8 @@
  21075. #include "locking-selftest-spin-hardirq.h"
  21076. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin)
  21077. +#ifndef CONFIG_PREEMPT_RT_FULL
  21078. +
  21079. #include "locking-selftest-rlock-hardirq.h"
  21080. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
  21081. @@ -724,6 +737,8 @@
  21082. #include "locking-selftest-wlock-softirq.h"
  21083. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
  21084. +#endif
  21085. +
  21086. #undef E1
  21087. #undef E2
  21088. #undef E3
  21089. @@ -757,6 +772,8 @@
  21090. #include "locking-selftest-spin-hardirq.h"
  21091. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin)
  21092. +#ifndef CONFIG_PREEMPT_RT_FULL
  21093. +
  21094. #include "locking-selftest-rlock-hardirq.h"
  21095. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
  21096. @@ -772,10 +789,14 @@
  21097. #include "locking-selftest-wlock-softirq.h"
  21098. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
  21099. +#endif
  21100. +
  21101. #undef E1
  21102. #undef E2
  21103. #undef E3
  21104. +#ifndef CONFIG_PREEMPT_RT_FULL
  21105. +
  21106. /*
  21107. * read-lock / write-lock irq inversion.
  21108. *
  21109. @@ -838,6 +859,10 @@
  21110. #undef E2
  21111. #undef E3
  21112. +#endif
  21113. +
  21114. +#ifndef CONFIG_PREEMPT_RT_FULL
  21115. +
  21116. /*
  21117. * read-lock / write-lock recursion that is actually safe.
  21118. */
  21119. @@ -876,6 +901,8 @@
  21120. #undef E2
  21121. #undef E3
  21122. +#endif
  21123. +
  21124. /*
  21125. * read-lock / write-lock recursion that is unsafe.
  21126. */
  21127. @@ -1858,6 +1885,7 @@
  21128. printk(" --------------------------------------------------------------------------\n");
  21129. +#ifndef CONFIG_PREEMPT_RT_FULL
  21130. /*
  21131. * irq-context testcases:
  21132. */
  21133. @@ -1870,6 +1898,28 @@
  21134. DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
  21135. // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
  21136. +#else
  21137. + /* On -rt, we only do hardirq context test for raw spinlock */
  21138. + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12);
  21139. + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21);
  21140. +
  21141. + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12);
  21142. + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21);
  21143. +
  21144. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123);
  21145. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132);
  21146. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213);
  21147. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231);
  21148. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312);
  21149. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321);
  21150. +
  21151. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123);
  21152. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132);
  21153. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213);
  21154. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231);
  21155. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312);
  21156. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321);
  21157. +#endif
  21158. ww_tests();
  21159. diff -Nur linux-4.8.15.orig/lib/percpu_ida.c linux-4.8.15/lib/percpu_ida.c
  21160. --- linux-4.8.15.orig/lib/percpu_ida.c 2016-12-15 17:50:48.000000000 +0100
  21161. +++ linux-4.8.15/lib/percpu_ida.c 2017-01-01 17:07:16.067429274 +0100
  21162. @@ -26,6 +26,9 @@
  21163. #include <linux/string.h>
  21164. #include <linux/spinlock.h>
  21165. #include <linux/percpu_ida.h>
  21166. +#include <linux/locallock.h>
  21167. +
  21168. +static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock);
  21169. struct percpu_ida_cpu {
  21170. /*
  21171. @@ -148,13 +151,13 @@
  21172. unsigned long flags;
  21173. int tag;
  21174. - local_irq_save(flags);
  21175. + local_lock_irqsave(irq_off_lock, flags);
  21176. tags = this_cpu_ptr(pool->tag_cpu);
  21177. /* Fastpath */
  21178. tag = alloc_local_tag(tags);
  21179. if (likely(tag >= 0)) {
  21180. - local_irq_restore(flags);
  21181. + local_unlock_irqrestore(irq_off_lock, flags);
  21182. return tag;
  21183. }
  21184. @@ -173,6 +176,7 @@
  21185. if (!tags->nr_free)
  21186. alloc_global_tags(pool, tags);
  21187. +
  21188. if (!tags->nr_free)
  21189. steal_tags(pool, tags);
  21190. @@ -184,7 +188,7 @@
  21191. }
  21192. spin_unlock(&pool->lock);
  21193. - local_irq_restore(flags);
  21194. + local_unlock_irqrestore(irq_off_lock, flags);
  21195. if (tag >= 0 || state == TASK_RUNNING)
  21196. break;
  21197. @@ -196,7 +200,7 @@
  21198. schedule();
  21199. - local_irq_save(flags);
  21200. + local_lock_irqsave(irq_off_lock, flags);
  21201. tags = this_cpu_ptr(pool->tag_cpu);
  21202. }
  21203. if (state != TASK_RUNNING)
  21204. @@ -221,7 +225,7 @@
  21205. BUG_ON(tag >= pool->nr_tags);
  21206. - local_irq_save(flags);
  21207. + local_lock_irqsave(irq_off_lock, flags);
  21208. tags = this_cpu_ptr(pool->tag_cpu);
  21209. spin_lock(&tags->lock);
  21210. @@ -253,7 +257,7 @@
  21211. spin_unlock(&pool->lock);
  21212. }
  21213. - local_irq_restore(flags);
  21214. + local_unlock_irqrestore(irq_off_lock, flags);
  21215. }
  21216. EXPORT_SYMBOL_GPL(percpu_ida_free);
  21217. @@ -345,7 +349,7 @@
  21218. struct percpu_ida_cpu *remote;
  21219. unsigned cpu, i, err = 0;
  21220. - local_irq_save(flags);
  21221. + local_lock_irqsave(irq_off_lock, flags);
  21222. for_each_possible_cpu(cpu) {
  21223. remote = per_cpu_ptr(pool->tag_cpu, cpu);
  21224. spin_lock(&remote->lock);
  21225. @@ -367,7 +371,7 @@
  21226. }
  21227. spin_unlock(&pool->lock);
  21228. out:
  21229. - local_irq_restore(flags);
  21230. + local_unlock_irqrestore(irq_off_lock, flags);
  21231. return err;
  21232. }
  21233. EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
  21234. diff -Nur linux-4.8.15.orig/lib/radix-tree.c linux-4.8.15/lib/radix-tree.c
  21235. --- linux-4.8.15.orig/lib/radix-tree.c 2016-12-15 17:50:48.000000000 +0100
  21236. +++ linux-4.8.15/lib/radix-tree.c 2017-01-01 17:07:16.067429274 +0100
  21237. @@ -290,13 +290,14 @@
  21238. * succeed in getting a node here (and never reach
  21239. * kmem_cache_alloc)
  21240. */
  21241. - rtp = this_cpu_ptr(&radix_tree_preloads);
  21242. + rtp = &get_cpu_var(radix_tree_preloads);
  21243. if (rtp->nr) {
  21244. ret = rtp->nodes;
  21245. rtp->nodes = ret->private_data;
  21246. ret->private_data = NULL;
  21247. rtp->nr--;
  21248. }
  21249. + put_cpu_var(radix_tree_preloads);
  21250. /*
  21251. * Update the allocation stack trace as this is more useful
  21252. * for debugging.
  21253. @@ -336,6 +337,7 @@
  21254. call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
  21255. }
  21256. +#ifndef CONFIG_PREEMPT_RT_FULL
  21257. /*
  21258. * Load up this CPU's radix_tree_node buffer with sufficient objects to
  21259. * ensure that the addition of a single element in the tree cannot fail. On
  21260. @@ -455,6 +457,7 @@
  21261. return __radix_tree_preload(gfp_mask, nr_nodes);
  21262. }
  21263. +#endif
  21264. /*
  21265. * The maximum index which can be stored in a radix tree
  21266. diff -Nur linux-4.8.15.orig/lib/scatterlist.c linux-4.8.15/lib/scatterlist.c
  21267. --- linux-4.8.15.orig/lib/scatterlist.c 2016-12-15 17:50:48.000000000 +0100
  21268. +++ linux-4.8.15/lib/scatterlist.c 2017-01-01 17:07:16.067429274 +0100
  21269. @@ -620,7 +620,7 @@
  21270. flush_kernel_dcache_page(miter->page);
  21271. if (miter->__flags & SG_MITER_ATOMIC) {
  21272. - WARN_ON_ONCE(preemptible());
  21273. + WARN_ON_ONCE(!pagefault_disabled());
  21274. kunmap_atomic(miter->addr);
  21275. } else
  21276. kunmap(miter->page);
  21277. @@ -664,7 +664,7 @@
  21278. if (!sg_miter_skip(&miter, skip))
  21279. return false;
  21280. - local_irq_save(flags);
  21281. + local_irq_save_nort(flags);
  21282. while (sg_miter_next(&miter) && offset < buflen) {
  21283. unsigned int len;
  21284. @@ -681,7 +681,7 @@
  21285. sg_miter_stop(&miter);
  21286. - local_irq_restore(flags);
  21287. + local_irq_restore_nort(flags);
  21288. return offset;
  21289. }
  21290. EXPORT_SYMBOL(sg_copy_buffer);
  21291. diff -Nur linux-4.8.15.orig/lib/smp_processor_id.c linux-4.8.15/lib/smp_processor_id.c
  21292. --- linux-4.8.15.orig/lib/smp_processor_id.c 2016-12-15 17:50:48.000000000 +0100
  21293. +++ linux-4.8.15/lib/smp_processor_id.c 2017-01-01 17:07:16.067429274 +0100
  21294. @@ -39,8 +39,9 @@
  21295. if (!printk_ratelimit())
  21296. goto out_enable;
  21297. - printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n",
  21298. - what1, what2, preempt_count() - 1, current->comm, current->pid);
  21299. + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x %08x] code: %s/%d\n",
  21300. + what1, what2, preempt_count() - 1, __migrate_disabled(current),
  21301. + current->comm, current->pid);
  21302. print_symbol("caller is %s\n", (long)__builtin_return_address(0));
  21303. dump_stack();
  21304. diff -Nur linux-4.8.15.orig/mm/backing-dev.c linux-4.8.15/mm/backing-dev.c
  21305. --- linux-4.8.15.orig/mm/backing-dev.c 2016-12-15 17:50:48.000000000 +0100
  21306. +++ linux-4.8.15/mm/backing-dev.c 2017-01-01 17:07:16.103431597 +0100
  21307. @@ -457,9 +457,9 @@
  21308. {
  21309. unsigned long flags;
  21310. - local_irq_save(flags);
  21311. + local_irq_save_nort(flags);
  21312. if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
  21313. - local_irq_restore(flags);
  21314. + local_irq_restore_nort(flags);
  21315. return;
  21316. }
  21317. diff -Nur linux-4.8.15.orig/mm/compaction.c linux-4.8.15/mm/compaction.c
  21318. --- linux-4.8.15.orig/mm/compaction.c 2016-12-15 17:50:48.000000000 +0100
  21319. +++ linux-4.8.15/mm/compaction.c 2017-01-01 17:07:16.103431597 +0100
  21320. @@ -1585,10 +1585,12 @@
  21321. block_start_pfn(cc->migrate_pfn, cc->order);
  21322. if (cc->last_migrated_pfn < current_block_start) {
  21323. - cpu = get_cpu();
  21324. + cpu = get_cpu_light();
  21325. + local_lock_irq(swapvec_lock);
  21326. lru_add_drain_cpu(cpu);
  21327. + local_unlock_irq(swapvec_lock);
  21328. drain_local_pages(zone);
  21329. - put_cpu();
  21330. + put_cpu_light();
  21331. /* No more flushing until we migrate again */
  21332. cc->last_migrated_pfn = 0;
  21333. }
  21334. diff -Nur linux-4.8.15.orig/mm/filemap.c linux-4.8.15/mm/filemap.c
  21335. --- linux-4.8.15.orig/mm/filemap.c 2016-12-15 17:50:48.000000000 +0100
  21336. +++ linux-4.8.15/mm/filemap.c 2017-01-01 17:07:16.103431597 +0100
  21337. @@ -159,9 +159,12 @@
  21338. * node->private_list is protected by
  21339. * mapping->tree_lock.
  21340. */
  21341. - if (!list_empty(&node->private_list))
  21342. - list_lru_del(&workingset_shadow_nodes,
  21343. + if (!list_empty(&node->private_list)) {
  21344. + local_lock(workingset_shadow_lock);
  21345. + list_lru_del(&__workingset_shadow_nodes,
  21346. &node->private_list);
  21347. + local_unlock(workingset_shadow_lock);
  21348. + }
  21349. }
  21350. return 0;
  21351. }
  21352. @@ -217,8 +220,10 @@
  21353. if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
  21354. list_empty(&node->private_list)) {
  21355. node->private_data = mapping;
  21356. - list_lru_add(&workingset_shadow_nodes,
  21357. - &node->private_list);
  21358. + local_lock(workingset_shadow_lock);
  21359. + list_lru_add(&__workingset_shadow_nodes,
  21360. + &node->private_list);
  21361. + local_unlock(workingset_shadow_lock);
  21362. }
  21363. }
  21364. diff -Nur linux-4.8.15.orig/mm/highmem.c linux-4.8.15/mm/highmem.c
  21365. --- linux-4.8.15.orig/mm/highmem.c 2016-12-15 17:50:48.000000000 +0100
  21366. +++ linux-4.8.15/mm/highmem.c 2017-01-01 17:07:16.103431597 +0100
  21367. @@ -29,10 +29,11 @@
  21368. #include <linux/kgdb.h>
  21369. #include <asm/tlbflush.h>
  21370. -
  21371. +#ifndef CONFIG_PREEMPT_RT_FULL
  21372. #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
  21373. DEFINE_PER_CPU(int, __kmap_atomic_idx);
  21374. #endif
  21375. +#endif
  21376. /*
  21377. * Virtual_count is not a pure "count".
  21378. @@ -107,8 +108,9 @@
  21379. unsigned long totalhigh_pages __read_mostly;
  21380. EXPORT_SYMBOL(totalhigh_pages);
  21381. -
  21382. +#ifndef CONFIG_PREEMPT_RT_FULL
  21383. EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
  21384. +#endif
  21385. unsigned int nr_free_highpages (void)
  21386. {
  21387. diff -Nur linux-4.8.15.orig/mm/Kconfig linux-4.8.15/mm/Kconfig
  21388. --- linux-4.8.15.orig/mm/Kconfig 2016-12-15 17:50:48.000000000 +0100
  21389. +++ linux-4.8.15/mm/Kconfig 2017-01-01 17:07:16.103431597 +0100
  21390. @@ -410,7 +410,7 @@
  21391. config TRANSPARENT_HUGEPAGE
  21392. bool "Transparent Hugepage Support"
  21393. - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
  21394. + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
  21395. select COMPACTION
  21396. select RADIX_TREE_MULTIORDER
  21397. help
  21398. diff -Nur linux-4.8.15.orig/mm/memcontrol.c linux-4.8.15/mm/memcontrol.c
  21399. --- linux-4.8.15.orig/mm/memcontrol.c 2016-12-15 17:50:48.000000000 +0100
  21400. +++ linux-4.8.15/mm/memcontrol.c 2017-01-01 17:07:16.107431847 +0100
  21401. @@ -67,6 +67,7 @@
  21402. #include <net/sock.h>
  21403. #include <net/ip.h>
  21404. #include "slab.h"
  21405. +#include <linux/locallock.h>
  21406. #include <asm/uaccess.h>
  21407. @@ -92,6 +93,8 @@
  21408. #define do_swap_account 0
  21409. #endif
  21410. +static DEFINE_LOCAL_IRQ_LOCK(event_lock);
  21411. +
  21412. /* Whether legacy memory+swap accounting is active */
  21413. static bool do_memsw_account(void)
  21414. {
  21415. @@ -1724,6 +1727,7 @@
  21416. #define FLUSHING_CACHED_CHARGE 0
  21417. };
  21418. static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
  21419. +static DEFINE_LOCAL_IRQ_LOCK(memcg_stock_ll);
  21420. static DEFINE_MUTEX(percpu_charge_mutex);
  21421. /**
  21422. @@ -1746,7 +1750,7 @@
  21423. if (nr_pages > CHARGE_BATCH)
  21424. return ret;
  21425. - local_irq_save(flags);
  21426. + local_lock_irqsave(memcg_stock_ll, flags);
  21427. stock = this_cpu_ptr(&memcg_stock);
  21428. if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
  21429. @@ -1754,7 +1758,7 @@
  21430. ret = true;
  21431. }
  21432. - local_irq_restore(flags);
  21433. + local_unlock_irqrestore(memcg_stock_ll, flags);
  21434. return ret;
  21435. }
  21436. @@ -1781,13 +1785,13 @@
  21437. struct memcg_stock_pcp *stock;
  21438. unsigned long flags;
  21439. - local_irq_save(flags);
  21440. + local_lock_irqsave(memcg_stock_ll, flags);
  21441. stock = this_cpu_ptr(&memcg_stock);
  21442. drain_stock(stock);
  21443. clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
  21444. - local_irq_restore(flags);
  21445. + local_unlock_irqrestore(memcg_stock_ll, flags);
  21446. }
  21447. /*
  21448. @@ -1799,7 +1803,7 @@
  21449. struct memcg_stock_pcp *stock;
  21450. unsigned long flags;
  21451. - local_irq_save(flags);
  21452. + local_lock_irqsave(memcg_stock_ll, flags);
  21453. stock = this_cpu_ptr(&memcg_stock);
  21454. if (stock->cached != memcg) { /* reset if necessary */
  21455. @@ -1808,7 +1812,7 @@
  21456. }
  21457. stock->nr_pages += nr_pages;
  21458. - local_irq_restore(flags);
  21459. + local_unlock_irqrestore(memcg_stock_ll, flags);
  21460. }
  21461. /*
  21462. @@ -1824,7 +1828,7 @@
  21463. return;
  21464. /* Notify other cpus that system-wide "drain" is running */
  21465. get_online_cpus();
  21466. - curcpu = get_cpu();
  21467. + curcpu = get_cpu_light();
  21468. for_each_online_cpu(cpu) {
  21469. struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
  21470. struct mem_cgroup *memcg;
  21471. @@ -1841,7 +1845,7 @@
  21472. schedule_work_on(cpu, &stock->work);
  21473. }
  21474. }
  21475. - put_cpu();
  21476. + put_cpu_light();
  21477. put_online_cpus();
  21478. mutex_unlock(&percpu_charge_mutex);
  21479. }
  21480. @@ -4575,12 +4579,12 @@
  21481. ret = 0;
  21482. - local_irq_disable();
  21483. + local_lock_irq(event_lock);
  21484. mem_cgroup_charge_statistics(to, page, compound, nr_pages);
  21485. memcg_check_events(to, page);
  21486. mem_cgroup_charge_statistics(from, page, compound, -nr_pages);
  21487. memcg_check_events(from, page);
  21488. - local_irq_enable();
  21489. + local_unlock_irq(event_lock);
  21490. out_unlock:
  21491. unlock_page(page);
  21492. out:
  21493. @@ -5453,10 +5457,10 @@
  21494. commit_charge(page, memcg, lrucare);
  21495. - local_irq_disable();
  21496. + local_lock_irq(event_lock);
  21497. mem_cgroup_charge_statistics(memcg, page, compound, nr_pages);
  21498. memcg_check_events(memcg, page);
  21499. - local_irq_enable();
  21500. + local_unlock_irq(event_lock);
  21501. if (do_memsw_account() && PageSwapCache(page)) {
  21502. swp_entry_t entry = { .val = page_private(page) };
  21503. @@ -5512,14 +5516,14 @@
  21504. memcg_oom_recover(memcg);
  21505. }
  21506. - local_irq_save(flags);
  21507. + local_lock_irqsave(event_lock, flags);
  21508. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
  21509. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
  21510. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
  21511. __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
  21512. __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
  21513. memcg_check_events(memcg, dummy_page);
  21514. - local_irq_restore(flags);
  21515. + local_unlock_irqrestore(event_lock, flags);
  21516. if (!mem_cgroup_is_root(memcg))
  21517. css_put_many(&memcg->css, nr_pages);
  21518. @@ -5674,10 +5678,10 @@
  21519. commit_charge(newpage, memcg, false);
  21520. - local_irq_save(flags);
  21521. + local_lock_irqsave(event_lock, flags);
  21522. mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages);
  21523. memcg_check_events(memcg, newpage);
  21524. - local_irq_restore(flags);
  21525. + local_unlock_irqrestore(event_lock, flags);
  21526. }
  21527. DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
  21528. @@ -5854,6 +5858,7 @@
  21529. {
  21530. struct mem_cgroup *memcg, *swap_memcg;
  21531. unsigned short oldid;
  21532. + unsigned long flags;
  21533. VM_BUG_ON_PAGE(PageLRU(page), page);
  21534. VM_BUG_ON_PAGE(page_count(page), page);
  21535. @@ -5894,12 +5899,16 @@
  21536. * important here to have the interrupts disabled because it is the
  21537. * only synchronisation we have for udpating the per-CPU variables.
  21538. */
  21539. + local_lock_irqsave(event_lock, flags);
  21540. +#ifndef CONFIG_PREEMPT_RT_BASE
  21541. VM_BUG_ON(!irqs_disabled());
  21542. +#endif
  21543. mem_cgroup_charge_statistics(memcg, page, false, -1);
  21544. memcg_check_events(memcg, page);
  21545. if (!mem_cgroup_is_root(memcg))
  21546. css_put(&memcg->css);
  21547. + local_unlock_irqrestore(event_lock, flags);
  21548. }
  21549. /*
  21550. diff -Nur linux-4.8.15.orig/mm/mmu_context.c linux-4.8.15/mm/mmu_context.c
  21551. --- linux-4.8.15.orig/mm/mmu_context.c 2016-12-15 17:50:48.000000000 +0100
  21552. +++ linux-4.8.15/mm/mmu_context.c 2017-01-01 17:07:16.107431847 +0100
  21553. @@ -23,6 +23,7 @@
  21554. struct task_struct *tsk = current;
  21555. task_lock(tsk);
  21556. + preempt_disable_rt();
  21557. active_mm = tsk->active_mm;
  21558. if (active_mm != mm) {
  21559. atomic_inc(&mm->mm_count);
  21560. @@ -30,6 +31,7 @@
  21561. }
  21562. tsk->mm = mm;
  21563. switch_mm(active_mm, mm, tsk);
  21564. + preempt_enable_rt();
  21565. task_unlock(tsk);
  21566. #ifdef finish_arch_post_lock_switch
  21567. finish_arch_post_lock_switch();
  21568. diff -Nur linux-4.8.15.orig/mm/page_alloc.c linux-4.8.15/mm/page_alloc.c
  21569. --- linux-4.8.15.orig/mm/page_alloc.c 2016-12-15 17:50:48.000000000 +0100
  21570. +++ linux-4.8.15/mm/page_alloc.c 2017-01-01 17:07:16.111432108 +0100
  21571. @@ -61,6 +61,7 @@
  21572. #include <linux/page_ext.h>
  21573. #include <linux/hugetlb.h>
  21574. #include <linux/sched/rt.h>
  21575. +#include <linux/locallock.h>
  21576. #include <linux/page_owner.h>
  21577. #include <linux/kthread.h>
  21578. #include <linux/memcontrol.h>
  21579. @@ -276,6 +277,18 @@
  21580. EXPORT_SYMBOL(nr_online_nodes);
  21581. #endif
  21582. +static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
  21583. +
  21584. +#ifdef CONFIG_PREEMPT_RT_BASE
  21585. +# define cpu_lock_irqsave(cpu, flags) \
  21586. + local_lock_irqsave_on(pa_lock, flags, cpu)
  21587. +# define cpu_unlock_irqrestore(cpu, flags) \
  21588. + local_unlock_irqrestore_on(pa_lock, flags, cpu)
  21589. +#else
  21590. +# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
  21591. +# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
  21592. +#endif
  21593. +
  21594. int page_group_by_mobility_disabled __read_mostly;
  21595. #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
  21596. @@ -1056,7 +1069,7 @@
  21597. #endif /* CONFIG_DEBUG_VM */
  21598. /*
  21599. - * Frees a number of pages from the PCP lists
  21600. + * Frees a number of pages which have been collected from the pcp lists.
  21601. * Assumes all pages on list are in same zone, and of same order.
  21602. * count is the number of pages to free.
  21603. *
  21604. @@ -1067,19 +1080,58 @@
  21605. * pinned" detection logic.
  21606. */
  21607. static void free_pcppages_bulk(struct zone *zone, int count,
  21608. - struct per_cpu_pages *pcp)
  21609. + struct list_head *list)
  21610. {
  21611. - int migratetype = 0;
  21612. - int batch_free = 0;
  21613. unsigned long nr_scanned;
  21614. bool isolated_pageblocks;
  21615. + unsigned long flags;
  21616. +
  21617. + spin_lock_irqsave(&zone->lock, flags);
  21618. - spin_lock(&zone->lock);
  21619. isolated_pageblocks = has_isolate_pageblock(zone);
  21620. nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
  21621. if (nr_scanned)
  21622. __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
  21623. + while (!list_empty(list)) {
  21624. + struct page *page;
  21625. + int mt; /* migratetype of the to-be-freed page */
  21626. +
  21627. + page = list_first_entry(list, struct page, lru);
  21628. + /* must delete as __free_one_page list manipulates */
  21629. + list_del(&page->lru);
  21630. +
  21631. + mt = get_pcppage_migratetype(page);
  21632. + /* MIGRATE_ISOLATE page should not go to pcplists */
  21633. + VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
  21634. + /* Pageblock could have been isolated meanwhile */
  21635. + if (unlikely(isolated_pageblocks))
  21636. + mt = get_pageblock_migratetype(page);
  21637. +
  21638. + if (bulkfree_pcp_prepare(page))
  21639. + continue;
  21640. +
  21641. + __free_one_page(page, page_to_pfn(page), zone, 0, mt);
  21642. + trace_mm_page_pcpu_drain(page, 0, mt);
  21643. + count--;
  21644. + }
  21645. + WARN_ON(count != 0);
  21646. + spin_unlock_irqrestore(&zone->lock, flags);
  21647. +}
  21648. +
  21649. +/*
  21650. + * Moves a number of pages from the PCP lists to free list which
  21651. + * is freed outside of the locked region.
  21652. + *
  21653. + * Assumes all pages on list are in same zone, and of same order.
  21654. + * count is the number of pages to free.
  21655. + */
  21656. +static void isolate_pcp_pages(int count, struct per_cpu_pages *src,
  21657. + struct list_head *dst)
  21658. +{
  21659. + int migratetype = 0;
  21660. + int batch_free = 0;
  21661. +
  21662. while (count) {
  21663. struct page *page;
  21664. struct list_head *list;
  21665. @@ -1095,7 +1147,7 @@
  21666. batch_free++;
  21667. if (++migratetype == MIGRATE_PCPTYPES)
  21668. migratetype = 0;
  21669. - list = &pcp->lists[migratetype];
  21670. + list = &src->lists[migratetype];
  21671. } while (list_empty(list));
  21672. /* This is the only non-empty list. Free them all. */
  21673. @@ -1103,27 +1155,12 @@
  21674. batch_free = count;
  21675. do {
  21676. - int mt; /* migratetype of the to-be-freed page */
  21677. -
  21678. page = list_last_entry(list, struct page, lru);
  21679. - /* must delete as __free_one_page list manipulates */
  21680. list_del(&page->lru);
  21681. - mt = get_pcppage_migratetype(page);
  21682. - /* MIGRATE_ISOLATE page should not go to pcplists */
  21683. - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
  21684. - /* Pageblock could have been isolated meanwhile */
  21685. - if (unlikely(isolated_pageblocks))
  21686. - mt = get_pageblock_migratetype(page);
  21687. -
  21688. - if (bulkfree_pcp_prepare(page))
  21689. - continue;
  21690. -
  21691. - __free_one_page(page, page_to_pfn(page), zone, 0, mt);
  21692. - trace_mm_page_pcpu_drain(page, 0, mt);
  21693. + list_add(&page->lru, dst);
  21694. } while (--count && --batch_free && !list_empty(list));
  21695. }
  21696. - spin_unlock(&zone->lock);
  21697. }
  21698. static void free_one_page(struct zone *zone,
  21699. @@ -1132,7 +1169,9 @@
  21700. int migratetype)
  21701. {
  21702. unsigned long nr_scanned;
  21703. - spin_lock(&zone->lock);
  21704. + unsigned long flags;
  21705. +
  21706. + spin_lock_irqsave(&zone->lock, flags);
  21707. nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
  21708. if (nr_scanned)
  21709. __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
  21710. @@ -1142,7 +1181,7 @@
  21711. migratetype = get_pfnblock_migratetype(page, pfn);
  21712. }
  21713. __free_one_page(page, pfn, zone, order, migratetype);
  21714. - spin_unlock(&zone->lock);
  21715. + spin_unlock_irqrestore(&zone->lock, flags);
  21716. }
  21717. static void __meminit __init_single_page(struct page *page, unsigned long pfn,
  21718. @@ -1228,10 +1267,10 @@
  21719. return;
  21720. migratetype = get_pfnblock_migratetype(page, pfn);
  21721. - local_irq_save(flags);
  21722. + local_lock_irqsave(pa_lock, flags);
  21723. __count_vm_events(PGFREE, 1 << order);
  21724. free_one_page(page_zone(page), page, pfn, order, migratetype);
  21725. - local_irq_restore(flags);
  21726. + local_unlock_irqrestore(pa_lock, flags);
  21727. }
  21728. static void __init __free_pages_boot_core(struct page *page, unsigned int order)
  21729. @@ -2219,16 +2258,18 @@
  21730. void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
  21731. {
  21732. unsigned long flags;
  21733. + LIST_HEAD(dst);
  21734. int to_drain, batch;
  21735. - local_irq_save(flags);
  21736. + local_lock_irqsave(pa_lock, flags);
  21737. batch = READ_ONCE(pcp->batch);
  21738. to_drain = min(pcp->count, batch);
  21739. if (to_drain > 0) {
  21740. - free_pcppages_bulk(zone, to_drain, pcp);
  21741. + isolate_pcp_pages(to_drain, pcp, &dst);
  21742. pcp->count -= to_drain;
  21743. }
  21744. - local_irq_restore(flags);
  21745. + local_unlock_irqrestore(pa_lock, flags);
  21746. + free_pcppages_bulk(zone, to_drain, &dst);
  21747. }
  21748. #endif
  21749. @@ -2244,16 +2285,21 @@
  21750. unsigned long flags;
  21751. struct per_cpu_pageset *pset;
  21752. struct per_cpu_pages *pcp;
  21753. + LIST_HEAD(dst);
  21754. + int count;
  21755. - local_irq_save(flags);
  21756. + cpu_lock_irqsave(cpu, flags);
  21757. pset = per_cpu_ptr(zone->pageset, cpu);
  21758. pcp = &pset->pcp;
  21759. - if (pcp->count) {
  21760. - free_pcppages_bulk(zone, pcp->count, pcp);
  21761. + count = pcp->count;
  21762. + if (count) {
  21763. + isolate_pcp_pages(count, pcp, &dst);
  21764. pcp->count = 0;
  21765. }
  21766. - local_irq_restore(flags);
  21767. + cpu_unlock_irqrestore(cpu, flags);
  21768. + if (count)
  21769. + free_pcppages_bulk(zone, count, &dst);
  21770. }
  21771. /*
  21772. @@ -2339,8 +2385,17 @@
  21773. else
  21774. cpumask_clear_cpu(cpu, &cpus_with_pcps);
  21775. }
  21776. +#ifndef CONFIG_PREEMPT_RT_BASE
  21777. on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
  21778. zone, 1);
  21779. +#else
  21780. + for_each_cpu(cpu, &cpus_with_pcps) {
  21781. + if (zone)
  21782. + drain_pages_zone(cpu, zone);
  21783. + else
  21784. + drain_pages(cpu);
  21785. + }
  21786. +#endif
  21787. }
  21788. #ifdef CONFIG_HIBERNATION
  21789. @@ -2400,7 +2455,7 @@
  21790. migratetype = get_pfnblock_migratetype(page, pfn);
  21791. set_pcppage_migratetype(page, migratetype);
  21792. - local_irq_save(flags);
  21793. + local_lock_irqsave(pa_lock, flags);
  21794. __count_vm_event(PGFREE);
  21795. /*
  21796. @@ -2426,12 +2481,17 @@
  21797. pcp->count++;
  21798. if (pcp->count >= pcp->high) {
  21799. unsigned long batch = READ_ONCE(pcp->batch);
  21800. - free_pcppages_bulk(zone, batch, pcp);
  21801. + LIST_HEAD(dst);
  21802. +
  21803. + isolate_pcp_pages(batch, pcp, &dst);
  21804. pcp->count -= batch;
  21805. + local_unlock_irqrestore(pa_lock, flags);
  21806. + free_pcppages_bulk(zone, batch, &dst);
  21807. + return;
  21808. }
  21809. out:
  21810. - local_irq_restore(flags);
  21811. + local_unlock_irqrestore(pa_lock, flags);
  21812. }
  21813. /*
  21814. @@ -2568,7 +2628,7 @@
  21815. struct per_cpu_pages *pcp;
  21816. struct list_head *list;
  21817. - local_irq_save(flags);
  21818. + local_lock_irqsave(pa_lock, flags);
  21819. do {
  21820. pcp = &this_cpu_ptr(zone->pageset)->pcp;
  21821. list = &pcp->lists[migratetype];
  21822. @@ -2595,7 +2655,7 @@
  21823. * allocate greater than order-1 page units with __GFP_NOFAIL.
  21824. */
  21825. WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
  21826. - spin_lock_irqsave(&zone->lock, flags);
  21827. + local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
  21828. do {
  21829. page = NULL;
  21830. @@ -2607,22 +2667,24 @@
  21831. if (!page)
  21832. page = __rmqueue(zone, order, migratetype);
  21833. } while (page && check_new_pages(page, order));
  21834. - spin_unlock(&zone->lock);
  21835. - if (!page)
  21836. + if (!page) {
  21837. + spin_unlock(&zone->lock);
  21838. goto failed;
  21839. + }
  21840. __mod_zone_freepage_state(zone, -(1 << order),
  21841. get_pcppage_migratetype(page));
  21842. + spin_unlock(&zone->lock);
  21843. }
  21844. __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
  21845. zone_statistics(preferred_zone, zone, gfp_flags);
  21846. - local_irq_restore(flags);
  21847. + local_unlock_irqrestore(pa_lock, flags);
  21848. VM_BUG_ON_PAGE(bad_range(zone, page), page);
  21849. return page;
  21850. failed:
  21851. - local_irq_restore(flags);
  21852. + local_unlock_irqrestore(pa_lock, flags);
  21853. return NULL;
  21854. }
  21855. @@ -6538,7 +6600,9 @@
  21856. int cpu = (unsigned long)hcpu;
  21857. if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
  21858. + local_lock_irq_on(swapvec_lock, cpu);
  21859. lru_add_drain_cpu(cpu);
  21860. + local_unlock_irq_on(swapvec_lock, cpu);
  21861. drain_pages(cpu);
  21862. /*
  21863. @@ -6564,6 +6628,7 @@
  21864. void __init page_alloc_init(void)
  21865. {
  21866. hotcpu_notifier(page_alloc_cpu_notify, 0);
  21867. + local_irq_lock_init(pa_lock);
  21868. }
  21869. /*
  21870. @@ -7380,7 +7445,7 @@
  21871. struct per_cpu_pageset *pset;
  21872. /* avoid races with drain_pages() */
  21873. - local_irq_save(flags);
  21874. + local_lock_irqsave(pa_lock, flags);
  21875. if (zone->pageset != &boot_pageset) {
  21876. for_each_online_cpu(cpu) {
  21877. pset = per_cpu_ptr(zone->pageset, cpu);
  21878. @@ -7389,7 +7454,7 @@
  21879. free_percpu(zone->pageset);
  21880. zone->pageset = &boot_pageset;
  21881. }
  21882. - local_irq_restore(flags);
  21883. + local_unlock_irqrestore(pa_lock, flags);
  21884. }
  21885. #ifdef CONFIG_MEMORY_HOTREMOVE
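The page_alloc.c hunks above follow one pattern throughout: the open-coded local_irq_save()/local_irq_restore() sections protecting the per-CPU pagesets become a named local lock (pa_lock, initialised in page_alloc_init()), and the bulk free is split in two so that free_pcppages_bulk() only runs after the lock has been dropped, on pages collected onto a private list by isolate_pcp_pages() while the lock was still held. The sketch below is a condensed restatement of that drain path, not code to apply; pa_lock, isolate_pcp_pages() and the list-taking free_pcppages_bulk() are symbols this patch itself introduces, and the local-lock macros come from the linux/locallock.h header added elsewhere in the patch.

#include <linux/list.h>
#include <linux/mmzone.h>
#include <linux/locallock.h>	/* RT local-lock API added by this patch */

static DEFINE_LOCAL_IRQ_LOCK(pa_lock);

/* Condensed sketch of the converted drain path. */
static void drain_pcp_sketch(struct zone *zone, struct per_cpu_pages *pcp)
{
	unsigned long flags;
	LIST_HEAD(dst);
	int count;

	local_lock_irqsave(pa_lock, flags);
	count = pcp->count;
	if (count) {
		/* detach the pages while the pageset is locked ... */
		isolate_pcp_pages(count, pcp, &dst);
		pcp->count = 0;
	}
	local_unlock_irqrestore(pa_lock, flags);

	/* ... and do the potentially long free loop unlocked */
	if (count)
		free_pcppages_bulk(zone, count, &dst);
}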
  21886. diff -Nur linux-4.8.15.orig/mm/slab.h linux-4.8.15/mm/slab.h
  21887. --- linux-4.8.15.orig/mm/slab.h 2016-12-15 17:50:48.000000000 +0100
  21888. +++ linux-4.8.15/mm/slab.h 2017-01-01 17:07:16.111432108 +0100
  21889. @@ -426,7 +426,11 @@
  21890. * The slab lists for all objects.
  21891. */
  21892. struct kmem_cache_node {
  21893. +#ifdef CONFIG_SLUB
  21894. + raw_spinlock_t list_lock;
  21895. +#else
  21896. spinlock_t list_lock;
  21897. +#endif
  21898. #ifdef CONFIG_SLAB
  21899. struct list_head slabs_partial; /* partial list first, better asm code */
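The one-line slab.h change above is what makes the rest of the SLUB conversion necessary: on PREEMPT_RT a plain spinlock_t becomes a sleeping lock, but kmem_cache_node::list_lock is taken from contexts that must not sleep and is nested inside other raw locks in slub.c, so for CONFIG_SLUB it is turned into a raw_spinlock_t. The knock-on effect is purely mechanical: every locking site has to move to the raw_spin_* API, as in this minimal sketch modelled on count_partial() from the slub.c diff below (node_sketch is a cut-down stand-in, not a kernel structure).

#include <linux/spinlock.h>
#include <linux/list.h>

struct node_sketch {
	raw_spinlock_t list_lock;	/* raw: keeps spinning even on RT */
	struct list_head partial;
};

static unsigned long count_partial_sketch(struct node_sketch *n)
{
	unsigned long flags, nr = 0;
	struct list_head *pos;

	raw_spin_lock_irqsave(&n->list_lock, flags);
	list_for_each(pos, &n->partial)
		nr++;
	raw_spin_unlock_irqrestore(&n->list_lock, flags);

	return nr;
}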
  21900. diff -Nur linux-4.8.15.orig/mm/slub.c linux-4.8.15/mm/slub.c
  21901. --- linux-4.8.15.orig/mm/slub.c 2016-12-15 17:50:48.000000000 +0100
  21902. +++ linux-4.8.15/mm/slub.c 2017-01-01 17:07:16.111432108 +0100
  21903. @@ -1145,7 +1145,7 @@
  21904. unsigned long uninitialized_var(flags);
  21905. int ret = 0;
  21906. - spin_lock_irqsave(&n->list_lock, flags);
  21907. + raw_spin_lock_irqsave(&n->list_lock, flags);
  21908. slab_lock(page);
  21909. if (s->flags & SLAB_CONSISTENCY_CHECKS) {
  21910. @@ -1180,7 +1180,7 @@
  21911. bulk_cnt, cnt);
  21912. slab_unlock(page);
  21913. - spin_unlock_irqrestore(&n->list_lock, flags);
  21914. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  21915. if (!ret)
  21916. slab_fix(s, "Object at 0x%p not freed", object);
  21917. return ret;
  21918. @@ -1308,6 +1308,12 @@
  21919. #endif /* CONFIG_SLUB_DEBUG */
  21920. +struct slub_free_list {
  21921. + raw_spinlock_t lock;
  21922. + struct list_head list;
  21923. +};
  21924. +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
  21925. +
  21926. /*
  21927. * Hooks for other subsystems that check memory allocations. In a typical
  21928. * production configuration these hooks all should produce no code at all.
  21929. @@ -1527,10 +1533,17 @@
  21930. void *start, *p;
  21931. int idx, order;
  21932. bool shuffle;
  21933. + bool enableirqs = false;
  21934. flags &= gfp_allowed_mask;
  21935. if (gfpflags_allow_blocking(flags))
  21936. + enableirqs = true;
  21937. +#ifdef CONFIG_PREEMPT_RT_FULL
  21938. + if (system_state == SYSTEM_RUNNING)
  21939. + enableirqs = true;
  21940. +#endif
  21941. + if (enableirqs)
  21942. local_irq_enable();
  21943. flags |= s->allocflags;
  21944. @@ -1605,7 +1618,7 @@
  21945. page->frozen = 1;
  21946. out:
  21947. - if (gfpflags_allow_blocking(flags))
  21948. + if (enableirqs)
  21949. local_irq_disable();
  21950. if (!page)
  21951. return NULL;
  21952. @@ -1664,6 +1677,16 @@
  21953. __free_pages(page, order);
  21954. }
  21955. +static void free_delayed(struct list_head *h)
  21956. +{
  21957. + while(!list_empty(h)) {
  21958. + struct page *page = list_first_entry(h, struct page, lru);
  21959. +
  21960. + list_del(&page->lru);
  21961. + __free_slab(page->slab_cache, page);
  21962. + }
  21963. +}
  21964. +
  21965. #define need_reserve_slab_rcu \
  21966. (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
  21967. @@ -1695,6 +1718,12 @@
  21968. }
  21969. call_rcu(head, rcu_free_slab);
  21970. + } else if (irqs_disabled()) {
  21971. + struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
  21972. +
  21973. + raw_spin_lock(&f->lock);
  21974. + list_add(&page->lru, &f->list);
  21975. + raw_spin_unlock(&f->lock);
  21976. } else
  21977. __free_slab(s, page);
  21978. }
  21979. @@ -1802,7 +1831,7 @@
  21980. if (!n || !n->nr_partial)
  21981. return NULL;
  21982. - spin_lock(&n->list_lock);
  21983. + raw_spin_lock(&n->list_lock);
  21984. list_for_each_entry_safe(page, page2, &n->partial, lru) {
  21985. void *t;
  21986. @@ -1827,7 +1856,7 @@
  21987. break;
  21988. }
  21989. - spin_unlock(&n->list_lock);
  21990. + raw_spin_unlock(&n->list_lock);
  21991. return object;
  21992. }
  21993. @@ -2073,7 +2102,7 @@
  21994. * that acquire_slab() will see a slab page that
  21995. * is frozen
  21996. */
  21997. - spin_lock(&n->list_lock);
  21998. + raw_spin_lock(&n->list_lock);
  21999. }
  22000. } else {
  22001. m = M_FULL;
  22002. @@ -2084,7 +2113,7 @@
  22003. * slabs from diagnostic functions will not see
  22004. * any frozen slabs.
  22005. */
  22006. - spin_lock(&n->list_lock);
  22007. + raw_spin_lock(&n->list_lock);
  22008. }
  22009. }
  22010. @@ -2119,7 +2148,7 @@
  22011. goto redo;
  22012. if (lock)
  22013. - spin_unlock(&n->list_lock);
  22014. + raw_spin_unlock(&n->list_lock);
  22015. if (m == M_FREE) {
  22016. stat(s, DEACTIVATE_EMPTY);
  22017. @@ -2151,10 +2180,10 @@
  22018. n2 = get_node(s, page_to_nid(page));
  22019. if (n != n2) {
  22020. if (n)
  22021. - spin_unlock(&n->list_lock);
  22022. + raw_spin_unlock(&n->list_lock);
  22023. n = n2;
  22024. - spin_lock(&n->list_lock);
  22025. + raw_spin_lock(&n->list_lock);
  22026. }
  22027. do {
  22028. @@ -2183,7 +2212,7 @@
  22029. }
  22030. if (n)
  22031. - spin_unlock(&n->list_lock);
  22032. + raw_spin_unlock(&n->list_lock);
  22033. while (discard_page) {
  22034. page = discard_page;
  22035. @@ -2222,14 +2251,21 @@
  22036. pobjects = oldpage->pobjects;
  22037. pages = oldpage->pages;
  22038. if (drain && pobjects > s->cpu_partial) {
  22039. + struct slub_free_list *f;
  22040. unsigned long flags;
  22041. + LIST_HEAD(tofree);
  22042. /*
  22043. * partial array is full. Move the existing
  22044. * set to the per node partial list.
  22045. */
  22046. local_irq_save(flags);
  22047. unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
  22048. + f = this_cpu_ptr(&slub_free_list);
  22049. + raw_spin_lock(&f->lock);
  22050. + list_splice_init(&f->list, &tofree);
  22051. + raw_spin_unlock(&f->lock);
  22052. local_irq_restore(flags);
  22053. + free_delayed(&tofree);
  22054. oldpage = NULL;
  22055. pobjects = 0;
  22056. pages = 0;
  22057. @@ -2301,7 +2337,22 @@
  22058. static void flush_all(struct kmem_cache *s)
  22059. {
  22060. + LIST_HEAD(tofree);
  22061. + int cpu;
  22062. +
  22063. on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
  22064. + for_each_online_cpu(cpu) {
  22065. + struct slub_free_list *f;
  22066. +
  22067. + if (!has_cpu_slab(cpu, s))
  22068. + continue;
  22069. +
  22070. + f = &per_cpu(slub_free_list, cpu);
  22071. + raw_spin_lock_irq(&f->lock);
  22072. + list_splice_init(&f->list, &tofree);
  22073. + raw_spin_unlock_irq(&f->lock);
  22074. + free_delayed(&tofree);
  22075. + }
  22076. }
  22077. /*
  22078. @@ -2337,10 +2388,10 @@
  22079. unsigned long x = 0;
  22080. struct page *page;
  22081. - spin_lock_irqsave(&n->list_lock, flags);
  22082. + raw_spin_lock_irqsave(&n->list_lock, flags);
  22083. list_for_each_entry(page, &n->partial, lru)
  22084. x += get_count(page);
  22085. - spin_unlock_irqrestore(&n->list_lock, flags);
  22086. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  22087. return x;
  22088. }
  22089. #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
  22090. @@ -2478,8 +2529,10 @@
  22091. * already disabled (which is the case for bulk allocation).
  22092. */
  22093. static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  22094. - unsigned long addr, struct kmem_cache_cpu *c)
  22095. + unsigned long addr, struct kmem_cache_cpu *c,
  22096. + struct list_head *to_free)
  22097. {
  22098. + struct slub_free_list *f;
  22099. void *freelist;
  22100. struct page *page;
  22101. @@ -2539,6 +2592,13 @@
  22102. VM_BUG_ON(!c->page->frozen);
  22103. c->freelist = get_freepointer(s, freelist);
  22104. c->tid = next_tid(c->tid);
  22105. +
  22106. +out:
  22107. + f = this_cpu_ptr(&slub_free_list);
  22108. + raw_spin_lock(&f->lock);
  22109. + list_splice_init(&f->list, to_free);
  22110. + raw_spin_unlock(&f->lock);
  22111. +
  22112. return freelist;
  22113. new_slab:
  22114. @@ -2570,7 +2630,7 @@
  22115. deactivate_slab(s, page, get_freepointer(s, freelist));
  22116. c->page = NULL;
  22117. c->freelist = NULL;
  22118. - return freelist;
  22119. + goto out;
  22120. }
  22121. /*
  22122. @@ -2582,6 +2642,7 @@
  22123. {
  22124. void *p;
  22125. unsigned long flags;
  22126. + LIST_HEAD(tofree);
  22127. local_irq_save(flags);
  22128. #ifdef CONFIG_PREEMPT
  22129. @@ -2593,8 +2654,9 @@
  22130. c = this_cpu_ptr(s->cpu_slab);
  22131. #endif
  22132. - p = ___slab_alloc(s, gfpflags, node, addr, c);
  22133. + p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
  22134. local_irq_restore(flags);
  22135. + free_delayed(&tofree);
  22136. return p;
  22137. }
  22138. @@ -2780,7 +2842,7 @@
  22139. do {
  22140. if (unlikely(n)) {
  22141. - spin_unlock_irqrestore(&n->list_lock, flags);
  22142. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  22143. n = NULL;
  22144. }
  22145. prior = page->freelist;
  22146. @@ -2812,7 +2874,7 @@
  22147. * Otherwise the list_lock will synchronize with
  22148. * other processors updating the list of slabs.
  22149. */
  22150. - spin_lock_irqsave(&n->list_lock, flags);
  22151. + raw_spin_lock_irqsave(&n->list_lock, flags);
  22152. }
  22153. }
  22154. @@ -2854,7 +2916,7 @@
  22155. add_partial(n, page, DEACTIVATE_TO_TAIL);
  22156. stat(s, FREE_ADD_PARTIAL);
  22157. }
  22158. - spin_unlock_irqrestore(&n->list_lock, flags);
  22159. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  22160. return;
  22161. slab_empty:
  22162. @@ -2869,7 +2931,7 @@
  22163. remove_full(s, n, page);
  22164. }
  22165. - spin_unlock_irqrestore(&n->list_lock, flags);
  22166. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  22167. stat(s, FREE_SLAB);
  22168. discard_slab(s, page);
  22169. }
  22170. @@ -3074,6 +3136,7 @@
  22171. void **p)
  22172. {
  22173. struct kmem_cache_cpu *c;
  22174. + LIST_HEAD(to_free);
  22175. int i;
  22176. /* memcg and kmem_cache debug support */
  22177. @@ -3097,7 +3160,7 @@
  22178. * of re-populating per CPU c->freelist
  22179. */
  22180. p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
  22181. - _RET_IP_, c);
  22182. + _RET_IP_, c, &to_free);
  22183. if (unlikely(!p[i]))
  22184. goto error;
  22185. @@ -3109,6 +3172,7 @@
  22186. }
  22187. c->tid = next_tid(c->tid);
  22188. local_irq_enable();
  22189. + free_delayed(&to_free);
  22190. /* Clear memory outside IRQ disabled fastpath loop */
  22191. if (unlikely(flags & __GFP_ZERO)) {
  22192. @@ -3256,7 +3320,7 @@
  22193. init_kmem_cache_node(struct kmem_cache_node *n)
  22194. {
  22195. n->nr_partial = 0;
  22196. - spin_lock_init(&n->list_lock);
  22197. + raw_spin_lock_init(&n->list_lock);
  22198. INIT_LIST_HEAD(&n->partial);
  22199. #ifdef CONFIG_SLUB_DEBUG
  22200. atomic_long_set(&n->nr_slabs, 0);
  22201. @@ -3600,6 +3664,10 @@
  22202. const char *text)
  22203. {
  22204. #ifdef CONFIG_SLUB_DEBUG
  22205. +#ifdef CONFIG_PREEMPT_RT_BASE
  22206. + /* XXX move out of irq-off section */
  22207. + slab_err(s, page, text, s->name);
  22208. +#else
  22209. void *addr = page_address(page);
  22210. void *p;
  22211. unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
  22212. @@ -3620,6 +3688,7 @@
  22213. slab_unlock(page);
  22214. kfree(map);
  22215. #endif
  22216. +#endif
  22217. }
  22218. /*
  22219. @@ -3633,7 +3702,7 @@
  22220. struct page *page, *h;
  22221. BUG_ON(irqs_disabled());
  22222. - spin_lock_irq(&n->list_lock);
  22223. + raw_spin_lock_irq(&n->list_lock);
  22224. list_for_each_entry_safe(page, h, &n->partial, lru) {
  22225. if (!page->inuse) {
  22226. remove_partial(n, page);
  22227. @@ -3643,7 +3712,7 @@
  22228. "Objects remaining in %s on __kmem_cache_shutdown()");
  22229. }
  22230. }
  22231. - spin_unlock_irq(&n->list_lock);
  22232. + raw_spin_unlock_irq(&n->list_lock);
  22233. list_for_each_entry_safe(page, h, &discard, lru)
  22234. discard_slab(s, page);
  22235. @@ -3901,7 +3970,7 @@
  22236. for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
  22237. INIT_LIST_HEAD(promote + i);
  22238. - spin_lock_irqsave(&n->list_lock, flags);
  22239. + raw_spin_lock_irqsave(&n->list_lock, flags);
  22240. /*
  22241. * Build lists of slabs to discard or promote.
  22242. @@ -3932,7 +4001,7 @@
  22243. for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
  22244. list_splice(promote + i, &n->partial);
  22245. - spin_unlock_irqrestore(&n->list_lock, flags);
  22246. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  22247. /* Release empty slabs */
  22248. list_for_each_entry_safe(page, t, &discard, lru)
  22249. @@ -4108,6 +4177,12 @@
  22250. {
  22251. static __initdata struct kmem_cache boot_kmem_cache,
  22252. boot_kmem_cache_node;
  22253. + int cpu;
  22254. +
  22255. + for_each_possible_cpu(cpu) {
  22256. + raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
  22257. + INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
  22258. + }
  22259. if (debug_guardpage_minorder())
  22260. slub_max_order = 0;
  22261. @@ -4354,7 +4429,7 @@
  22262. struct page *page;
  22263. unsigned long flags;
  22264. - spin_lock_irqsave(&n->list_lock, flags);
  22265. + raw_spin_lock_irqsave(&n->list_lock, flags);
  22266. list_for_each_entry(page, &n->partial, lru) {
  22267. validate_slab_slab(s, page, map);
  22268. @@ -4376,7 +4451,7 @@
  22269. s->name, count, atomic_long_read(&n->nr_slabs));
  22270. out:
  22271. - spin_unlock_irqrestore(&n->list_lock, flags);
  22272. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  22273. return count;
  22274. }
  22275. @@ -4564,12 +4639,12 @@
  22276. if (!atomic_long_read(&n->nr_slabs))
  22277. continue;
  22278. - spin_lock_irqsave(&n->list_lock, flags);
  22279. + raw_spin_lock_irqsave(&n->list_lock, flags);
  22280. list_for_each_entry(page, &n->partial, lru)
  22281. process_slab(&t, s, page, alloc, map);
  22282. list_for_each_entry(page, &n->full, lru)
  22283. process_slab(&t, s, page, alloc, map);
  22284. - spin_unlock_irqrestore(&n->list_lock, flags);
  22285. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  22286. }
  22287. for (i = 0; i < t.count; i++) {
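Beyond the mechanical list_lock conversion, the structural addition in the slub.c diff above is the per-CPU slub_free_list. Freeing a slab ends up in the page allocator, which on RT may take sleeping locks, so __free_slab() cannot run with interrupts disabled; instead the page is parked on a raw-spinlocked per-CPU list, and the callers that re-enable interrupts (___slab_alloc(), flush_all(), the unfreeze_partials() path and the bulk allocator) splice that list out and drain it via free_delayed(). The sketch below condenses the park/drain pair using the names from the hunks; it is illustrative, not code to apply.

#include <linux/list.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>

struct slub_free_list {
	raw_spinlock_t lock;
	struct list_head list;
};
static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);

/* IRQs off: park the slab page instead of freeing it right away */
static void defer_free_sketch(struct page *page)
{
	struct slub_free_list *f = this_cpu_ptr(&slub_free_list);

	raw_spin_lock(&f->lock);
	list_add(&page->lru, &f->list);
	raw_spin_unlock(&f->lock);
}

/* IRQs on again: splice the parked pages out and really free them */
static void drain_deferred_sketch(void)
{
	struct slub_free_list *f;
	unsigned long flags;
	LIST_HEAD(tofree);

	local_irq_save(flags);
	f = this_cpu_ptr(&slub_free_list);
	raw_spin_lock(&f->lock);
	list_splice_init(&f->list, &tofree);
	raw_spin_unlock(&f->lock);
	local_irq_restore(flags);

	while (!list_empty(&tofree)) {
		struct page *page = list_first_entry(&tofree, struct page, lru);

		list_del(&page->lru);
		__free_slab(page->slab_cache, page);	/* the deferred free */
	}
}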
  22288. diff -Nur linux-4.8.15.orig/mm/swap.c linux-4.8.15/mm/swap.c
  22289. --- linux-4.8.15.orig/mm/swap.c 2016-12-15 17:50:48.000000000 +0100
  22290. +++ linux-4.8.15/mm/swap.c 2017-01-01 17:07:16.111432108 +0100
  22291. @@ -32,6 +32,7 @@
  22292. #include <linux/memcontrol.h>
  22293. #include <linux/gfp.h>
  22294. #include <linux/uio.h>
  22295. +#include <linux/locallock.h>
  22296. #include <linux/hugetlb.h>
  22297. #include <linux/page_idle.h>
  22298. @@ -50,6 +51,8 @@
  22299. #ifdef CONFIG_SMP
  22300. static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
  22301. #endif
  22302. +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
  22303. +DEFINE_LOCAL_IRQ_LOCK(swapvec_lock);
  22304. /*
  22305. * This path almost never happens for VM activity - pages are normally
  22306. @@ -240,11 +243,11 @@
  22307. unsigned long flags;
  22308. get_page(page);
  22309. - local_irq_save(flags);
  22310. + local_lock_irqsave(rotate_lock, flags);
  22311. pvec = this_cpu_ptr(&lru_rotate_pvecs);
  22312. if (!pagevec_add(pvec, page) || PageCompound(page))
  22313. pagevec_move_tail(pvec);
  22314. - local_irq_restore(flags);
  22315. + local_unlock_irqrestore(rotate_lock, flags);
  22316. }
  22317. }
  22318. @@ -294,12 +297,13 @@
  22319. {
  22320. page = compound_head(page);
  22321. if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  22322. - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
  22323. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  22324. + activate_page_pvecs);
  22325. get_page(page);
  22326. if (!pagevec_add(pvec, page) || PageCompound(page))
  22327. pagevec_lru_move_fn(pvec, __activate_page, NULL);
  22328. - put_cpu_var(activate_page_pvecs);
  22329. + put_locked_var(swapvec_lock, activate_page_pvecs);
  22330. }
  22331. }
  22332. @@ -326,7 +330,7 @@
  22333. static void __lru_cache_activate_page(struct page *page)
  22334. {
  22335. - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  22336. + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
  22337. int i;
  22338. /*
  22339. @@ -348,7 +352,7 @@
  22340. }
  22341. }
  22342. - put_cpu_var(lru_add_pvec);
  22343. + put_locked_var(swapvec_lock, lru_add_pvec);
  22344. }
  22345. /*
  22346. @@ -390,12 +394,12 @@
  22347. static void __lru_cache_add(struct page *page)
  22348. {
  22349. - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  22350. + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
  22351. get_page(page);
  22352. if (!pagevec_add(pvec, page) || PageCompound(page))
  22353. __pagevec_lru_add(pvec);
  22354. - put_cpu_var(lru_add_pvec);
  22355. + put_locked_var(swapvec_lock, lru_add_pvec);
  22356. }
  22357. /**
  22358. @@ -593,9 +597,15 @@
  22359. unsigned long flags;
  22360. /* No harm done if a racing interrupt already did this */
  22361. - local_irq_save(flags);
  22362. +#ifdef CONFIG_PREEMPT_RT_BASE
  22363. + local_lock_irqsave_on(rotate_lock, flags, cpu);
  22364. pagevec_move_tail(pvec);
  22365. - local_irq_restore(flags);
  22366. + local_unlock_irqrestore_on(rotate_lock, flags, cpu);
  22367. +#else
  22368. + local_lock_irqsave(rotate_lock, flags);
  22369. + pagevec_move_tail(pvec);
  22370. + local_unlock_irqrestore(rotate_lock, flags);
  22371. +#endif
  22372. }
  22373. pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
  22374. @@ -627,11 +637,12 @@
  22375. return;
  22376. if (likely(get_page_unless_zero(page))) {
  22377. - struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
  22378. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  22379. + lru_deactivate_file_pvecs);
  22380. if (!pagevec_add(pvec, page) || PageCompound(page))
  22381. pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
  22382. - put_cpu_var(lru_deactivate_file_pvecs);
  22383. + put_locked_var(swapvec_lock, lru_deactivate_file_pvecs);
  22384. }
  22385. }
  22386. @@ -646,27 +657,31 @@
  22387. void deactivate_page(struct page *page)
  22388. {
  22389. if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
  22390. - struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
  22391. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  22392. + lru_deactivate_pvecs);
  22393. get_page(page);
  22394. if (!pagevec_add(pvec, page) || PageCompound(page))
  22395. pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
  22396. - put_cpu_var(lru_deactivate_pvecs);
  22397. + put_locked_var(swapvec_lock, lru_deactivate_pvecs);
  22398. }
  22399. }
  22400. void lru_add_drain(void)
  22401. {
  22402. - lru_add_drain_cpu(get_cpu());
  22403. - put_cpu();
  22404. + lru_add_drain_cpu(local_lock_cpu(swapvec_lock));
  22405. + local_unlock_cpu(swapvec_lock);
  22406. }
  22407. -static void lru_add_drain_per_cpu(struct work_struct *dummy)
  22408. +#ifdef CONFIG_PREEMPT_RT_BASE
  22409. +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
  22410. {
  22411. - lru_add_drain();
  22412. + local_lock_on(swapvec_lock, cpu);
  22413. + lru_add_drain_cpu(cpu);
  22414. + local_unlock_on(swapvec_lock, cpu);
  22415. }
  22416. -static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
  22417. +#else
  22418. /*
  22419. * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
  22420. @@ -686,6 +701,22 @@
  22421. }
  22422. early_initcall(lru_init);
  22423. +static void lru_add_drain_per_cpu(struct work_struct *dummy)
  22424. +{
  22425. + lru_add_drain();
  22426. +}
  22427. +
  22428. +static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
  22429. +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
  22430. +{
  22431. + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
  22432. +
  22433. + INIT_WORK(work, lru_add_drain_per_cpu);
  22434. + queue_work_on(cpu, lru_add_drain_wq, work);
  22435. + cpumask_set_cpu(cpu, has_work);
  22436. +}
  22437. +#endif
  22438. +
  22439. void lru_add_drain_all(void)
  22440. {
  22441. static DEFINE_MUTEX(lock);
  22442. @@ -697,21 +728,18 @@
  22443. cpumask_clear(&has_work);
  22444. for_each_online_cpu(cpu) {
  22445. - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
  22446. -
  22447. if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
  22448. pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
  22449. pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
  22450. pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
  22451. - need_activate_page_drain(cpu)) {
  22452. - INIT_WORK(work, lru_add_drain_per_cpu);
  22453. - queue_work_on(cpu, lru_add_drain_wq, work);
  22454. - cpumask_set_cpu(cpu, &has_work);
  22455. - }
  22456. + need_activate_page_drain(cpu))
  22457. + remote_lru_add_drain(cpu, &has_work);
  22458. }
  22459. +#ifndef CONFIG_PREEMPT_RT_BASE
  22460. for_each_cpu(cpu, &has_work)
  22461. flush_work(&per_cpu(lru_add_drain_work, cpu));
  22462. +#endif
  22463. put_online_cpus();
  22464. mutex_unlock(&lock);
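The swap.c hunks replace the implicit protection of the per-CPU pagevecs (get_cpu_var() disabling preemption, local_irq_save() for the rotate path) with two named local locks, swapvec_lock and rotate_lock. On !RT these compile to exactly the old behaviour; on RT they are per-CPU sleeping locks, which is also why lru_add_drain_all() can drain a remote CPU directly under local_lock_on() instead of scheduling a work item there. A minimal sketch of the access pattern, assuming the get_locked_var()/put_locked_var() helpers from linux/locallock.h (the *_sketch names are stand-ins, not the patch's symbols):

#include <linux/mm.h>
#include <linux/pagevec.h>
#include <linux/locallock.h>	/* RT local-lock API added by this patch */

static DEFINE_LOCAL_IRQ_LOCK(swapvec_lock_sketch);
static DEFINE_PER_CPU(struct pagevec, lru_add_pvec_sketch);

static void lru_cache_add_sketch(struct page *page)
{
	/* lock this CPU's swapvec lock and hand back its pagevec */
	struct pagevec *pvec = &get_locked_var(swapvec_lock_sketch,
					       lru_add_pvec_sketch);

	get_page(page);
	if (!pagevec_add(pvec, page) || PageCompound(page))
		__pagevec_lru_add(pvec);

	put_locked_var(swapvec_lock_sketch, lru_add_pvec_sketch);
}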
  22465. diff -Nur linux-4.8.15.orig/mm/truncate.c linux-4.8.15/mm/truncate.c
  22466. --- linux-4.8.15.orig/mm/truncate.c 2016-12-15 17:50:48.000000000 +0100
  22467. +++ linux-4.8.15/mm/truncate.c 2017-01-01 17:07:16.115432374 +0100
  22468. @@ -62,9 +62,12 @@
  22469. * protected by mapping->tree_lock.
  22470. */
  22471. if (!workingset_node_shadows(node) &&
  22472. - !list_empty(&node->private_list))
  22473. - list_lru_del(&workingset_shadow_nodes,
  22474. + !list_empty(&node->private_list)) {
  22475. + local_lock(workingset_shadow_lock);
  22476. + list_lru_del(&__workingset_shadow_nodes,
  22477. &node->private_list);
  22478. + local_unlock(workingset_shadow_lock);
  22479. + }
  22480. __radix_tree_delete_node(&mapping->page_tree, node);
  22481. unlock:
  22482. spin_unlock_irq(&mapping->tree_lock);
  22483. diff -Nur linux-4.8.15.orig/mm/vmalloc.c linux-4.8.15/mm/vmalloc.c
  22484. --- linux-4.8.15.orig/mm/vmalloc.c 2016-12-15 17:50:48.000000000 +0100
  22485. +++ linux-4.8.15/mm/vmalloc.c 2017-01-01 17:07:16.115432374 +0100
  22486. @@ -845,7 +845,7 @@
  22487. struct vmap_block *vb;
  22488. struct vmap_area *va;
  22489. unsigned long vb_idx;
  22490. - int node, err;
  22491. + int node, err, cpu;
  22492. void *vaddr;
  22493. node = numa_node_id();
  22494. @@ -888,11 +888,12 @@
  22495. BUG_ON(err);
  22496. radix_tree_preload_end();
  22497. - vbq = &get_cpu_var(vmap_block_queue);
  22498. + cpu = get_cpu_light();
  22499. + vbq = this_cpu_ptr(&vmap_block_queue);
  22500. spin_lock(&vbq->lock);
  22501. list_add_tail_rcu(&vb->free_list, &vbq->free);
  22502. spin_unlock(&vbq->lock);
  22503. - put_cpu_var(vmap_block_queue);
  22504. + put_cpu_light();
  22505. return vaddr;
  22506. }
  22507. @@ -961,6 +962,7 @@
  22508. struct vmap_block *vb;
  22509. void *vaddr = NULL;
  22510. unsigned int order;
  22511. + int cpu;
  22512. BUG_ON(offset_in_page(size));
  22513. BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
  22514. @@ -975,7 +977,8 @@
  22515. order = get_order(size);
  22516. rcu_read_lock();
  22517. - vbq = &get_cpu_var(vmap_block_queue);
  22518. + cpu = get_cpu_light();
  22519. + vbq = this_cpu_ptr(&vmap_block_queue);
  22520. list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  22521. unsigned long pages_off;
  22522. @@ -998,7 +1001,7 @@
  22523. break;
  22524. }
  22525. - put_cpu_var(vmap_block_queue);
  22526. + put_cpu_light();
  22527. rcu_read_unlock();
  22528. /* Allocate new block if nothing was found */
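In vmalloc.c the per-CPU vmap_block_queue is already serialised by its own vbq->lock; get_cpu_var() was only pinning the task to the CPU so that the this_cpu data stays stable. The patch keeps that pinning with get_cpu_light()/put_cpu_light(), RT helpers introduced elsewhere in this patch set (on !RT they behave like get_cpu()/put_cpu()), so the section stays preemptible and the spinlock may sleep. Sketch of the pattern, with vbq_sketch as a stand-in structure:

#include <linux/list.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>

struct vbq_sketch {
	spinlock_t lock;
	struct list_head free;
};
static DEFINE_PER_CPU(struct vbq_sketch, vbq_pcpu_sketch);

static void add_free_block_sketch(struct list_head *entry)
{
	struct vbq_sketch *vbq;

	get_cpu_light();		/* pin to this CPU, stay preemptible on RT */
	vbq = this_cpu_ptr(&vbq_pcpu_sketch);
	spin_lock(&vbq->lock);		/* the real serialisation */
	list_add_tail(entry, &vbq->free);
	spin_unlock(&vbq->lock);
	put_cpu_light();
}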
  22529. diff -Nur linux-4.8.15.orig/mm/vmstat.c linux-4.8.15/mm/vmstat.c
  22530. --- linux-4.8.15.orig/mm/vmstat.c 2016-12-15 17:50:48.000000000 +0100
  22531. +++ linux-4.8.15/mm/vmstat.c 2017-01-01 17:07:16.115432374 +0100
  22532. @@ -245,6 +245,7 @@
  22533. long x;
  22534. long t;
  22535. + preempt_disable_rt();
  22536. x = delta + __this_cpu_read(*p);
  22537. t = __this_cpu_read(pcp->stat_threshold);
  22538. @@ -254,6 +255,7 @@
  22539. x = 0;
  22540. }
  22541. __this_cpu_write(*p, x);
  22542. + preempt_enable_rt();
  22543. }
  22544. EXPORT_SYMBOL(__mod_zone_page_state);
  22545. @@ -265,6 +267,7 @@
  22546. long x;
  22547. long t;
  22548. + preempt_disable_rt();
  22549. x = delta + __this_cpu_read(*p);
  22550. t = __this_cpu_read(pcp->stat_threshold);
  22551. @@ -274,6 +277,7 @@
  22552. x = 0;
  22553. }
  22554. __this_cpu_write(*p, x);
  22555. + preempt_enable_rt();
  22556. }
  22557. EXPORT_SYMBOL(__mod_node_page_state);
  22558. @@ -306,6 +310,7 @@
  22559. s8 __percpu *p = pcp->vm_stat_diff + item;
  22560. s8 v, t;
  22561. + preempt_disable_rt();
  22562. v = __this_cpu_inc_return(*p);
  22563. t = __this_cpu_read(pcp->stat_threshold);
  22564. if (unlikely(v > t)) {
  22565. @@ -314,6 +319,7 @@
  22566. zone_page_state_add(v + overstep, zone, item);
  22567. __this_cpu_write(*p, -overstep);
  22568. }
  22569. + preempt_enable_rt();
  22570. }
  22571. void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
  22572. @@ -322,6 +328,7 @@
  22573. s8 __percpu *p = pcp->vm_node_stat_diff + item;
  22574. s8 v, t;
  22575. + preempt_disable_rt();
  22576. v = __this_cpu_inc_return(*p);
  22577. t = __this_cpu_read(pcp->stat_threshold);
  22578. if (unlikely(v > t)) {
  22579. @@ -330,6 +337,7 @@
  22580. node_page_state_add(v + overstep, pgdat, item);
  22581. __this_cpu_write(*p, -overstep);
  22582. }
  22583. + preempt_enable_rt();
  22584. }
  22585. void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
  22586. @@ -350,6 +358,7 @@
  22587. s8 __percpu *p = pcp->vm_stat_diff + item;
  22588. s8 v, t;
  22589. + preempt_disable_rt();
  22590. v = __this_cpu_dec_return(*p);
  22591. t = __this_cpu_read(pcp->stat_threshold);
  22592. if (unlikely(v < - t)) {
  22593. @@ -358,6 +367,7 @@
  22594. zone_page_state_add(v - overstep, zone, item);
  22595. __this_cpu_write(*p, overstep);
  22596. }
  22597. + preempt_enable_rt();
  22598. }
  22599. void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
  22600. @@ -366,6 +376,7 @@
  22601. s8 __percpu *p = pcp->vm_node_stat_diff + item;
  22602. s8 v, t;
  22603. + preempt_disable_rt();
  22604. v = __this_cpu_dec_return(*p);
  22605. t = __this_cpu_read(pcp->stat_threshold);
  22606. if (unlikely(v < - t)) {
  22607. @@ -374,6 +385,7 @@
  22608. node_page_state_add(v - overstep, pgdat, item);
  22609. __this_cpu_write(*p, overstep);
  22610. }
  22611. + preempt_enable_rt();
  22612. }
  22613. void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
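The vmstat counters are updated with non-atomic __this_cpu operations, which assume the updater cannot be preempted between the read and the write. Many callers only hold a spinlock or have "interrupts disabled", both of which become preemptible on RT, so each helper above gains a preempt_disable_rt()/preempt_enable_rt() pair around the read-modify-write window; those helpers are assumed from elsewhere in this patch set and compile away on !RT. Reduced to its essence:

#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/types.h>

/* preempt_disable_rt()/preempt_enable_rt() come from this patch set;
 * they are no-ops on !PREEMPT_RT kernels. */
static void mod_counter_sketch(s8 __percpu *p, s8 delta)
{
	s8 v;

	preempt_disable_rt();		/* keep the __this_cpu pair on one CPU */
	v = __this_cpu_read(*p);
	__this_cpu_write(*p, v + delta);
	preempt_enable_rt();
}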
  22614. diff -Nur linux-4.8.15.orig/mm/workingset.c linux-4.8.15/mm/workingset.c
  22615. --- linux-4.8.15.orig/mm/workingset.c 2016-12-15 17:50:48.000000000 +0100
  22616. +++ linux-4.8.15/mm/workingset.c 2017-01-01 17:07:16.115432374 +0100
  22617. @@ -334,7 +334,8 @@
  22618. * point where they would still be useful.
  22619. */
  22620. -struct list_lru workingset_shadow_nodes;
  22621. +struct list_lru __workingset_shadow_nodes;
  22622. +DEFINE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
  22623. static unsigned long count_shadow_nodes(struct shrinker *shrinker,
  22624. struct shrink_control *sc)
  22625. @@ -344,9 +345,9 @@
  22626. unsigned long pages;
  22627. /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
  22628. - local_irq_disable();
  22629. - shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc);
  22630. - local_irq_enable();
  22631. + local_lock_irq(workingset_shadow_lock);
  22632. + shadow_nodes = list_lru_shrink_count(&__workingset_shadow_nodes, sc);
  22633. + local_unlock_irq(workingset_shadow_lock);
  22634. if (sc->memcg) {
  22635. pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid,
  22636. @@ -438,9 +439,9 @@
  22637. spin_unlock(&mapping->tree_lock);
  22638. ret = LRU_REMOVED_RETRY;
  22639. out:
  22640. - local_irq_enable();
  22641. + local_unlock_irq(workingset_shadow_lock);
  22642. cond_resched();
  22643. - local_irq_disable();
  22644. + local_lock_irq(workingset_shadow_lock);
  22645. spin_lock(lru_lock);
  22646. return ret;
  22647. }
  22648. @@ -451,10 +452,10 @@
  22649. unsigned long ret;
  22650. /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
  22651. - local_irq_disable();
  22652. - ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc,
  22653. + local_lock_irq(workingset_shadow_lock);
  22654. + ret = list_lru_shrink_walk(&__workingset_shadow_nodes, sc,
  22655. shadow_lru_isolate, NULL);
  22656. - local_irq_enable();
  22657. + local_unlock_irq(workingset_shadow_lock);
  22658. return ret;
  22659. }
  22660. @@ -492,7 +493,7 @@
  22661. pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
  22662. timestamp_bits, max_order, bucket_order);
  22663. - ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);
  22664. + ret = list_lru_init_key(&__workingset_shadow_nodes, &shadow_nodes_key);
  22665. if (ret)
  22666. goto err;
  22667. ret = register_shrinker(&workingset_shadow_shrinker);
  22668. @@ -500,7 +501,7 @@
  22669. goto err_list_lru;
  22670. return 0;
  22671. err_list_lru:
  22672. - list_lru_destroy(&workingset_shadow_nodes);
  22673. + list_lru_destroy(&__workingset_shadow_nodes);
  22674. err:
  22675. return ret;
  22676. }
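Both the workingset.c hunks above and the truncate.c hunk further up touch the same detail: the list_lru lock of the shadow-node LRU nests inside the IRQ-safe mapping->tree_lock, which the old code expressed by blindly disabling interrupts around the list_lru calls. The patch renames the LRU to __workingset_shadow_nodes and pairs it with the workingset_shadow_lock local lock, so the nesting rule is kept while the section stays preemptible on RT. Sketch of a shrinker-side access, with *_sketch names standing in for the real symbols:

#include <linux/list_lru.h>
#include <linux/shrinker.h>
#include <linux/locallock.h>	/* RT local-lock API added by this patch */

static struct list_lru shadow_nodes_sketch;
static DEFINE_LOCAL_IRQ_LOCK(shadow_lock_sketch);

static unsigned long count_shadow_sketch(struct shrink_control *sc)
{
	unsigned long nodes;

	/* the list_lru lock nests inside the IRQ-safe mapping->tree_lock, so
	 * "interrupts off" here means holding the local lock on RT */
	local_lock_irq(shadow_lock_sketch);
	nodes = list_lru_shrink_count(&shadow_nodes_sketch, sc);
	local_unlock_irq(shadow_lock_sketch);

	return nodes;
}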
  22677. diff -Nur linux-4.8.15.orig/mm/zsmalloc.c linux-4.8.15/mm/zsmalloc.c
  22678. --- linux-4.8.15.orig/mm/zsmalloc.c 2016-12-15 17:50:48.000000000 +0100
  22679. +++ linux-4.8.15/mm/zsmalloc.c 2017-01-01 17:07:16.115432374 +0100
  22680. @@ -53,6 +53,7 @@
  22681. #include <linux/mount.h>
  22682. #include <linux/migrate.h>
  22683. #include <linux/pagemap.h>
  22684. +#include <linux/locallock.h>
  22685. #define ZSPAGE_MAGIC 0x58
  22686. @@ -70,9 +71,22 @@
  22687. */
  22688. #define ZS_MAX_ZSPAGE_ORDER 2
  22689. #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
  22690. -
  22691. #define ZS_HANDLE_SIZE (sizeof(unsigned long))
  22692. +#ifdef CONFIG_PREEMPT_RT_FULL
  22693. +
  22694. +struct zsmalloc_handle {
  22695. + unsigned long addr;
  22696. + struct mutex lock;
  22697. +};
  22698. +
  22699. +#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
  22700. +
  22701. +#else
  22702. +
  22703. +#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
  22704. +#endif
  22705. +
  22706. /*
  22707. * Object location (<PFN>, <obj_idx>) is encoded as
  22708. * as single (unsigned long) handle value.
  22709. @@ -327,7 +341,7 @@
  22710. static int create_cache(struct zs_pool *pool)
  22711. {
  22712. - pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
  22713. + pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
  22714. 0, 0, NULL);
  22715. if (!pool->handle_cachep)
  22716. return 1;
  22717. @@ -351,10 +365,27 @@
  22718. static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
  22719. {
  22720. - return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
  22721. - gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
  22722. + void *p;
  22723. +
  22724. + p = kmem_cache_alloc(pool->handle_cachep,
  22725. + gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
  22726. +#ifdef CONFIG_PREEMPT_RT_FULL
  22727. + if (p) {
  22728. + struct zsmalloc_handle *zh = p;
  22729. +
  22730. + mutex_init(&zh->lock);
  22731. + }
  22732. +#endif
  22733. + return (unsigned long)p;
  22734. }
  22735. +#ifdef CONFIG_PREEMPT_RT_FULL
  22736. +static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
  22737. +{
  22738. + return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1));
  22739. +}
  22740. +#endif
  22741. +
  22742. static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
  22743. {
  22744. kmem_cache_free(pool->handle_cachep, (void *)handle);
  22745. @@ -373,12 +404,18 @@
  22746. static void record_obj(unsigned long handle, unsigned long obj)
  22747. {
  22748. +#ifdef CONFIG_PREEMPT_RT_FULL
  22749. + struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
  22750. +
  22751. + WRITE_ONCE(zh->addr, obj);
  22752. +#else
  22753. /*
  22754. * lsb of @obj represents handle lock while other bits
  22755. * represent object value the handle is pointing so
  22756. * updating shouldn't do store tearing.
  22757. */
  22758. WRITE_ONCE(*(unsigned long *)handle, obj);
  22759. +#endif
  22760. }
  22761. /* zpool driver */
  22762. @@ -467,6 +504,7 @@
  22763. /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
  22764. static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
  22765. +static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock);
  22766. static bool is_zspage_isolated(struct zspage *zspage)
  22767. {
  22768. @@ -902,7 +940,13 @@
  22769. static unsigned long handle_to_obj(unsigned long handle)
  22770. {
  22771. +#ifdef CONFIG_PREEMPT_RT_FULL
  22772. + struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
  22773. +
  22774. + return zh->addr;
  22775. +#else
  22776. return *(unsigned long *)handle;
  22777. +#endif
  22778. }
  22779. static unsigned long obj_to_head(struct page *page, void *obj)
  22780. @@ -916,22 +960,46 @@
  22781. static inline int testpin_tag(unsigned long handle)
  22782. {
  22783. +#ifdef CONFIG_PREEMPT_RT_FULL
  22784. + struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
  22785. +
  22786. + return mutex_is_locked(&zh->lock);
  22787. +#else
  22788. return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
  22789. +#endif
  22790. }
  22791. static inline int trypin_tag(unsigned long handle)
  22792. {
  22793. +#ifdef CONFIG_PREEMPT_RT_FULL
  22794. + struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
  22795. +
  22796. + return mutex_trylock(&zh->lock);
  22797. +#else
  22798. return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
  22799. +#endif
  22800. }
  22801. static void pin_tag(unsigned long handle)
  22802. {
  22803. +#ifdef CONFIG_PREEMPT_RT_FULL
  22804. + struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
  22805. +
  22806. + return mutex_lock(&zh->lock);
  22807. +#else
  22808. bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
  22809. +#endif
  22810. }
  22811. static void unpin_tag(unsigned long handle)
  22812. {
  22813. +#ifdef CONFIG_PREEMPT_RT_FULL
  22814. + struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
  22815. +
  22816. + return mutex_unlock(&zh->lock);
  22817. +#else
  22818. bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
  22819. +#endif
  22820. }
  22821. static void reset_page(struct page *page)
  22822. @@ -1423,7 +1491,7 @@
  22823. class = pool->size_class[class_idx];
  22824. off = (class->size * obj_idx) & ~PAGE_MASK;
  22825. - area = &get_cpu_var(zs_map_area);
  22826. + area = &get_locked_var(zs_map_area_lock, zs_map_area);
  22827. area->vm_mm = mm;
  22828. if (off + class->size <= PAGE_SIZE) {
  22829. /* this object is contained entirely within a page */
  22830. @@ -1477,7 +1545,7 @@
  22831. __zs_unmap_object(area, pages, off, class->size);
  22832. }
  22833. - put_cpu_var(zs_map_area);
  22834. + put_locked_var(zs_map_area_lock, zs_map_area);
  22835. migrate_read_unlock(zspage);
  22836. unpin_tag(handle);
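zsmalloc packs a bit-spinlock (HANDLE_PIN_BIT) into the low bit of each object handle; bit spinlocks cannot be turned into sleeping locks, so under CONFIG_PREEMPT_RT_FULL the handle allocation is enlarged to a small structure holding the encoded address plus a mutex, and pin_tag()/unpin_tag() become mutex operations. (The zs_map_area change is the same get_locked_var() pattern already used in mm/swap.c.) A condensed sketch of the RT handle handling follows; note that the real zs_get_pure_handle() masks off the low OBJ_TAG_BITS before dereferencing, which is omitted here.

#include <linux/mutex.h>
#include <linux/slab.h>

/* RT-only handle layout, as introduced by the hunk above */
struct zsmalloc_handle {
	unsigned long addr;	/* encoded object location */
	struct mutex lock;	/* replaces the HANDLE_PIN_BIT bit-spinlock */
};

static unsigned long alloc_handle_sketch(struct kmem_cache *cachep, gfp_t gfp)
{
	struct zsmalloc_handle *zh = kmem_cache_alloc(cachep, gfp);

	if (!zh)
		return 0;
	mutex_init(&zh->lock);
	return (unsigned long)zh;
}

static void pin_tag_sketch(unsigned long handle)
{
	struct zsmalloc_handle *zh = (struct zsmalloc_handle *)handle;

	mutex_lock(&zh->lock);	/* was: bit_spin_lock(HANDLE_PIN_BIT, ...) */
}

static void unpin_tag_sketch(unsigned long handle)
{
	struct zsmalloc_handle *zh = (struct zsmalloc_handle *)handle;

	mutex_unlock(&zh->lock);
}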
  22837. diff -Nur linux-4.8.15.orig/net/core/dev.c linux-4.8.15/net/core/dev.c
  22838. --- linux-4.8.15.orig/net/core/dev.c 2016-12-15 17:50:48.000000000 +0100
  22839. +++ linux-4.8.15/net/core/dev.c 2017-01-01 17:07:16.119432629 +0100
  22840. @@ -190,6 +190,7 @@
  22841. static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
  22842. static seqcount_t devnet_rename_seq;
  22843. +static DEFINE_MUTEX(devnet_rename_mutex);
  22844. static inline void dev_base_seq_inc(struct net *net)
  22845. {
  22846. @@ -211,14 +212,14 @@
  22847. static inline void rps_lock(struct softnet_data *sd)
  22848. {
  22849. #ifdef CONFIG_RPS
  22850. - spin_lock(&sd->input_pkt_queue.lock);
  22851. + raw_spin_lock(&sd->input_pkt_queue.raw_lock);
  22852. #endif
  22853. }
  22854. static inline void rps_unlock(struct softnet_data *sd)
  22855. {
  22856. #ifdef CONFIG_RPS
  22857. - spin_unlock(&sd->input_pkt_queue.lock);
  22858. + raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
  22859. #endif
  22860. }
  22861. @@ -888,7 +889,8 @@
  22862. strcpy(name, dev->name);
  22863. rcu_read_unlock();
  22864. if (read_seqcount_retry(&devnet_rename_seq, seq)) {
  22865. - cond_resched();
  22866. + mutex_lock(&devnet_rename_mutex);
  22867. + mutex_unlock(&devnet_rename_mutex);
  22868. goto retry;
  22869. }
  22870. @@ -1157,20 +1159,17 @@
  22871. if (dev->flags & IFF_UP)
  22872. return -EBUSY;
  22873. - write_seqcount_begin(&devnet_rename_seq);
  22874. + mutex_lock(&devnet_rename_mutex);
  22875. + __raw_write_seqcount_begin(&devnet_rename_seq);
  22876. - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
  22877. - write_seqcount_end(&devnet_rename_seq);
  22878. - return 0;
  22879. - }
  22880. + if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
  22881. + goto outunlock;
  22882. memcpy(oldname, dev->name, IFNAMSIZ);
  22883. err = dev_get_valid_name(net, dev, newname);
  22884. - if (err < 0) {
  22885. - write_seqcount_end(&devnet_rename_seq);
  22886. - return err;
  22887. - }
  22888. + if (err < 0)
  22889. + goto outunlock;
  22890. if (oldname[0] && !strchr(oldname, '%'))
  22891. netdev_info(dev, "renamed from %s\n", oldname);
  22892. @@ -1183,11 +1182,12 @@
  22893. if (ret) {
  22894. memcpy(dev->name, oldname, IFNAMSIZ);
  22895. dev->name_assign_type = old_assign_type;
  22896. - write_seqcount_end(&devnet_rename_seq);
  22897. - return ret;
  22898. + err = ret;
  22899. + goto outunlock;
  22900. }
  22901. - write_seqcount_end(&devnet_rename_seq);
  22902. + __raw_write_seqcount_end(&devnet_rename_seq);
  22903. + mutex_unlock(&devnet_rename_mutex);
  22904. netdev_adjacent_rename_links(dev, oldname);
  22905. @@ -1208,7 +1208,8 @@
  22906. /* err >= 0 after dev_alloc_name() or stores the first errno */
  22907. if (err >= 0) {
  22908. err = ret;
  22909. - write_seqcount_begin(&devnet_rename_seq);
  22910. + mutex_lock(&devnet_rename_mutex);
  22911. + __raw_write_seqcount_begin(&devnet_rename_seq);
  22912. memcpy(dev->name, oldname, IFNAMSIZ);
  22913. memcpy(oldname, newname, IFNAMSIZ);
  22914. dev->name_assign_type = old_assign_type;
  22915. @@ -1221,6 +1222,11 @@
  22916. }
  22917. return err;
  22918. +
  22919. +outunlock:
  22920. + __raw_write_seqcount_end(&devnet_rename_seq);
  22921. + mutex_unlock(&devnet_rename_mutex);
  22922. + return err;
  22923. }
  22924. /**
  22925. @@ -2268,6 +2274,7 @@
  22926. sd->output_queue_tailp = &q->next_sched;
  22927. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  22928. local_irq_restore(flags);
  22929. + preempt_check_resched_rt();
  22930. }
  22931. void __netif_schedule(struct Qdisc *q)
  22932. @@ -2349,6 +2356,7 @@
  22933. __this_cpu_write(softnet_data.completion_queue, skb);
  22934. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  22935. local_irq_restore(flags);
  22936. + preempt_check_resched_rt();
  22937. }
  22938. EXPORT_SYMBOL(__dev_kfree_skb_irq);
  22939. @@ -3083,7 +3091,11 @@
  22940. * This permits qdisc->running owner to get the lock more
  22941. * often and dequeue packets faster.
  22942. */
  22943. +#ifdef CONFIG_PREEMPT_RT_FULL
  22944. + contended = true;
  22945. +#else
  22946. contended = qdisc_is_running(q);
  22947. +#endif
  22948. if (unlikely(contended))
  22949. spin_lock(&q->busylock);
  22950. @@ -3146,8 +3158,10 @@
  22951. #define skb_update_prio(skb)
  22952. #endif
  22953. +#ifndef CONFIG_PREEMPT_RT_FULL
  22954. DEFINE_PER_CPU(int, xmit_recursion);
  22955. EXPORT_SYMBOL(xmit_recursion);
  22956. +#endif
  22957. /**
  22958. * dev_loopback_xmit - loop back @skb
  22959. @@ -3391,8 +3405,7 @@
  22960. int cpu = smp_processor_id(); /* ok because BHs are off */
  22961. if (txq->xmit_lock_owner != cpu) {
  22962. - if (unlikely(__this_cpu_read(xmit_recursion) >
  22963. - XMIT_RECURSION_LIMIT))
  22964. + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT))
  22965. goto recursion_alert;
  22966. skb = validate_xmit_skb(skb, dev);
  22967. @@ -3402,9 +3415,9 @@
  22968. HARD_TX_LOCK(dev, txq, cpu);
  22969. if (!netif_xmit_stopped(txq)) {
  22970. - __this_cpu_inc(xmit_recursion);
  22971. + xmit_rec_inc();
  22972. skb = dev_hard_start_xmit(skb, dev, txq, &rc);
  22973. - __this_cpu_dec(xmit_recursion);
  22974. + xmit_rec_dec();
  22975. if (dev_xmit_complete(rc)) {
  22976. HARD_TX_UNLOCK(dev, txq);
  22977. goto out;
  22978. @@ -3778,6 +3791,7 @@
  22979. rps_unlock(sd);
  22980. local_irq_restore(flags);
  22981. + preempt_check_resched_rt();
  22982. atomic_long_inc(&skb->dev->rx_dropped);
  22983. kfree_skb(skb);
  22984. @@ -3796,7 +3810,7 @@
  22985. struct rps_dev_flow voidflow, *rflow = &voidflow;
  22986. int cpu;
  22987. - preempt_disable();
  22988. + migrate_disable();
  22989. rcu_read_lock();
  22990. cpu = get_rps_cpu(skb->dev, skb, &rflow);
  22991. @@ -3806,13 +3820,13 @@
  22992. ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
  22993. rcu_read_unlock();
  22994. - preempt_enable();
  22995. + migrate_enable();
  22996. } else
  22997. #endif
  22998. {
  22999. unsigned int qtail;
  23000. - ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
  23001. - put_cpu();
  23002. + ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
  23003. + put_cpu_light();
  23004. }
  23005. return ret;
  23006. }
  23007. @@ -3846,11 +3860,9 @@
  23008. trace_netif_rx_ni_entry(skb);
  23009. - preempt_disable();
  23010. + local_bh_disable();
  23011. err = netif_rx_internal(skb);
  23012. - if (local_softirq_pending())
  23013. - do_softirq();
  23014. - preempt_enable();
  23015. + local_bh_enable();
  23016. return err;
  23017. }
  23018. @@ -4322,7 +4334,7 @@
  23019. skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
  23020. if (skb->dev == dev) {
  23021. __skb_unlink(skb, &sd->input_pkt_queue);
  23022. - kfree_skb(skb);
  23023. + __skb_queue_tail(&sd->tofree_queue, skb);
  23024. input_queue_head_incr(sd);
  23025. }
  23026. }
  23027. @@ -4331,10 +4343,13 @@
  23028. skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
  23029. if (skb->dev == dev) {
  23030. __skb_unlink(skb, &sd->process_queue);
  23031. - kfree_skb(skb);
  23032. + __skb_queue_tail(&sd->tofree_queue, skb);
  23033. input_queue_head_incr(sd);
  23034. }
  23035. }
  23036. +
  23037. + if (!skb_queue_empty(&sd->tofree_queue))
  23038. + raise_softirq_irqoff(NET_RX_SOFTIRQ);
  23039. }
  23040. static int napi_gro_complete(struct sk_buff *skb)
  23041. @@ -4797,6 +4812,7 @@
  23042. sd->rps_ipi_list = NULL;
  23043. local_irq_enable();
  23044. + preempt_check_resched_rt();
  23045. /* Send pending IPI's to kick RPS processing on remote cpus. */
  23046. while (remsd) {
  23047. @@ -4810,6 +4826,7 @@
  23048. } else
  23049. #endif
  23050. local_irq_enable();
  23051. + preempt_check_resched_rt();
  23052. }
  23053. static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
  23054. @@ -4891,9 +4908,11 @@
  23055. local_irq_save(flags);
  23056. ____napi_schedule(this_cpu_ptr(&softnet_data), n);
  23057. local_irq_restore(flags);
  23058. + preempt_check_resched_rt();
  23059. }
  23060. EXPORT_SYMBOL(__napi_schedule);
  23061. +#ifndef CONFIG_PREEMPT_RT_FULL
  23062. /**
  23063. * __napi_schedule_irqoff - schedule for receive
  23064. * @n: entry to schedule
  23065. @@ -4905,6 +4924,7 @@
  23066. ____napi_schedule(this_cpu_ptr(&softnet_data), n);
  23067. }
  23068. EXPORT_SYMBOL(__napi_schedule_irqoff);
  23069. +#endif
  23070. void __napi_complete(struct napi_struct *n)
  23071. {
  23072. @@ -5194,13 +5214,21 @@
  23073. struct softnet_data *sd = this_cpu_ptr(&softnet_data);
  23074. unsigned long time_limit = jiffies + 2;
  23075. int budget = netdev_budget;
  23076. + struct sk_buff_head tofree_q;
  23077. + struct sk_buff *skb;
  23078. LIST_HEAD(list);
  23079. LIST_HEAD(repoll);
  23080. + __skb_queue_head_init(&tofree_q);
  23081. +
  23082. local_irq_disable();
  23083. + skb_queue_splice_init(&sd->tofree_queue, &tofree_q);
  23084. list_splice_init(&sd->poll_list, &list);
  23085. local_irq_enable();
  23086. + while ((skb = __skb_dequeue(&tofree_q)))
  23087. + kfree_skb(skb);
  23088. +
  23089. for (;;) {
  23090. struct napi_struct *n;
  23091. @@ -5231,7 +5259,7 @@
  23092. list_splice_tail(&repoll, &list);
  23093. list_splice(&list, &sd->poll_list);
  23094. if (!list_empty(&sd->poll_list))
  23095. - __raise_softirq_irqoff(NET_RX_SOFTIRQ);
  23096. + __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ);
  23097. net_rps_action_and_irq_enable(sd);
  23098. }
  23099. @@ -7989,16 +8017,20 @@
  23100. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  23101. local_irq_enable();
  23102. + preempt_check_resched_rt();
  23103. /* Process offline CPU's input_pkt_queue */
  23104. while ((skb = __skb_dequeue(&oldsd->process_queue))) {
  23105. netif_rx_ni(skb);
  23106. input_queue_head_incr(oldsd);
  23107. }
  23108. - while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
  23109. + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
  23110. netif_rx_ni(skb);
  23111. input_queue_head_incr(oldsd);
  23112. }
  23113. + while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
  23114. + kfree_skb(skb);
  23115. + }
  23116. return NOTIFY_OK;
  23117. }
  23118. @@ -8300,8 +8332,9 @@
  23119. for_each_possible_cpu(i) {
  23120. struct softnet_data *sd = &per_cpu(softnet_data, i);
  23121. - skb_queue_head_init(&sd->input_pkt_queue);
  23122. - skb_queue_head_init(&sd->process_queue);
  23123. + skb_queue_head_init_raw(&sd->input_pkt_queue);
  23124. + skb_queue_head_init_raw(&sd->process_queue);
  23125. + skb_queue_head_init_raw(&sd->tofree_queue);
  23126. INIT_LIST_HEAD(&sd->poll_list);
  23127. sd->output_queue_tailp = &sd->output_queue;
  23128. #ifdef CONFIG_RPS
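The functional core of the net/core/dev.c changes is the per-CPU tofree_queue: flush_backlog() runs under the backlog queue's raw lock (see the rps_lock() conversion at the top of the diff) and therefore must not call kfree_skb(), which can take sleeping locks on RT. The skbs are queued instead and NET_RX_SOFTIRQ is raised; net_rx_action() then splices the queue out and frees the skbs with interrupts enabled. The remaining hunks are the migrate_disable()/get_cpu_light() substitutions on the RX path, preempt_check_resched_rt() after IRQ-restore points, and the switch from the xmit_recursion per-CPU counter to the xmit_rec_*() helpers defined elsewhere in this patch. Sketch of the deferred-free half, with backlog_sketch standing in for the relevant softnet_data fields:

#include <linux/interrupt.h>
#include <linux/skbuff.h>

struct backlog_sketch {
	struct sk_buff_head tofree_queue;	/* raw-initialised in the init hunk */
};

/* called with the backlog raw lock held: defer the free */
static void drop_skb_sketch(struct backlog_sketch *sd, struct sk_buff *skb)
{
	__skb_queue_tail(&sd->tofree_queue, skb);
	raise_softirq_irqoff(NET_RX_SOFTIRQ);	/* let net_rx_action() clean up */
}

/* start of net_rx_action(): splice and free with interrupts enabled */
static void drain_tofree_sketch(struct backlog_sketch *sd)
{
	struct sk_buff_head tofree_q;
	struct sk_buff *skb;

	__skb_queue_head_init(&tofree_q);

	local_irq_disable();
	skb_queue_splice_init(&sd->tofree_queue, &tofree_q);
	local_irq_enable();

	while ((skb = __skb_dequeue(&tofree_q)))
		kfree_skb(skb);
}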
  23129. diff -Nur linux-4.8.15.orig/net/core/filter.c linux-4.8.15/net/core/filter.c
  23130. --- linux-4.8.15.orig/net/core/filter.c 2016-12-15 17:50:48.000000000 +0100
  23131. +++ linux-4.8.15/net/core/filter.c 2017-01-01 17:07:16.119432629 +0100
  23132. @@ -1592,7 +1592,7 @@
  23133. {
  23134. int ret;
  23135. - if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
  23136. + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT)) {
  23137. net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
  23138. kfree_skb(skb);
  23139. return -ENETDOWN;
  23140. @@ -1600,9 +1600,9 @@
  23141. skb->dev = dev;
  23142. - __this_cpu_inc(xmit_recursion);
  23143. + xmit_rec_inc();
  23144. ret = dev_queue_xmit(skb);
  23145. - __this_cpu_dec(xmit_recursion);
  23146. + xmit_rec_dec();
  23147. return ret;
  23148. }
  23149. diff -Nur linux-4.8.15.orig/net/core/gen_estimator.c linux-4.8.15/net/core/gen_estimator.c
  23150. --- linux-4.8.15.orig/net/core/gen_estimator.c 2016-12-15 17:50:48.000000000 +0100
  23151. +++ linux-4.8.15/net/core/gen_estimator.c 2017-01-01 17:07:16.119432629 +0100
  23152. @@ -84,7 +84,7 @@
  23153. struct gnet_stats_basic_packed *bstats;
  23154. struct gnet_stats_rate_est64 *rate_est;
  23155. spinlock_t *stats_lock;
  23156. - seqcount_t *running;
  23157. + net_seqlock_t *running;
  23158. int ewma_log;
  23159. u32 last_packets;
  23160. unsigned long avpps;
  23161. @@ -213,7 +213,7 @@
  23162. struct gnet_stats_basic_cpu __percpu *cpu_bstats,
  23163. struct gnet_stats_rate_est64 *rate_est,
  23164. spinlock_t *stats_lock,
  23165. - seqcount_t *running,
  23166. + net_seqlock_t *running,
  23167. struct nlattr *opt)
  23168. {
  23169. struct gen_estimator *est;
  23170. @@ -309,7 +309,7 @@
  23171. struct gnet_stats_basic_cpu __percpu *cpu_bstats,
  23172. struct gnet_stats_rate_est64 *rate_est,
  23173. spinlock_t *stats_lock,
  23174. - seqcount_t *running, struct nlattr *opt)
  23175. + net_seqlock_t *running, struct nlattr *opt)
  23176. {
  23177. gen_kill_estimator(bstats, rate_est);
  23178. return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, running, opt);
  23179. diff -Nur linux-4.8.15.orig/net/core/gen_stats.c linux-4.8.15/net/core/gen_stats.c
  23180. --- linux-4.8.15.orig/net/core/gen_stats.c 2016-12-15 17:50:48.000000000 +0100
  23181. +++ linux-4.8.15/net/core/gen_stats.c 2017-01-01 17:07:16.119432629 +0100
  23182. @@ -130,7 +130,7 @@
  23183. }
  23184. void
  23185. -__gnet_stats_copy_basic(const seqcount_t *running,
  23186. +__gnet_stats_copy_basic(net_seqlock_t *running,
  23187. struct gnet_stats_basic_packed *bstats,
  23188. struct gnet_stats_basic_cpu __percpu *cpu,
  23189. struct gnet_stats_basic_packed *b)
  23190. @@ -143,10 +143,10 @@
  23191. }
  23192. do {
  23193. if (running)
  23194. - seq = read_seqcount_begin(running);
  23195. + seq = net_seq_begin(running);
  23196. bstats->bytes = b->bytes;
  23197. bstats->packets = b->packets;
  23198. - } while (running && read_seqcount_retry(running, seq));
  23199. + } while (running && net_seq_retry(running, seq));
  23200. }
  23201. EXPORT_SYMBOL(__gnet_stats_copy_basic);
  23202. @@ -164,7 +164,7 @@
  23203. * if the room in the socket buffer was not sufficient.
  23204. */
  23205. int
  23206. -gnet_stats_copy_basic(const seqcount_t *running,
  23207. +gnet_stats_copy_basic(net_seqlock_t *running,
  23208. struct gnet_dump *d,
  23209. struct gnet_stats_basic_cpu __percpu *cpu,
  23210. struct gnet_stats_basic_packed *b)
  23211. diff -Nur linux-4.8.15.orig/net/core/skbuff.c linux-4.8.15/net/core/skbuff.c
  23212. --- linux-4.8.15.orig/net/core/skbuff.c 2016-12-15 17:50:48.000000000 +0100
  23213. +++ linux-4.8.15/net/core/skbuff.c 2017-01-01 17:07:16.123432884 +0100
  23214. @@ -64,6 +64,7 @@
  23215. #include <linux/errqueue.h>
  23216. #include <linux/prefetch.h>
  23217. #include <linux/if_vlan.h>
  23218. +#include <linux/locallock.h>
  23219. #include <net/protocol.h>
  23220. #include <net/dst.h>
  23221. @@ -360,6 +361,8 @@
  23222. static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
  23223. static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
  23224. +static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
  23225. +static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock);
  23226. static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  23227. {
  23228. @@ -367,10 +370,10 @@
  23229. unsigned long flags;
  23230. void *data;
  23231. - local_irq_save(flags);
  23232. + local_lock_irqsave(netdev_alloc_lock, flags);
  23233. nc = this_cpu_ptr(&netdev_alloc_cache);
  23234. data = __alloc_page_frag(nc, fragsz, gfp_mask);
  23235. - local_irq_restore(flags);
  23236. + local_unlock_irqrestore(netdev_alloc_lock, flags);
  23237. return data;
  23238. }
  23239. @@ -389,9 +392,13 @@
  23240. static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  23241. {
  23242. - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  23243. + struct napi_alloc_cache *nc;
  23244. + void *data;
  23245. - return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
  23246. + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  23247. + data = __alloc_page_frag(&nc->page, fragsz, gfp_mask);
  23248. + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  23249. + return data;
  23250. }
  23251. void *napi_alloc_frag(unsigned int fragsz)
  23252. @@ -438,13 +445,13 @@
  23253. if (sk_memalloc_socks())
  23254. gfp_mask |= __GFP_MEMALLOC;
  23255. - local_irq_save(flags);
  23256. + local_lock_irqsave(netdev_alloc_lock, flags);
  23257. nc = this_cpu_ptr(&netdev_alloc_cache);
  23258. data = __alloc_page_frag(nc, len, gfp_mask);
  23259. pfmemalloc = nc->pfmemalloc;
  23260. - local_irq_restore(flags);
  23261. + local_unlock_irqrestore(netdev_alloc_lock, flags);
  23262. if (unlikely(!data))
  23263. return NULL;
  23264. @@ -485,9 +492,10 @@
  23265. struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
  23266. gfp_t gfp_mask)
  23267. {
  23268. - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  23269. + struct napi_alloc_cache *nc;
  23270. struct sk_buff *skb;
  23271. void *data;
  23272. + bool pfmemalloc;
  23273. len += NET_SKB_PAD + NET_IP_ALIGN;
  23274. @@ -505,7 +513,10 @@
  23275. if (sk_memalloc_socks())
  23276. gfp_mask |= __GFP_MEMALLOC;
  23277. + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  23278. data = __alloc_page_frag(&nc->page, len, gfp_mask);
  23279. + pfmemalloc = nc->page.pfmemalloc;
  23280. + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  23281. if (unlikely(!data))
  23282. return NULL;
  23283. @@ -516,7 +527,7 @@
  23284. }
  23285. /* use OR instead of assignment to avoid clearing of bits in mask */
  23286. - if (nc->page.pfmemalloc)
  23287. + if (pfmemalloc)
  23288. skb->pfmemalloc = 1;
  23289. skb->head_frag = 1;
  23290. @@ -760,23 +771,26 @@
  23291. void __kfree_skb_flush(void)
  23292. {
  23293. - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  23294. + struct napi_alloc_cache *nc;
  23295. + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  23296. /* flush skb_cache if containing objects */
  23297. if (nc->skb_count) {
  23298. kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
  23299. nc->skb_cache);
  23300. nc->skb_count = 0;
  23301. }
  23302. + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  23303. }
  23304. static inline void _kfree_skb_defer(struct sk_buff *skb)
  23305. {
  23306. - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  23307. + struct napi_alloc_cache *nc;
  23308. /* drop skb->head and call any destructors for packet */
  23309. skb_release_all(skb);
  23310. + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  23311. /* record skb to CPU local list */
  23312. nc->skb_cache[nc->skb_count++] = skb;
  23313. @@ -791,6 +805,7 @@
  23314. nc->skb_cache);
  23315. nc->skb_count = 0;
  23316. }
  23317. + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  23318. }
  23319. void __kfree_skb_defer(struct sk_buff *skb)
  23320. {
diff -Nur linux-4.8.15.orig/net/core/sock.c linux-4.8.15/net/core/sock.c
--- linux-4.8.15.orig/net/core/sock.c 2016-12-15 17:50:48.000000000 +0100
+++ linux-4.8.15/net/core/sock.c 2017-01-01 17:07:16.123432884 +0100
@@ -2510,12 +2510,11 @@
if (sk->sk_lock.owned)
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
+ spin_unlock_bh(&sk->sk_lock.slock);
/*
* The sk_lock has mutex_lock() semantics here:
*/
mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
- local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);
diff -Nur linux-4.8.15.orig/net/ipv4/icmp.c linux-4.8.15/net/ipv4/icmp.c
--- linux-4.8.15.orig/net/ipv4/icmp.c 2016-12-15 17:50:48.000000000 +0100
+++ linux-4.8.15/net/ipv4/icmp.c 2017-01-01 17:07:16.123432884 +0100
@@ -69,6 +69,7 @@
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
+#include <linux/sysrq.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
@@ -77,6 +78,7 @@
#include <linux/string.h>
#include <linux/netfilter_ipv4.h>
#include <linux/slab.h>
+#include <linux/locallock.h>
#include <net/snmp.h>
#include <net/ip.h>
#include <net/route.h>
@@ -204,6 +206,8 @@
*
* On SMP we have one ICMP socket per-cpu.
*/
+static DEFINE_LOCAL_IRQ_LOCK(icmp_sk_lock);
+
static struct sock *icmp_sk(struct net *net)
{
return *this_cpu_ptr(net->ipv4.icmp_sk);
@@ -215,12 +219,14 @@
local_bh_disable();
+ local_lock(icmp_sk_lock);
sk = icmp_sk(net);
if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
/* This can happen if the output path signals a
* dst_link_failure() for an outgoing ICMP packet.
*/
+ local_unlock(icmp_sk_lock);
local_bh_enable();
return NULL;
}
@@ -230,6 +236,7 @@
static inline void icmp_xmit_unlock(struct sock *sk)
{
spin_unlock_bh(&sk->sk_lock.slock);
+ local_unlock(icmp_sk_lock);
}
int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
@@ -358,6 +365,7 @@
struct sock *sk;
struct sk_buff *skb;
+ local_lock(icmp_sk_lock);
sk = icmp_sk(dev_net((*rt)->dst.dev));
if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
icmp_param->data_len+icmp_param->head_len,
@@ -380,6 +388,7 @@
skb->ip_summed = CHECKSUM_NONE;
ip_push_pending_frames(sk, fl4);
}
+ local_unlock(icmp_sk_lock);
}
/*
@@ -891,6 +900,30 @@
}
/*
+ * 32bit and 64bit have different timestamp length, so we check for
+ * the cookie at offset 20 and verify it is repeated at offset 50
+ */
+#define CO_POS0 20
+#define CO_POS1 50
+#define CO_SIZE sizeof(int)
+#define ICMP_SYSRQ_SIZE 57
+
+/*
+ * We got a ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
+ * pattern and if it matches send the next byte as a trigger to sysrq.
+ */
+static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
+{
+ int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
+ char *p = skb->data;
+
+ if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
+ !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
+ p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
+ handle_sysrq(p[CO_POS0 + CO_SIZE]);
+}
+
+/*
* Handle ICMP_ECHO ("ping") requests.
*
* RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
@@ -917,6 +950,11 @@
icmp_param.data_len = skb->len;
icmp_param.head_len = sizeof(struct icmphdr);
icmp_reply(&icmp_param, skb);
+
+ if (skb->len == ICMP_SYSRQ_SIZE &&
+ net->ipv4.sysctl_icmp_echo_sysrq) {
+ icmp_check_sysrq(net, skb);
+ }
}
/* should there be an ICMP stat for ignored echos? */
return true;
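
The icmp_check_sysrq() hook added above only fires when the echo payload is exactly ICMP_SYSRQ_SIZE (57) bytes and the icmp_echo_sysrq sysctl (added in the next hunk) is non-zero, so the default value of 0 leaves it inert. A hedged userspace sketch of a payload that would satisfy the check -- the offsets and sizes are the ones defined above, while the cookie value and trigger byte passed in are arbitrary example choices:

/*
 * Illustrative only: builds a buffer matching the layout icmp_check_sysrq()
 * expects. It does not send anything; cookie and trigger are example inputs.
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>

#define CO_POS0         20
#define CO_POS1         50
#define CO_SIZE         sizeof(uint32_t)
#define ICMP_SYSRQ_SIZE 57

static void build_sysrq_payload(unsigned char buf[ICMP_SYSRQ_SIZE],
				uint32_t cookie, char trigger)
{
	uint32_t be_cookie = htonl(cookie);	/* kernel compares htonl(sysctl value) */

	memset(buf, 0, ICMP_SYSRQ_SIZE);
	memcpy(buf + CO_POS0, &be_cookie, CO_SIZE);
	memcpy(buf + CO_POS1, &be_cookie, CO_SIZE);
	buf[CO_POS0 + CO_SIZE] = trigger;	/* byte after the first cookie ... */
	buf[CO_POS1 + CO_SIZE] = trigger;	/* ... must repeat after the second */
}
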
diff -Nur linux-4.8.15.orig/net/ipv4/sysctl_net_ipv4.c linux-4.8.15/net/ipv4/sysctl_net_ipv4.c
--- linux-4.8.15.orig/net/ipv4/sysctl_net_ipv4.c 2016-12-15 17:50:48.000000000 +0100
+++ linux-4.8.15/net/ipv4/sysctl_net_ipv4.c 2017-01-01 17:07:16.123432884 +0100
@@ -681,6 +681,13 @@
.proc_handler = proc_dointvec
},
{
+ .procname = "icmp_echo_sysrq",
+ .data = &init_net.ipv4.sysctl_icmp_echo_sysrq,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "icmp_ignore_bogus_error_responses",
.data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
.maxlen = sizeof(int),
diff -Nur linux-4.8.15.orig/net/ipv4/tcp_ipv4.c linux-4.8.15/net/ipv4/tcp_ipv4.c
--- linux-4.8.15.orig/net/ipv4/tcp_ipv4.c 2016-12-15 17:50:48.000000000 +0100
+++ linux-4.8.15/net/ipv4/tcp_ipv4.c 2017-01-01 17:07:16.127433142 +0100
@@ -62,6 +62,7 @@
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>
+#include <linux/locallock.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
@@ -565,6 +566,7 @@
}
EXPORT_SYMBOL(tcp_v4_send_check);
+static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock);
/*
* This routine will send an RST to the other tcp.
*
@@ -692,6 +694,8 @@
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
arg.tos = ip_hdr(skb)->tos;
+
+ local_lock(tcp_sk_lock);
local_bh_disable();
ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -701,6 +705,7 @@
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
local_bh_enable();
+ local_unlock(tcp_sk_lock);
#ifdef CONFIG_TCP_MD5SIG
out:
@@ -776,6 +781,7 @@
if (oif)
arg.bound_dev_if = oif;
arg.tos = tos;
+ local_lock(tcp_sk_lock);
local_bh_disable();
ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -784,6 +790,7 @@
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
local_bh_enable();
+ local_unlock(tcp_sk_lock);
}
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
diff -Nur linux-4.8.15.orig/net/mac80211/rx.c linux-4.8.15/net/mac80211/rx.c
--- linux-4.8.15.orig/net/mac80211/rx.c 2016-12-15 17:50:48.000000000 +0100
+++ linux-4.8.15/net/mac80211/rx.c 2017-01-01 17:07:16.127433142 +0100
@@ -4070,7 +4070,7 @@
struct ieee80211_supported_band *sband;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- WARN_ON_ONCE(softirq_count() == 0);
+ WARN_ON_ONCE_NONRT(softirq_count() == 0);
if (WARN_ON(status->band >= NUM_NL80211_BANDS))
goto drop;
diff -Nur linux-4.8.15.orig/net/netfilter/core.c linux-4.8.15/net/netfilter/core.c
--- linux-4.8.15.orig/net/netfilter/core.c 2016-12-15 17:50:48.000000000 +0100
+++ linux-4.8.15/net/netfilter/core.c 2017-01-01 17:07:16.127433142 +0100
@@ -22,11 +22,17 @@
#include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/locallock.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include "nf_internals.h"
+#ifdef CONFIG_PREEMPT_RT_BASE
+DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
+EXPORT_PER_CPU_SYMBOL(xt_write_lock);
+#endif
+
static DEFINE_MUTEX(afinfo_mutex);
const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
diff -Nur linux-4.8.15.orig/net/packet/af_packet.c linux-4.8.15/net/packet/af_packet.c
--- linux-4.8.15.orig/net/packet/af_packet.c 2016-12-15 17:50:48.000000000 +0100
+++ linux-4.8.15/net/packet/af_packet.c 2017-01-01 17:07:16.131433400 +0100
@@ -63,6 +63,7 @@
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
+#include <linux/delay.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -694,7 +695,7 @@
if (BLOCK_NUM_PKTS(pbd)) {
while (atomic_read(&pkc->blk_fill_in_prog)) {
/* Waiting for skb_copy_bits to finish... */
- cpu_relax();
+ cpu_chill();
}
}
@@ -956,7 +957,7 @@
if (!(status & TP_STATUS_BLK_TMO)) {
while (atomic_read(&pkc->blk_fill_in_prog)) {
/* Waiting for skb_copy_bits to finish... */
- cpu_relax();
+ cpu_chill();
}
}
prb_close_block(pkc, pbd, po, status);
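
Both busy-wait loops above trade cpu_relax() for cpu_chill(). On an RT kernel the writer that clears blk_fill_in_prog may be a preempted task rather than code on another CPU, so spinning can stall it indefinitely; cpu_chill() is assumed to sleep briefly on RT and fall back to cpu_relax() otherwise, which is also why linux/delay.h gets included. A sketch of the resulting wait pattern:

/*
 * Sketch of the wait pattern only; cpu_chill() itself is provided elsewhere
 * in this patch and reached through linux/delay.h.
 */
#include <linux/atomic.h>
#include <linux/delay.h>

static void wait_for_block_fill(atomic_t *blk_fill_in_prog)
{
	while (atomic_read(blk_fill_in_prog)) {
		/*
		 * A cpu_relax() spin assumes the other party runs concurrently
		 * on another CPU; sleeping instead lets a preempted writer on
		 * this CPU make progress and clear the flag.
		 */
		cpu_chill();
	}
}
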
diff -Nur linux-4.8.15.orig/net/rds/ib_rdma.c linux-4.8.15/net/rds/ib_rdma.c
--- linux-4.8.15.orig/net/rds/ib_rdma.c 2016-12-15 17:50:48.000000000 +0100
+++ linux-4.8.15/net/rds/ib_rdma.c 2017-01-01 17:07:16.131433400 +0100
@@ -34,6 +34,7 @@
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/llist.h>
+#include <linux/delay.h>
#include "rds_single_path.h"
#include "ib_mr.h"
@@ -210,7 +211,7 @@
for_each_online_cpu(cpu) {
flag = &per_cpu(clean_list_grace, cpu);
while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
- cpu_relax();
+ cpu_chill();
}
}
diff -Nur linux-4.8.15.orig/net/rxrpc/security.c linux-4.8.15/net/rxrpc/security.c
--- linux-4.8.15.orig/net/rxrpc/security.c 2016-12-15 17:50:48.000000000 +0100
+++ linux-4.8.15/net/rxrpc/security.c 2017-01-01 17:07:16.131433400 +0100
@@ -19,9 +19,6 @@
#include <keys/rxrpc-type.h>
#include "ar-internal.h"
-static LIST_HEAD(rxrpc_security_methods);
-static DECLARE_RWSEM(rxrpc_security_sem);
-
static const struct rxrpc_security *rxrpc_security_types[] = {
[RXRPC_SECURITY_NONE] = &rxrpc_no_security,
#ifdef CONFIG_RXKAD
diff -Nur linux-4.8.15.orig/net/sched/sch_api.c linux-4.8.15/net/sched/sch_api.c
--- linux-4.8.15.orig/net/sched/sch_api.c 2016-12-15 17:50:48.000000000 +0100
+++ linux-4.8.15/net/sched/sch_api.c 2017-01-01 17:07:16.131433400 +0100
@@ -975,7 +975,7 @@
rcu_assign_pointer(sch->stab, stab);
}
if (tca[TCA_RATE]) {
- seqcount_t *running;
+ net_seqlock_t *running;
err = -EOPNOTSUPP;
if (sch->flags & TCQ_F_MQROOT)
diff -Nur linux-4.8.15.orig/net/sched/sch_generic.c linux-4.8.15/net/sched/sch_generic.c
--- linux-4.8.15.orig/net/sched/sch_generic.c 2016-12-15 17:50:48.000000000 +0100
+++ linux-4.8.15/net/sched/sch_generic.c 2017-01-01 17:07:16.131433400 +0100
@@ -426,7 +426,11 @@
.list = LIST_HEAD_INIT(noop_qdisc.list),
.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
.dev_queue = &noop_netdev_queue,
+#ifdef CONFIG_PREEMPT_RT_BASE
+ .running = __SEQLOCK_UNLOCKED(noop_qdisc.running),
+#else
.running = SEQCNT_ZERO(noop_qdisc.running),
+#endif
.busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
};
EXPORT_SYMBOL(noop_qdisc);
@@ -620,9 +624,17 @@
lockdep_set_class(&sch->busylock,
dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+#ifdef CONFIG_PREEMPT_RT_BASE
+ seqlock_init(&sch->running);
+ lockdep_set_class(&sch->running.seqcount,
+ dev->qdisc_running_key ?: &qdisc_running_key);
+ lockdep_set_class(&sch->running.lock,
+ dev->qdisc_running_key ?: &qdisc_running_key);
+#else
seqcount_init(&sch->running);
lockdep_set_class(&sch->running,
dev->qdisc_running_key ?: &qdisc_running_key);
+#endif
sch->ops = ops;
sch->enqueue = ops->enqueue;
@@ -917,7 +929,7 @@
/* Wait for outstanding qdisc_run calls. */
list_for_each_entry(dev, head, close_list)
while (some_qdisc_is_busy(dev))
- yield();
+ msleep(1);
}
void dev_deactivate(struct net_device *dev)
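
The sch_api.c and sch_generic.c hunks above rely on a net_seqlock_t wrapper introduced elsewhere in this patch: the qdisc "running" marker stays a plain seqcount_t on mainline configurations but becomes a full seqlock_t on RT, where the writer side needs a real lock. The yield()-to-msleep(1) change follows the same reasoning as the cpu_chill() conversions earlier: sleeping guarantees the busy qdisc's owner can run. A rough sketch of what the wrapper is assumed to look like (names follow the hunks above; the real header may differ):

/* Assumed shape of the net_seqlock_t abstraction used above; illustrative only. */
#include <linux/seqlock.h>

#ifdef CONFIG_PREEMPT_RT_BASE
typedef seqlock_t net_seqlock_t;	/* writer side backed by a lock on RT */
# define net_seq_begin(r)	read_seqbegin(r)
# define net_seq_retry(r, s)	read_seqretry((r), (s))
#else
typedef seqcount_t net_seqlock_t;	/* mainline behaviour: bare sequence counter */
# define net_seq_begin(r)	read_seqcount_begin(r)
# define net_seq_retry(r, s)	read_seqcount_retry((r), (s))
#endif
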
diff -Nur linux-4.8.15.orig/net/sunrpc/svc_xprt.c linux-4.8.15/net/sunrpc/svc_xprt.c
--- linux-4.8.15.orig/net/sunrpc/svc_xprt.c 2016-12-15 17:50:48.000000000 +0100
+++ linux-4.8.15/net/sunrpc/svc_xprt.c 2017-01-01 17:07:16.131433400 +0100
@@ -396,7 +396,7 @@
goto out;
}
- cpu = get_cpu();
+ cpu = get_cpu_light();
pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
atomic_long_inc(&pool->sp_stats.packets);
@@ -432,7 +432,7 @@
atomic_long_inc(&pool->sp_stats.threads_woken);
wake_up_process(rqstp->rq_task);
- put_cpu();
+ put_cpu_light();
goto out;
}
rcu_read_unlock();
@@ -453,7 +453,7 @@
goto redo_search;
}
rqstp = NULL;
- put_cpu();
+ put_cpu_light();
out:
trace_svc_xprt_do_enqueue(xprt, rqstp);
}
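
get_cpu() pins the task by disabling preemption across the whole pool lookup and wake-up, which is too long a non-preemptible window for RT. The get_cpu_light()/put_cpu_light() pair used above is assumed to rely on migrate_disable() instead, keeping the code on one CPU while remaining preemptible; roughly:

/* Assumed definitions of the _light helpers used above (illustrative only). */
#ifdef CONFIG_PREEMPT_RT_FULL
# define get_cpu_light()	({ migrate_disable(); smp_processor_id(); })
# define put_cpu_light()	migrate_enable()
#else
# define get_cpu_light()	get_cpu()	/* preempt_disable() + CPU id */
# define put_cpu_light()	put_cpu()
#endif
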
diff -Nur linux-4.8.15.orig/scripts/mkcompile_h linux-4.8.15/scripts/mkcompile_h
--- linux-4.8.15.orig/scripts/mkcompile_h 2016-12-15 17:50:48.000000000 +0100
+++ linux-4.8.15/scripts/mkcompile_h 2017-01-01 17:07:16.131433400 +0100
@@ -4,7 +4,8 @@
ARCH=$2
SMP=$3
PREEMPT=$4
-CC=$5
+RT=$5
+CC=$6
vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; }
@@ -57,6 +58,7 @@
CONFIG_FLAGS=""
if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
+if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi
UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
# Truncate to maximum length
diff -Nur linux-4.8.15.orig/sound/core/pcm_native.c linux-4.8.15/sound/core/pcm_native.c
--- linux-4.8.15.orig/sound/core/pcm_native.c 2016-12-15 17:50:48.000000000 +0100
+++ linux-4.8.15/sound/core/pcm_native.c 2017-01-01 17:07:16.135433664 +0100
@@ -135,7 +135,7 @@
void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream)
{
if (!substream->pcm->nonatomic)
- local_irq_disable();
+ local_irq_disable_nort();
snd_pcm_stream_lock(substream);
}
EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq);
@@ -150,7 +150,7 @@
{
snd_pcm_stream_unlock(substream);
if (!substream->pcm->nonatomic)
- local_irq_enable();
+ local_irq_enable_nort();
}
EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq);
@@ -158,7 +158,7 @@
{
unsigned long flags = 0;
if (!substream->pcm->nonatomic)
- local_irq_save(flags);
+ local_irq_save_nort(flags);
snd_pcm_stream_lock(substream);
return flags;
}
@@ -176,7 +176,7 @@
{
snd_pcm_stream_unlock(substream);
if (!substream->pcm->nonatomic)
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
}
EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore);
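
The *_nort variants used in this file are assumed to keep their meaning from the rest of this patch: real interrupt disabling on non-RT kernels, and (near) no-ops on RT, where snd_pcm_stream_lock() ends up on a sleeping lock and must not be taken with interrupts hard-disabled. Roughly:

/* Assumed mapping of the *_nort helpers (illustrative; defined elsewhere in this patch). */
#ifdef CONFIG_PREEMPT_RT_FULL
# define local_irq_disable_nort()	barrier()
# define local_irq_enable_nort()	barrier()
# define local_irq_save_nort(flags)	do { local_save_flags(flags); } while (0)
# define local_irq_restore_nort(flags)	do { (void)(flags); } while (0)
#else
# define local_irq_disable_nort()	local_irq_disable()
# define local_irq_enable_nort()	local_irq_enable()
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#endif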